# 🥇 Gold Layer — Business Metrics

**Annie's Magic Numbers Medallion Architecture**

This notebook produces analytical tables for Power BI using clean Silver data.

### 🔐 Configuration — ADLS Gen2 Authentication

In [None]:
# Give this Spark session access to the ADLS Gen2 storage account by setting
# its account-key property. The value below is only a placeholder: paste the
# key when running, and do not save a real key into the notebook.
adls_key_setting = "fs.azure.account.key.anniedatalake123.dfs.core.windows.net"
adls_key_value = "<PASTE_STORAGE_ACCOUNT_KEY_1_HERE>"
spark.conf.set(adls_key_setting, adls_key_value)

### 🟦 Path Setup & Imports

In [None]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Storage account and container that hold the medallion layers.
storage_account = "anniedatalake123"
container_name = "annie-data"

# Root abfss:// URI of the container, then the Silver (input) and Gold (output)
# folders beneath it. Every path keeps its trailing slash so that table names
# can be appended directly.
base_path = "abfss://" + container_name + "@" + storage_account + ".dfs.core.windows.net/"
silver_path = f"{base_path}silver/"
gold_path = f"{base_path}gold/"

### 🟦 Register Silver Tables as Temporary Views

In [None]:
# Temp-view name -> folder under silver_path holding the Delta table.
# Registering the views lets the Gold queries below refer to the Silver data
# by name in Spark SQL.
silver_views = {
    "sv_sales": "sales",
    "sv_purchases": "purchases",
    "sv_beg_inv": "beg_inventory",
    "sv_end_inv": "end_inventory",
}
for view_name, folder in silver_views.items():
    silver_df = spark.read.format("delta").load(silver_path + folder)
    silver_df.createOrReplaceTempView(view_name)

print("‚úÖ  Temporary views registered from Silver paths.")

### 🟦 Helper Writer Function

In [None]:
def write_gold(df, table_name, partition_by=None):
    """Overwrite a Gold Delta table with *df* and report its row count.

    The table is saved under ``gold_path + table_name`` in ``overwrite`` mode
    with ``overwriteSchema`` enabled, so both the data and the schema at that
    path are replaced on every call.

    Args:
        df: DataFrame to persist.
        table_name: Name appended to ``gold_path`` to form the target path;
            also used in the printed summary.
        partition_by: Optional value passed to ``partitionBy``; when falsy
            the table is written unpartitioned.

    Returns:
        None. A one-line summary is printed after the write.
    """
    destination = gold_path + table_name

    delta_writer = df.write.format("delta").mode("overwrite").option("overwriteSchema", "true")
    if partition_by:
        delta_writer = delta_writer.partitionBy(partition_by)
    delta_writer.save(destination)

    # Count from the table that was just written rather than from `df`, so the
    # printed number reflects what is actually stored at the destination.
    row_count = spark.read.format("delta").load(destination).count()
    print(f"   ‚úÖ  gold.{table_name} saved to {destination}  ‚Üí  {row_count:,} rows")

### 🥇 Gold — Core Cost Lookup

In [None]:
# Per (brand, description) purchase cost: approximate median and mean of
# cost_per_unit, computed only over purchase rows whose cost is non-null and
# positive.
cost_lookup_sql = """
    SELECT
        brand,
        description,
        PERCENTILE_APPROX(cost_per_unit, 0.5)  AS median_cost_per_unit,
        AVG(cost_per_unit)                       AS avg_cost_per_unit
    FROM sv_purchases
    WHERE cost_per_unit IS NOT NULL AND cost_per_unit > 0
    GROUP BY brand, description
"""
cost_lookup = spark.sql(cost_lookup_sql)

# Registered as a view so the sales-enrichment query can join against it.
cost_lookup.createOrReplaceTempView("cost_lookup")

### 🥇 Gold — Sales Enriched

In [None]:
# Unit cost applied to each sale: the median purchase cost from cost_lookup
# when the (brand, description) pair has one, otherwise 60% of the sales price.
# Kept in one place because the query needs the same expression three times.
unit_cost_sql = "COALESCE(c.median_cost_per_unit, s.sales_price * 0.60)"

# Every sales row plus cost_per_unit, profit_dollars and margin_pct.
# margin_pct is NULL when sales_dollars is 0 to avoid dividing by zero.
# The LEFT JOIN keeps sales that have no matching purchase cost.
sales_enriched_sql = f"""
    SELECT
        s.*,
        {unit_cost_sql} AS cost_per_unit,
        ROUND(s.sales_dollars - ({unit_cost_sql} * s.sales_quantity), 2) AS profit_dollars,
        ROUND(CASE WHEN s.sales_dollars = 0 THEN NULL 
              ELSE ((s.sales_dollars - {unit_cost_sql} * s.sales_quantity) / s.sales_dollars) * 100 
              END, 2) AS margin_pct
    FROM sv_sales s
    LEFT JOIN cost_lookup c ON s.brand = c.brand AND s.description = c.description
"""
sales_enriched = spark.sql(sales_enriched_sql)

# The view feeds the profitability rollups; the Delta copy is partitioned by brand.
sales_enriched.createOrReplaceTempView("gold_se")
write_gold(sales_enriched, "sales_enriched", partition_by="brand")

### 🥇 Gold — Product & Brand Profitability

In [None]:
# Product-level rollup of the enriched sales: one row per
# (brand, description, size, classification) with units sold, revenue, profit
# and the mean of the row-level margin percentages.
product_profitability_sql = """
    SELECT brand, description, size, classification,
           SUM(sales_quantity) AS total_units_sold,
           ROUND(SUM(sales_dollars), 2) AS total_revenue,
           ROUND(SUM(profit_dollars), 2) AS total_profit_dollars,
           ROUND(AVG(margin_pct), 2) AS avg_margin_pct
    FROM gold_se
    GROUP BY brand, description, size, classification
"""
product_profitability = spark.sql(product_profitability_sql)
write_gold(product_profitability, "product_profitability")

# Brand-level rollup of the enriched sales: one row per brand with revenue,
# profit and the mean of the row-level margin percentages.
#
# classification is taken with MIN rather than FIRST: FIRST depends on row
# order, which Spark does not guarantee after a shuffle, so the stored value
# could change between runs (and could be NULL if the first row seen had none).
# MIN is deterministic, ignores NULLs, and yields the same value whenever a
# brand has a single classification.
#
# NOTE(review): avg_margin_pct is an unweighted average over sales rows, not
# total profit / total revenue; confirm that is the intended metric.
brand_profitability = spark.sql("""
    SELECT brand, MIN(classification) AS classification,
           ROUND(SUM(sales_dollars), 2) AS total_revenue,
           ROUND(SUM(profit_dollars), 2) AS total_profit_dollars,
           ROUND(AVG(margin_pct), 2) AS avg_margin_pct
    FROM gold_se
    GROUP BY brand
""")
write_gold(brand_profitability, "brand_profitability")