# Gold Notebook

### Silver → Gold Aggregation
#### Purpose: Create business-level metrics for reporting

# - Transformations are lazily evaluated: Spark only runs them when an action (display, write) is triggered
# - Silver layer is considered the “clean source” for Gold
# - Gold layer is aggregated / ready for reporting / BI consumption

In [None]:
# Setup: Import Libraries

from pyspark.sql import SparkSession
from pyspark.sql.functions import sum as _sum, col
from pyspark.sql.functions import when, to_date


In [None]:
# Configure Storage Access
# Each notebook requires access to the storage account

# Fetch the storage account name and its access key from the "my_scope"
# secret scope so neither value is hard-coded in the notebook.
storage_account = dbutils.secrets.get(scope="my_scope", key="storage_account")
storage_key = dbutils.secrets.get(scope="my_scope", key="storage_key")

# Register the account key on the Spark session under the account-specific
# "fs.azure.account.key.<account>.dfs.core.windows.net" setting.
account_key_conf = f"fs.azure.account.key.{storage_account}.dfs.core.windows.net"
spark.conf.set(account_key_conf, storage_key)


In [None]:
# Define Paths

df_silver = f"abfss://silver@{storage_account}.dfs.core.windows.net/supermarket_sales_cleaned/"
df_gold = f"abfss://gold@{storage_account}.dfs.core.windows.net/supermarket_sales_aggregated/"

In [None]:
# Read Silver Layer

df_silver = spark.read.format("parquet").load(df_silver)

display(df_silver.limit(5))  # Quick check of Silver data

In [None]:
# Transformations for Gold Layer
# Example: Aggregate total sales and profit by Region and Category

df_gold = (
    df_silver
    .groupBy("Region", "Category")
    .agg(
        _sum("Sales").alias("Total_Sales"),
        _sum("Profit").alias("Total_Profit")
    )
)

In [None]:
# Order results for readability

df_gold = df_gold.orderBy(col("Region"), col("Category"))

In [None]:
# Write Gold Layer

df_gold.write.format("parquet").mode("overwrite").save(df_gold)

In [None]:
# Display Gold Data

display(df_gold)