### 🛠️ Day 6 Tasks:

1. Design 3-layer architecture
2. Build Bronze: raw ingestion
3. Build Silver: cleaning & validation
4. Build Gold: business aggregates

## Create Bronze, Silver, Gold Schemas

In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS workspace.ecommerce_bronze;
CREATE SCHEMA IF NOT EXISTS workspace.ecommerce_silver;
CREATE SCHEMA IF NOT EXISTS workspace.ecommerce_gold;

In [0]:
%sql SHOW SCHEMAS IN workspace

databaseName
default
ecommerce
ecommerce_bronze
ecommerce_gold
ecommerce_silver
information_schema


## Bronze Layer – Raw Ingestion

In [0]:
df_bronze = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .csv("/Volumes/workspace/ecommerce/ecommerce_data/")
)

df_bronze.write.format("delta") \
    .mode("overwrite") \
    .saveAsTable("workspace.ecommerce_bronze.orders")



## Silver Layer – Cleaning & Validation

In [0]:
from pyspark.sql.functions import col, to_timestamp

df_silver = (
    spark.read.table("workspace.ecommerce_bronze.orders")
    .filter(col("user_id").isNotNull())
    .filter(col("price") > 0)
    .withColumn("event_time", to_timestamp("event_time"))
)

df_silver.write.format("delta") \
    .mode("overwrite") \
    .saveAsTable("workspace.ecommerce_silver.orders_clean")


In [0]:
from pyspark.sql.functions import sum, count, avg, to_date

df_gold = (
    spark.read.table("workspace.ecommerce_silver.orders_clean")
    .withColumn("order_date", to_date("event_time"))
    .groupBy("order_date")
    .agg(
        sum("price").alias("total_revenue"),
        count("*").alias("total_orders"),
        avg("price").alias("avg_order_value")
    )
)

df_gold.write.format("delta") \
    .mode("overwrite") \
    .saveAsTable("workspace.ecommerce_gold.daily_sales")


In [0]:
display(df_gold)


order_date,total_revenue,total_orders,avg_order_value
2019-11-17,1876480491.4989712,6371159,294.5273366272873
2019-11-16,1998441656.940825,6460123,309.350403535788
2019-11-20,466821973.37990606,1695812,275.27931951177726
2019-11-18,585124794.6899247,2017005,290.09585731811507
2019-11-19,489910565.1598312,1725089,283.9914724166876
2019-11-24,447272961.9498782,1591284,281.0767669063965
2019-11-21,448364254.8998058,1673897,267.85653770799865
2019-11-23,440199629.35998046,1561182,281.96560641871383
2019-11-22,437934668.44978905,1565600,279.72321694544524
2019-11-25,440003052.7599251,1592360,276.3213423848408
