In [0]:
# Databricks notebook: Silver Layer - Data Cleaning & Enrichment
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_timestamp, month, when, lit

spark = SparkSession.builder.getOrCreate()

# ================================
# 1. Read Bronze Table
# ================================
# Load the raw Bronze table and log its size. count() is a Spark action,
# so it is evaluated once here and the result is reused in the message.
bronze_df = spark.table("Bronze_Retail")
bronze_row_count = bronze_df.count()
print(f"🔹 Bronze record count: {bronze_row_count}")

# ================================
# 2. Data Cleaning & Type Casting
# ================================
# Target Spark SQL type for each column that only needs a plain cast.
# Dict order is the order in which the casts are applied.
cast_targets = {
    "Discount_Applied": "boolean",
    "Total_Items": "int",
    "Total_Cost": "double",
}

# "Date" is parsed with an explicit pattern (e.g. "1/31/2023 9:05")
# instead of a plain cast.
silver_df = bronze_df.withColumn(
    "Date", to_timestamp(col("Date"), "M/d/yyyy H:mm")
)

# Replace each remaining column in place with its typed version.
for column_name, target_type in cast_targets.items():
    silver_df = silver_df.withColumn(
        column_name, col(column_name).cast(target_type)
    )

# Derive a "Season" column from the month of "Date":
#   Dec-Feb -> Winter, Mar-May -> Spring, Jun-Aug -> Summer, Sep-Nov -> Fall.
#
# The month is used as a column *expression* rather than being materialised
# as a temporary "MonthNum" column that is dropped afterwards. Adding and
# dropping a helper column would silently overwrite and then remove any
# existing column that happens to be called "MonthNum".
month_of_date = month(col("Date"))

# There is deliberately no otherwise() branch: rows whose "Date" is null
# match none of the conditions and therefore get a null "Season".
silver_df = silver_df.withColumn(
    "Season",
    when(month_of_date.isin(12, 1, 2), lit("Winter"))
    .when(month_of_date.isin(3, 4, 5), lit("Spring"))
    .when(month_of_date.isin(6, 7, 8), lit("Summer"))
    .when(month_of_date.isin(9, 10, 11), lit("Fall")),
)

# Quick visual sanity check: print the first five rows with full,
# untruncated column values.
print("✅ Sample of cleaned & enriched data:")
silver_df.show(n=5, truncate=False)

# ================================
# 3. Save Silver Table (Cleaned Data)
# ================================
# Replace the contents of the "Silver_Retail" Delta table with the cleaned data.
silver_df.write.format("delta").mode("overwrite").saveAsTable("Silver_Retail")

# Count the rows of the table that was just written instead of calling
# silver_df.count(): the latter would re-run the whole Bronze read and
# transformation a second time, and could report a number that differs from
# what was actually saved if Bronze changed in the meantime.
saved_row_count = spark.table("Silver_Retail").count()
print(f"✅ Silver layer saved successfully with {saved_row_count} records.")
