In [0]:
# 03_SCD_Type2_Dimensions

from pyspark.sql.functions import current_timestamp, lit, col

# Select the working catalog first, then the schema inside it, so the
# unqualified table names used by this notebook resolve against
# main.ecommerce. Order matters: the schema lookup happens within the
# catalog that is current at that point.
for _context_stmt in ("USE CATALOG main", "USE SCHEMA ecommerce"):
    spark.sql(_context_stmt)

# ====================================================
# 1. DIMENSION: CUSTOMERS (SCD Type 2)
# ====================================================
print("Building SCD-2 for Customers...")

# Source rows come from the Silver-layer customers table.
df_cust = spark.read.table("silver_customers")

# Initial-load SCD bookkeeping: every row is stamped as the current version.
#   effective_start_date -> timestamp at which this load is evaluated
#   effective_end_date   -> NULL timestamp (row has not been superseded)
#   is_active            -> True
df_dim_cust = (
    df_cust
    .withColumn("effective_start_date", current_timestamp())
    .withColumn("effective_end_date", lit(None).cast("timestamp"))
    .withColumn("is_active", lit(True))
)

# Persist as a managed Delta table.
# NOTE(review): mode="overwrite" replaces the table's current contents on
# every run, so this cell only covers the initial load -- confirm that
# incremental SCD-2 changes are applied elsewhere.
df_dim_cust.write.saveAsTable("dim_customers", format="delta", mode="overwrite")
print("--> Created Table: main.ecommerce.dim_customers")


# ====================================================
# 2. DIMENSION: PRODUCTS (SCD Type 2)
# ====================================================
print("Building SCD-2 for Products...")

# Source rows come from the Silver-layer products table.
df_prod = spark.read.table("silver_products")

# Initial-load SCD bookkeeping: every row is stamped as the current version.
#   effective_start_date -> timestamp at which this load is evaluated
#   effective_end_date   -> NULL timestamp (row has not been superseded)
#   is_active            -> True
df_dim_prod = (
    df_prod
    .withColumn("effective_start_date", current_timestamp())
    .withColumn("effective_end_date", lit(None).cast("timestamp"))
    .withColumn("is_active", lit(True))
)

# Persist as a managed Delta table.
# NOTE(review): mode="overwrite" replaces the table's current contents on
# every run, so this cell only covers the initial load -- confirm that
# incremental SCD-2 changes are applied elsewhere.
df_dim_prod.write.saveAsTable("dim_products", format="delta", mode="overwrite")
print("--> Created Table: main.ecommerce.dim_products")

# Closing banner: a separator rule followed by the success message, emitted
# as two lines of output.
print(
    "------------------------------------------------",
    "SUCCESS: SCD Type 2 Dimensions Initialized!",
    sep="\n",
)