In [0]:
# Read from Bronze layer
bronze_df = spark.table("ecommerce.bronze.bronze_online_retail")

display(bronze_df)

InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01T08:26:00.000Z,2.55,17850,United Kingdom
536365,71053,WHITE METAL LANTERN,6,2010-12-01T08:26:00.000Z,3.39,17850,United Kingdom
536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01T08:26:00.000Z,2.75,17850,United Kingdom
536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01T08:26:00.000Z,3.39,17850,United Kingdom
536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01T08:26:00.000Z,3.39,17850,United Kingdom
536365,22752,SET 7 BABUSHKA NESTING BOXES,2,2010-12-01T08:26:00.000Z,7.65,17850,United Kingdom
536365,21730,GLASS STAR FROSTED T-LIGHT HOLDER,6,2010-12-01T08:26:00.000Z,4.25,17850,United Kingdom
536366,22633,HAND WARMER UNION JACK,6,2010-12-01T08:28:00.000Z,1.85,17850,United Kingdom
536366,22632,HAND WARMER RED POLKA DOT,6,2010-12-01T08:28:00.000Z,1.85,17850,United Kingdom
536367,84879,ASSORTED COLOUR BIRD ORNAMENT,32,2010-12-01T08:34:00.000Z,1.69,13047,United Kingdom


In [0]:
# Apply DATA QUALITY RULES
from pyspark.sql.functions import col, to_timestamp

silver_df = (
    bronze_df
    # Remove records without CustomerID
    .filter(col("CustomerID").isNotNull())
    
    # Remove cancelled invoices
    .filter(~col("InvoiceNo").startswith("C"))
    
    # Convert InvoiceDate to timestamp
    .withColumn(
        "InvoiceDate",
        to_timestamp(col("InvoiceDate"))
    )
    
    # Create total transaction amount
    .withColumn(
        "total_amount",
        col("Quantity") * col("UnitPrice")
    )
)

display(silver_df)


InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,total_amount
536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01T08:26:00.000Z,2.55,17850,United Kingdom,15.3
536365,71053,WHITE METAL LANTERN,6,2010-12-01T08:26:00.000Z,3.39,17850,United Kingdom,20.34
536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01T08:26:00.000Z,2.75,17850,United Kingdom,22.0
536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01T08:26:00.000Z,3.39,17850,United Kingdom,20.34
536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01T08:26:00.000Z,3.39,17850,United Kingdom,20.34
536365,22752,SET 7 BABUSHKA NESTING BOXES,2,2010-12-01T08:26:00.000Z,7.65,17850,United Kingdom,15.3
536365,21730,GLASS STAR FROSTED T-LIGHT HOLDER,6,2010-12-01T08:26:00.000Z,4.25,17850,United Kingdom,25.5
536366,22633,HAND WARMER UNION JACK,6,2010-12-01T08:28:00.000Z,1.85,17850,United Kingdom,11.1
536366,22632,HAND WARMER RED POLKA DOT,6,2010-12-01T08:28:00.000Z,1.85,17850,United Kingdom,11.1
536367,84879,ASSORTED COLOUR BIRD ORNAMENT,32,2010-12-01T08:34:00.000Z,1.69,13047,United Kingdom,54.08


In [0]:
%sql
CREATE SCHEMA IF NOT EXISTS ecommerce.silver;


In [0]:
# Write SILVER Delta Table
silver_df.write \
    .format("delta") \
    .mode("overwrite") \
    .saveAsTable("ecommerce.silver.silver_online_retail")

In [0]:
# Verify Silver Table
%sql
SELECT COUNT(*) FROM ecommerce.silver.silver_online_retail;


COUNT(*)
532621
