### Importing PySpark functions

In [0]:
from pyspark.sql.functions import *

### Reading data from the bronze table

In [0]:
# Load the raw transactions from the bronze layer. The name of the source
# table is kept in its own variable so the lineage is visible at a glance.
bronze_transactions_table = "retail_analytics.bronze.transactions"
tran_silver_df = spark.read.table(bronze_transactions_table)

### Basic cleaning process and handling null values

In [0]:
# Silver-layer cleaning of the bronze transactions table.
#
# What this cell does, in order:
#   1. Trims every free-text column and replaces missing values with a
#      placeholder. A value counts as missing when it is NULL *or* empty
#      after trimming; a NULL-only fill would let blank strings slip through
#      as "" because filling only targets NULLs.
#   2. Defaults a NULL Discount to 0.0.
#   3. Derives `invoice_date` (a date) from the `Date` column.
#   4. Removes exact duplicate rows. This runs after normalisation so rows
#      that differed only by surrounding whitespace collapse into one.
#
# The text columns are declared once so the trim step and the placeholder
# step can never drift out of sync.
TEXT_COLUMNS = [
    "Size",
    "Color",
    "Currency",
    "Currency_Symbol",
    "SKU",
    "Transaction_Type",
    "Payment_Method",
]
MISSING_TEXT = "Not available"


def _clean_text(column_name):
    """Build the cleaned version of one text column.

    Args:
        column_name: Name of the column to clean.

    Returns:
        A Column expression holding the trimmed value, or MISSING_TEXT when
        the input is NULL or blank after trimming.
    """
    trimmed = trim(col(column_name))
    # `when` without `otherwise` yields NULL, so both NULL inputs and
    # blank-after-trim inputs fall through to the placeholder in `coalesce`.
    return coalesce(when(trimmed != "", trimmed), lit(MISSING_TEXT))


tran_silver_df = (
    spark.read.table("retail_analytics.bronze.transactions")
    # One projection for all derived columns instead of a long chain of
    # single-column `withColumn` calls; existing columns keep their position
    # and `invoice_date` is appended at the end.
    .withColumns({
        **{name: _clean_text(name) for name in TEXT_COLUMNS},
        "Discount": coalesce(col("Discount"), lit(0.0)),
        "invoice_date": to_date(col("Date")),
    })
    .dropDuplicates()
)

### Saving the table

In [0]:
# Persist the cleaned frame as a Delta table in the silver schema,
# replacing whatever the table currently holds.
silver_writer = tran_silver_df.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("retail_analytics.silver.transactions")

In [0]:
# Sanity check: read the saved table back and show a five-row sample.
silver_preview_df = spark.read.table("retail_analytics.silver.transactions").limit(5)
silver_preview_df.display()

Invoice_ID,Line,Customer_ID,Product_ID,Size,Color,Unit_Price,Quantity,Date,Discount,Line_Total,Store_ID,Employee_ID,Currency,Currency_Symbol,SKU,Transaction_Type,Payment_Method,Invoice_Total,ingestion_ts,invoice_date
INV-US-005-04341998,1,279470,12127,Not available,Not available,9.0,1,2024-11-25T19:04:00.000Z,0.0,9.0,5,55,USD,$,FEAC12127--,Sale,Credit Card,9.0,2026-01-16T15:06:42.722Z,2024-11-25
INV-US-005-04342548,1,293591,14783,L,Not available,24.0,1,2024-11-28T09:11:00.000Z,0.0,24.0,5,54,USD,$,MASH14783-L-,Sale,Credit Card,24.0,2026-01-16T15:06:42.722Z,2024-11-28
INV-US-005-04343568,1,268147,14259,Not available,Not available,23.0,1,2024-12-03T15:03:00.000Z,0.0,23.0,5,59,USD,$,FEAC14259--,Sale,Credit Card,23.0,2026-01-16T15:06:42.722Z,2024-12-03
INV-US-005-04344113,1,285857,13862,S,Not available,26.0,1,2024-12-06T19:52:00.000Z,0.0,26.0,5,59,USD,$,FESH13862-S-,Sale,Credit Card,26.0,2026-01-16T15:06:42.722Z,2024-12-06
INV-US-005-04344314,7,288058,13636,L,RED,59.0,3,2024-12-07T10:12:00.000Z,0.0,177.0,5,57,USD,$,MACO13636-L-RED,Sale,Cash,556.5,2026-01-16T15:06:42.722Z,2024-12-07


In [0]:
# Row count of the saved silver table; left as the last expression so the
# notebook renders the number as the cell output.
silver_transactions_df = spark.read.table("retail_analytics.silver.transactions")
silver_transactions_df.count()

6416029