### Importing PySpark functions

In [0]:
from pyspark.sql.functions import *

### Reading data from the bronze table

In [0]:
# Load the bronze-layer transactions table that the silver table is built from.
# NOTE(review): the cleaning cell reads this same table again on its own, so the
# value assigned here is replaced there.
bronze_table_name = "retail_analytics.bronze.transactions"
tran_silver_df = spark.read.table(bronze_table_name)

### Basic cleaning and null-value handling

In [0]:
# Build the silver-layer transactions DataFrame from the bronze table.
#
# Steps, in order:
#   1. Drop duplicate rows so a single row remains per (Invoice_ID, Line) pair.
#   2. Trim the descriptive text columns and turn values that are empty after
#      trimming into NULL, so step 5 fills them instead of leaving "" behind
#      (fillna only replaces NULLs, never empty strings).
#   3. Replace a NULL Discount with 0.0.
#   4. Add invoice_date by applying to_date to the Date column.
#   5. Replace NULLs in the text columns with the "Not available" placeholder.
text_columns = [
    "Size",
    "Color",
    "Currency",
    "Currency_Symbol",
    "SKU",
    "Transaction_Type",
    "Payment_Method",
]
missing_label = "Not available"

tran_silver_df = (
    spark.read.table("retail_analytics.bronze.transactions")
    .dropDuplicates(["Invoice_ID", "Line"])
)

for text_column in text_columns:
    trimmed = trim(col(text_column))
    # when() without otherwise() yields NULL for every row that does not match,
    # so NULL inputs and blank strings both come out as NULL here.
    tran_silver_df = tran_silver_df.withColumn(
        text_column, when(trimmed != "", trimmed)
    )

tran_silver_df = (
    tran_silver_df
    .withColumn("Discount", coalesce(col("Discount"), lit(0.0)))
    .withColumn("invoice_date", to_date(col("Date")))
    .fillna({text_column: missing_label for text_column in text_columns})
)

### Saving the silver table

In [0]:
# Persist the cleaned transactions as a Delta table, replacing any existing
# contents of the silver table.
silver_writer = tran_silver_df.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("retail_analytics.silver.transactions")

In [0]:
# Read the saved silver table back and show five rows as a quick sanity check.
silver_preview_df = spark.read.table("retail_analytics.silver.transactions").limit(5)
silver_preview_df.display()

Invoice_ID,Line,Customer_ID,Product_ID,Size,Color,Unit_Price,Quantity,Date,Discount,Line_Total,Store_ID,Employee_ID,Currency,Currency_Symbol,SKU,Transaction_Type,Payment_Method,Invoice_Total,ingestion_ts,invoice_date
INV-US-005-04342444,1,24325,12982,M,Not available,137.0,1,2024-11-27T14:41:00.000Z,0.6,54.8,5,60,USD,$,FESU12982-M-,Sale,Credit Card,54.8,2026-01-16T15:06:42.722Z,2024-11-27
INV-US-005-04343664,1,295733,13343,M,Not available,37.5,1,2024-12-04T20:22:00.000Z,0.0,37.5,5,58,USD,$,FET-13343-M-,Sale,Cash,37.5,2026-01-16T15:06:42.722Z,2024-12-04
INV-US-005-04343819,1,285979,14936,M,PINK,59.0,1,2024-12-05T16:50:00.000Z,0.0,59.0,5,58,USD,$,MACO14936-M-PINK,Sale,Credit Card,113.0,2026-01-16T15:06:42.722Z,2024-12-05
INV-US-005-04343943,1,272836,14777,L,Not available,36.0,1,2024-12-05T19:29:00.000Z,0.0,36.0,5,58,USD,$,FESP14777-L-,Sale,Credit Card,36.0,2026-01-16T15:06:42.722Z,2024-12-05
INV-US-005-04348168,7,282978,13486,XXL,Not available,60.5,2,2024-12-21T08:30:00.000Z,0.5,60.5,5,55,USD,$,MASP13486-XXL-,Sale,Credit Card,247.75,2026-01-16T15:06:42.722Z,2024-12-21
