### Importing PySpark functions

In [0]:
from pyspark.sql.functions import *

### Creating the schema for the silver layer

In [0]:
%sql
-- Make sure the silver-layer schema exists; a no-op when it is already present.
CREATE SCHEMA IF NOT EXISTS retail_analytics.silver;

### Reading data from the bronze table

In [0]:
# Load the bronze-layer customers table as the starting point for the silver build.
cust_silver_df = spark.table("retail_analytics.bronze.customers")

### Basic cleaning process

In [0]:
# Standardise the customer records:
#   - keep a single row per Customer_ID
#   - Email: surrounding whitespace removed, then lower-cased
#   - City / Country: surrounding whitespace removed
#   - Gender: surrounding whitespace removed, then upper-cased, so values such
#     as " m" become "M" and can be matched by exact comparisons like == "M"
#   - ingestion_date: date derived from the ingestion_ts column
# NOTE(review): dropDuplicates is given no ordering here, so which of several
# rows sharing a Customer_ID survives is not controlled -- confirm this is
# acceptable, or pick the row explicitly (e.g. latest ingestion_ts).
cust_silver_df = (
    cust_silver_df
    .dropDuplicates(["Customer_ID"])
    .withColumn("Email", lower(trim(col("Email"))))
    .withColumn("City", trim(col("City")))
    .withColumn("Country", trim(col("Country")))
    .withColumn("Gender", upper(trim(col("Gender"))))
    .withColumn("ingestion_date", to_date(col("ingestion_ts")))
)

### Handling null values

In [0]:
# Substitute the placeholder "Not available" for nulls. Because the fill value
# is a string, only string-typed columns are affected.
cust_silver_df = cust_silver_df.na.fill("Not available")

### Gender column cleaning

In [0]:
# Expand the single-letter gender codes into full labels. Any value other than
# "M" or "F" falls through to otherwise() and keeps its current value.
cust_silver_df = cust_silver_df.withColumn(
    "Gender",
    when(col("Gender") == lit("M"), lit("Male"))
    .when(col("Gender") == lit("F"), lit("Female"))
    .otherwise(col("Gender")),
)

### Saving the table

In [0]:
# Persist the cleaned customers as a Delta table; "overwrite" replaces whatever
# the target table currently holds.
cust_silver_df.write.saveAsTable(
    "retail_analytics.silver.customers",
    format="delta",
    mode="overwrite",
)

In [0]:
# Read the saved silver table back and show a five-row sample as a sanity check.
display(
    spark.table("retail_analytics.silver.customers").limit(5)
)

Customer_ID,Name,Email,Telephone,City,Country,Gender,Date_Of_Birth,Job_Title,ingestion_ts,ingestion_date
103,Anthony Bishop,anthony.bishop@fake_gmail.com,001-866-760-1652x98477,New York,United States,Male,1992-05-29,Exhibition designer,2026-01-16T05:29:29.853Z,2026-01-16
517,Laura Smith,laura.smith@fake_gmail.com,9517619965,New York,United States,Female,2003-12-17,Not available,2026-01-16T05:29:29.853Z,2026-01-16
1435,Sarah Riley,sarah.riley@fake_hotmail.com,+1-804-731-4265x4950,New York,United States,Female,2005-03-27,Not available,2026-01-16T05:29:29.853Z,2026-01-16
3029,Kyle Russell,kyle.russell@fake_yahoo.com,+1-458-815-3978x39436,New York,United States,Male,1997-07-29,Advertising art director,2026-01-16T05:29:29.853Z,2026-01-16
4010,Jody Patterson,jody.patterson@fake_yahoo.com,(910)266-9714,New York,United States,Female,1982-06-22,"Engineer, chemical",2026-01-16T05:29:29.853Z,2026-01-16
