## Initialization

In [0]:

import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

## Let's read the Bronze table

In [0]:
# Load the raw CRM sales records from the bronze layer and preview them.
bronze_table_name = "salesdb.bronze.crm_sales_data"
df_bronze = spark.table(bronze_table_name)
display(df_bronze)


## Let's do the Silver transformations (fix the schema and column names)

In [0]:
# Uncomment to inspect the incoming bronze schema:
# for field in df_bronze.schema.fields:
#   print(field.name, field.dataType)

# Bronze column name -> (silver column name, Spark SQL type to cast to).
# NOTE(review): "Price" is cast to "int"; if bronze prices can carry decimals,
# a decimal type may be the better fit -- confirm against the source data.
RENAME_MAP = {
    "Cust_ID": ("customer_id", "string"),
    "Cust_Name": ("customer_name", "string"),
    "Email": ("email", "string"),
    "Phone": ("phone", "string"),
    "Country": ("country", "string"),
    "State": ("state", "string"),
    "City": ("city", "string"),
    "Prodct_ID": ("product_id", "string"),
    "Prodct_Name": ("product_name", "string"),
    "Cat": ("category", "string"),
    "Price": ("price", "int"),
    "Qty": ("quantity", "int"),
    "Ord_ID": ("order_id", "string"),
    "Ord_Date": ("order_date", "string"),
    "SalesRep_ID": ("sales_rep_id", "string"),
    "SalesRep_Name": ("sales_rep_name", "string"),
    "Region": ("region", "string"),
}


def _quote_column(name):
    """Backtick-quote a column name so dots or spaces in it are taken literally."""
    return "`" + name.replace("`", "``") + "`"


# Spark resolves column names case-insensitively by default, so index the
# mapping by lower-cased bronze name to keep the lookup below equally tolerant.
rename_lookup = {old_name.lower(): spec for old_name, spec in RENAME_MAP.items()}

# Rename and cast in ONE select() instead of one withColumnRenamed()/withColumn()
# call per column: every such call stacks another projection onto the query plan.
silver_columns = []
for source_name in df_bronze.columns:
    source_col = col(_quote_column(source_name))
    spec = rename_lookup.get(source_name.lower())
    if spec is None:
        # Not part of the mapping (or already renamed on a re-run): pass through.
        silver_columns.append(source_col)
    else:
        new_name, new_data_type = spec
        silver_columns.append(source_col.cast(new_data_type).alias(new_name))

# Downstream cells read the transformed frame under the name df_bronze.
df_bronze = df_bronze.select(*silver_columns)

# Fail loudly if any mapped column never showed up, rather than silently
# producing a frame that lacks it.
available_names = {name.lower() for name in df_bronze.columns}
missing_columns = [
    new_name
    for new_name, _ in RENAME_MAP.values()
    if new_name.lower() not in available_names
]
if missing_columns:
    raise ValueError(
        f"Bronze data has no source column for silver column(s): {missing_columns}"
    )

# Display the transformed DataFrame to validate the data and columns.
display(df_bronze)



## It looks like there are some invalid rows with a null price; let's drop them

In [0]:
# Keep only the rows whose price is present (not null), then preview the result.
has_price = col("price").isNotNull()
df_with_not_null = df_bronze.where(has_price)
display(df_with_not_null)

## The data looks clean now; let's write it to the silver layer table

In [0]:
# Persist the filtered rows as a Delta table, replacing any existing contents.
(
    df_with_not_null.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("salesdb.silver.crm_sales_data")
)

## Let's check the data in the silver layer table

In [0]:
%sql
-- Read back every row and column of the silver table.
SELECT *
FROM salesdb.silver.crm_sales_data

In [0]:
%sql
-- Sum of price * quantity per customer and category, largest total first.
SELECT
  customer_name,
  SUM(price * quantity) AS total,
  category
FROM salesdb.silver.crm_sales_data
GROUP BY
  customer_name,
  category
ORDER BY total DESC