In [0]:
#Initialization

import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

In [0]:
#Read Bronze table
# Source table for this notebook; every later cell transforms the `df` loaded here.
BRONZE_TABLE = "workspace.bronze.erp_px_cat_g1v2"

df = spark.table(BRONZE_TABLE)

In [0]:
#Silver Transformations
#Trimming
#
# Strip leading/trailing whitespace from every string-typed column.
# This is done as ONE projection instead of calling withColumn once per column:
# each withColumn call adds another projection to the logical plan, which the
# PySpark docs warn can produce very large plans on wide tables.
# Non-string columns pass through untouched, and column order and names are kept.

trimmed_columns = [
    trim(col(field.name)).alias(field.name)
    if isinstance(field.dataType, StringType)
    else col(field.name)
    for field in df.schema.fields
]
df = df.select(*trimmed_columns)

In [0]:
#Normalize Maintenance Flag to Boolean
# Case-insensitive mapping: "YES" -> True, "NO" -> False.
# Anything else (including null) falls through to null.

maintenance_upper = F.upper(F.col("maintenance"))

maintenance_as_bool = (
    F.when(maintenance_upper == "YES", F.lit(True))
    .when(maintenance_upper == "NO", F.lit(False))
    .otherwise(None)
)

df = df.withColumn("maintenance", maintenance_as_bool)



In [0]:
#Renaming Columns
# Mapping of source column name -> output column name.

RENAME_MAP = dict(
    id="category_id",
    cat="category",
    subcat="subcategory",
    maintenance="maintenance_flag",
)

# Apply the renames one at a time, in the mapping's insertion order.
for source_col, target_col in RENAME_MAP.items():
    df = df.withColumnRenamed(source_col, target_col)

In [0]:
#Sanity checks of dataframe
# Render a 10-row preview of the transformed DataFrame.

preview_rows = df.limit(10)
preview_rows.display()

In [0]:
#Writing Silver Table
# Save the transformed DataFrame as a Delta table, overwriting any existing data.

SILVER_TABLE = "workspace.silver.erp_product_category"

(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable(SILVER_TABLE)
)

In [0]:
%sql
-- Sanity check of the silver table: preview 10 rows
SELECT *
FROM workspace.silver.erp_product_category
LIMIT 10