# Initialization

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

# Read bronze table

In [0]:
# Load the bronze-layer table that the rest of the notebook transforms.
bronze_table = "workspace.bronze.erp_cust_az12"
df = spark.table(bronze_table)

# Transformations

**_Renaming fields_**

In [0]:
# Mapping of bronze column name -> silver column name.
rename_fields = dict(
    CID="customer_id",
    BDATE="birth_date",
    GEN="gender",
)

# Apply the renames one at a time; withColumnRenamed leaves the frame
# untouched when the source column is not present.
for source_col, target_col in rename_fields.items():
    df = df.withColumnRenamed(source_col, target_col)

# Show the frame with its new column names.
df.display()

**_Trimming string fields_**

In [0]:
# Strip leading/trailing whitespace from every string-typed column.
#
# All replacements are applied in a single withColumns call: invoking
# withColumn once per column inside a loop adds one projection per call and
# needlessly grows the query plan. Existing columns are replaced in place, so
# column order and non-string columns are unchanged.
trimmed_columns = {
    field.name: trim(col(field.name))
    for field in df.schema.fields
    if isinstance(field.dataType, StringType)
}
if trimmed_columns:  # skip the projection when there are no string columns
    df = df.withColumns(trimmed_columns)

**_Customer ID parsing_**

In [0]:
# Keep customer_id from its 4th character onward (Spark positions are 1-based).
# The requested length is the full string length, which is always enough to
# reach the end of the value.
# NOTE(review): the first three characters are dropped from every row,
# whether or not the value actually carries a prefix -- confirm that all
# source IDs have one.
customer_id = col("customer_id")
customer_id_tail = customer_id.substr(F.lit(4), F.length(customer_id))
df = df.withColumn("customer_id", customer_id_tail)

**_Birthdate validation_**

In [0]:
# Null out birth dates that lie after today's date; every other value
# (including rows where the comparison itself is NULL) is kept as-is.
birth_date = col("birth_date")
is_future_birth_date = birth_date > F.current_date()
validated_birth_date = F.when(is_future_birth_date, None).otherwise(birth_date)
df = df.withColumn("birth_date", validated_birth_date)

**_Gender Normalization_**

In [0]:
# Map the gender codes onto "Female" / "Male", matching case-insensitively.
# Anything else -- including NULL, which matches neither branch -- becomes "n/a".
gender_upper = F.upper(col("gender"))
normalized_gender = (
    F.when(gender_upper.isin("F", "FEMALE"), "Female")
    .when(gender_upper.isin("M", "MALE"), "Male")
    .otherwise("n/a")
)
df = df.withColumn("gender", normalized_gender)

**_Sanity checks for DataFrame_**

In [0]:
# Eyeball a ten-row sample of the transformed frame before writing it out.
preview = df.limit(10)
preview.display()

**_Write to silver table_**

In [0]:
# Persist the cleaned frame as a Delta table, replacing any existing contents
# of the silver table.
silver_writer = df.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("workspace.silver.erp_customers")

**_Sanity checks for silver table_**

In [0]:
%sql
-- Read back ten rows from the silver table to confirm the write succeeded.
SELECT *
FROM workspace.silver.erp_customers
LIMIT 10;