## **Initialization**

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import col, trim

## **Reading From Bronze Layer**

In [0]:
# Load the raw ERP customer table from the bronze layer.
df = spark.read.table("workspace.bronze.erp_cust_az12")

## **Data Transformation**

### 1-Trimming String Type Data

In [0]:
# Trim the spaces from both ends of every string-typed column.
# All columns are replaced in a single withColumns() call: calling withColumn()
# once per column in a loop adds one projection per call to the query plan.
trimmed_columns = {
    field.name: trim(col(field.name))
    for field in df.schema.fields
    if isinstance(field.dataType, StringType)
}
if trimmed_columns:  # nothing to do when the table has no string columns
    df = df.withColumns(trimmed_columns)

### 2-Normalisation of gender

In [0]:
# Normalise GEN to "Male", "Female" or "Unknown" (case-insensitive match).
# NULLs and any unrecognised value fall through to "Unknown".
# Written with the Column API rather than SQL text: the SQL version quoted its
# string literals with double quotes, which Spark parses as identifiers when
# ANSI mode is on and spark.sql.ansi.doubleQuotedIdentifiers is enabled.
gender_upper = F.upper(F.col("GEN"))
df = df.withColumn(
    "GEN",
    F.when(gender_upper.isin("M", "MALE"), "Male")
    .when(gender_upper.isin("F", "FEMALE"), "Female")
    .otherwise("Unknown"),
)

### 3-Customer ID Cleanup

In [0]:
# Strip a leading "NAS" prefix from CID; values without it are kept unchanged.
cid = F.col("CID")
df = df.withColumn(
    "CID",
    # substr(4, length(CID)) keeps everything from the 4th character onwards.
    F.when(cid.startswith("NAS"), cid.substr(F.lit(4), F.length(cid))).otherwise(cid),
)

### 4-Birthdate Validation

In [0]:
# Replace birth dates that lie after the current date with NULL; every other
# value is kept as-is.
#
# Option 1 (not used): the same rule as Spark SQL inside F.expr, using NULL:
#   df = df.withColumn("BDATE", F.expr("CASE WHEN BDATE > current_date() THEN NULL ELSE BDATE END"))
#
# Option 2 (used): the Column API, where Python None becomes a SQL NULL.
bdate = F.col("BDATE")
df = df.withColumn("BDATE", F.when(bdate > F.current_date(), None).otherwise(bdate))


### 5-Renaming Columns

In [0]:
# Source column name -> silver-layer column name.
RENAME_MAP = {
    "CID": "customer_number",
    "BDATE": "birth_date",
    "GEN": "gender",
}

# Rename all mapped columns in one call instead of one withColumnRenamed() per
# column, so only a single projection is added to the plan. Names in the map
# that are not present in df are ignored, as with withColumnRenamed().
df = df.withColumnsRenamed(RENAME_MAP)

## **Write Into Silver Layer**

In [0]:
# Persist the transformed customers as a Delta table in the silver layer,
# overwriting whatever the table held before.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("workspace.silver.erp_customers")
)

## **Sanity Check Of Data Frame**

In [0]:
# Show the transformed DataFrame for a visual check.
# NOTE(review): display() is not core PySpark; presumably supplied by the
# Databricks notebook runtime — confirm before running elsewhere.
df.display()