In [0]:
%sql
-- Preview every column of the raw ERP customer table in the bronze layer.
SELECT *
FROM analytics.bronze.erp_cust_az12

In [0]:
%sql
-- Set the session's current catalog and schema so that unqualified
-- table names in later cells resolve against analytics.silver.
USE CATALOG analytics;
USE SCHEMA silver;

## Read bronze table

In [0]:
# Load the raw ERP customer table from the bronze layer into `df`;
# the following cells reassign `df` as each cleaning step is applied.
df = spark.read.table("analytics.bronze.erp_cust_az12")
df.display()

## Trimming string columns

In [0]:
from pyspark.sql.functions import trim, col
from pyspark.sql.types import StringType

# Strip leading/trailing whitespace from every string column in ONE projection.
# Calling withColumn() in a loop adds a projection per column, which the
# PySpark documentation warns can produce very large query plans; a single
# select() gives the same result while keeping column names and order intact.
df = df.select(
    *[
        trim(col(f.name)).alias(f.name)
        if isinstance(f.dataType, StringType)
        else col(f.name)  # non-string columns pass through untouched
        for f in df.schema.fields
    ]
)

df.display()

## Normalize birth date column (future dates become NULL)

In [0]:
from pyspark.sql.functions import current_date, when

# A birth date later than today is replaced with NULL; every other value
# (including an already-NULL date) is carried over unchanged.
is_future_bdate = col("BDATE") > current_date()
df = df.withColumn("BDATE", when(is_future_bdate, None).otherwise(col("BDATE")))

df.display()

In [0]:
%sql
-- List the distinct raw GEN values present in the bronze table.
SELECT DISTINCT GEN
FROM analytics.bronze.erp_cust_az12

## Normalize gender column

In [0]:
from pyspark.sql.functions import when

# Collapse the raw codes into two labels. A when() chain with no otherwise()
# yields NULL for unmatched rows, so any value not listed here (including
# NULL itself) ends up as NULL.
gender_label = (
    when(col("GEN").isin("M", "Male"), "Male")
    .when(col("GEN").isin("F", "Female"), "Female")
)
df = df.withColumn("GEN", gender_label)

df.display()

## Rename Columns

In [0]:
# Replace the source system's short column codes with descriptive
# snake_case names (old name -> new name).
column_renames = {
    "CID": "customer_id",
    "BDATE": "birth_date",
    "GEN": "gender",
}
for old_name, new_name in column_renames.items():
    df = df.withColumnRenamed(old_name, new_name)

df.display()

## Write Silver Table

In [0]:
# Persist the cleaned frame as a Delta table in the silver schema,
# overwriting whatever the table previously contained.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("analytics.silver.erp_customers")
)


In [0]:
# Read back the silver table for a visual check. The fully qualified name
# matches the one used by the write, so this does not depend on the
# session's current catalog having been set by an earlier cell.
spark.table("analytics.silver.erp_customers").display()