In [0]:
%sql
-- Preview the raw bronze ERP location table before any cleaning is applied.
SELECT *
FROM analytics.bronze.erp_loc_a101

In [0]:
%sql
-- Set the session's current catalog and schema so that unqualified or
-- partially qualified object names resolve against analytics.silver.
USE CATALOG analytics;
USE SCHEMA silver;

## Read Bronze Table

In [0]:
# Load the bronze ERP location table; later cells transform `df` step by step.
df = spark.read.table("analytics.bronze.erp_loc_a101")
display(df)

## Trim string columns

In [0]:
from pyspark.sql.functions import trim, col
from pyspark.sql.types import StringType

# Strip leading/trailing whitespace from every string column.
# All columns are rebuilt in a single select() instead of calling withColumn()
# once per column: each withColumn() adds another projection to the plan, which
# grows the plan unnecessarily on wide tables. Column order and names are
# preserved; non-string columns are passed through untouched.
df = df.select(
    *[
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)

df.display()

In [0]:
%sql
-- List the distinct country values present in the bronze table.
-- Note: this queries the stored bronze data, not the in-memory DataFrame,
-- so the values shown are the raw ones.
SELECT DISTINCT CNTRY
FROM analytics.bronze.erp_loc_a101

## Normalize country column

In [0]:
from pyspark.sql.functions import when

# Collapse known country codes/aliases into one display name each.
# Matching is by exact value; anything that matches none of the rules is
# passed through unchanged.
cntry = col("CNTRY")
normalized_cntry = (
    when(cntry.isin("US", "USA", "United States"), "United States")
    .when(cntry == "DE", "Germany")
    .when(cntry == "UK", "United Kingdom")
    .otherwise(cntry)
)
df = df.withColumn("CNTRY", normalized_cntry)

df.display()

## Rename Columns

In [0]:
# Source column name -> silver-layer column name.
column_renames = {
    "CID": "customer_id",
    "CNTRY": "country",
}

# Apply the renames one at a time, in the order listed above.
for source_name, target_name in column_renames.items():
    df = df.withColumnRenamed(source_name, target_name)

df.display()

## Write Silver Table

In [0]:
# Persist the cleaned DataFrame as a Delta table in the silver layer,
# replacing whatever the table currently contains.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("analytics.silver.erp_customers_location")
)

In [0]:
# Read the table back to verify the write. The fully qualified three-part name
# matches the one used when writing, so this check does not depend on the
# session's current catalog having been set by an earlier cell.
spark.table("analytics.silver.erp_customers_location").display()