# Loading ERP Customer Location

## Import Spark functions

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType, DateType
from pyspark.sql.functions import col, when, lit, trim

## Read the table

In [0]:
# Load the source rows from the bronze-layer table.
bronze_table = "dev_project.bronze.erp_loc_a101"
df = spark.read.table(bronze_table)

## Data Transformation - trim leading and trailing spaces

In [0]:
# Trim leading/trailing spaces from every string column; other columns pass
# through untouched and the column order is preserved.
# A single select() is used instead of withColumn() in a loop because every
# withColumn() call adds another projection to the query plan.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)

## Clean the customer ID

In [0]:
# Remove every "-" from the customer id and store the result as "CID".
cid_without_hyphens = F.regexp_replace(col("cid"), "-", "")
df = df.withColumn("CID", cid_without_hyphens)

## Country Normalization

In [0]:
# Map country codes to display names:
#   "DE"          -> "Germany"
#   "US" / "USA"  -> "United States"
#   empty or NULL -> "n/a"
# Any other value is kept as it is.
country_col = col("cntry")
country_is_blank = country_col.isNull() | (country_col == "")
normalized_country = (
    F.when(country_col == "DE", "Germany")
    .when(country_col.isin("US", "USA"), "United States")
    .when(country_is_blank, "n/a")
    .otherwise(country_col)
)
df = df.withColumn("cntry", normalized_country)

## Rename the columns and add the ingest timestamp

In [0]:
# Map each current column name to its name in the silver table. The keys are
# spelled exactly as the columns are written earlier in this notebook
# ("CID", "cntry"), so the rename does not rely on case-insensitive column
# resolution.
RENAME_MAP = {
    "CID": "customer_number",
    "cntry": "country",
}
# withColumnRenamed() is a silent no-op for a name it cannot find, which is
# why the key spelling above matters.
for old_name, new_name in RENAME_MAP.items():
    df = df.withColumnRenamed(old_name, new_name)

# Add the load timestamp column.
df = df.withColumn("_ingest_time", F.current_timestamp())
     

In [0]:
# Render the transformed DataFrame in the notebook for a visual check.
# NOTE(review): .display() is not a standard PySpark DataFrame method;
# presumably a Databricks notebook helper - confirm the runtime.
df.display()

## Write the ERP customer location table

In [0]:
# Persist the result as the silver table, replacing any existing contents.
silver_table = "dev_project.silver.erp_loc_a101"
overwrite_writer = df.write.mode("overwrite")
overwrite_writer.saveAsTable(silver_table)

In [0]:
%sql
-- Preview up to 10 rows of the silver table (no ORDER BY, so row choice is arbitrary).
SELECT * FROM dev_project.silver.erp_loc_a101 LIMIT 10