##Bronze to Silver Location A101

#####Initialization

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.functions import *
from pyspark.sql import DataFrame
from pyspark.sql.types import StringType
from pyspark.sql.window import Window

#####Create DataFrame

In [0]:
# Load the source table for this notebook into a DataFrame.
df = spark.table('dlh.bronze_db.bronze_loc_a101')

#####Check Null in Primary Column

In [0]:
# Rows whose CNTRY value is NULL.
# NOTE(review): the heading calls this the primary-column check, but the filter
# is on CNTRY -- confirm whether CID was the intended column.
# display() only renders its input and returns None, so the filtered DataFrame
# is kept in df_check and displayed as a separate step.
df_check = df.filter(F.col("CNTRY").isNull())
df_check.display()

#####Remove "-" from CID (replace with "")

In [0]:
# Replace every "-" in CID with the empty string, then preview the result.
df1 = df.withColumn(
    "CID",
    F.regexp_replace(F.col("CID"), "-", ""),
)
df1.display()

In [0]:
# Show the distinct CNTRY values present before normalization.
(
    df1
    .select("CNTRY")
    .distinct()
    .display()
)

#####Trim Function

In [0]:
def trimmer(df: DataFrame) -> DataFrame:
  """Return ``df`` with every StringType column passed through ``F.trim``.

  All columns are emitted by a single ``select`` instead of one
  ``withColumn`` call per string column, so the plan gains one projection
  rather than one per column. Column order and names are unchanged, and
  non-string columns are passed through as they are.

  Args:
    df: DataFrame whose string columns should be trimmed.

  Returns:
    A new DataFrame with the same columns, string ones trimmed.
  """
  projected = []
  for field in df.schema.fields:
    # Backtick-quote the name so that a name containing dots or spaces is
    # resolved as a top-level column rather than parsed as an expression.
    quoted_name = "`" + field.name.replace("`", "``") + "`"
    column = F.col(quoted_name)
    if isinstance(field.dataType, StringType):
      column = F.trim(column).alias(field.name)
    projected.append(column)
  return df.select(*projected)

In [0]:
# Apply trimmer to the hyphen-stripped DataFrame.
df2 = trimmer(df1)

#####Data Normalization

In [0]:
# Map the known country spellings onto one canonical value each, turn
# empty/NULL values into "n/a", and leave every other value untouched.
df3 = df2.withColumn(
    "CNTRY",
    F.when(
        F.col("CNTRY").isin(["US", "USA", "United States"]),
        "United States",
    )
    .when(
        F.col("CNTRY").isin(["DE", "Germany"]),
        "Germany",
    )
    .when(
        F.col("CNTRY").isNull() | (F.col("CNTRY") == ""),
        "n/a",
    )
    .otherwise(F.col("CNTRY")),
)

In [0]:
# Show the distinct CNTRY values left after normalization.
(
    df3
    .select("CNTRY")
    .distinct()
    .display()
)


#####Rename Function

In [0]:
# Current column name -> new column name.
# Keys use the same upper-case spelling ("CID", "CNTRY") that the earlier
# cells use for these columns, so the renames do not depend on Spark
# resolving column names case-insensitively (spark.sql.caseSensitive=false).
RENAME_MAP = {
    "CID": "customer_number",
    "CNTRY": "country",
}


def renamed(df: DataFrame) -> DataFrame:
  """Return ``df`` with the columns listed in ``RENAME_MAP`` renamed.

  Args:
    df: DataFrame to rename columns on.

  Returns:
    A new DataFrame in which each RENAME_MAP key has been renamed to its
    value. ``withColumnRenamed`` is a no-op for a name that is not present,
    so a missing column is skipped rather than raising.
  """
  for old_name, new_name in RENAME_MAP.items():
    df = df.withColumnRenamed(old_name, new_name)
  return df

In [0]:
# Apply the RENAME_MAP column renames to the normalized DataFrame.
df4 = renamed(df3)

#####Final Validation

In [0]:
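# Disabled check. NOTE(review): as written it cannot run in this cell because
# df5 is only created in the next cell; confirm the intended DataFrame and
# column name before re-enabling.
# df_check = df5.filter(col("CID").isNull()).display()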

In [0]:
# Drop the existing file_path and ingest_ts columns, then add a new
# ingest_ts column holding the current timestamp.
df5 = (
    df4
    .drop("file_path", "ingest_ts")
    .withColumn("ingest_ts", F.current_timestamp())
)

#####Write to Delta Table in Silver Layer

In [0]:

# Replace the Silver table's contents in place.
# The table is no longer dropped first: dropping and then writing leaves a
# window in which the table does not exist (and it stays missing if the write
# fails), and it discards the Delta table's version history. An overwrite
# with overwriteSchema=true replaces both data and schema in one commit.
(
    df5.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("dlh.silver_db.silver_loc_a101")
)