# Import necessary Spark functions

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType, DateType
from pyspark.sql.window import Window
from pyspark.sql.functions import col, trim, current_timestamp

# Read table

In [0]:
# Load the bronze-layer CRM customer table as the working DataFrame.
df = spark.table("dev_project.bronze.crm_cust_info")

# Data Transformation 

In [0]:
# Trim spaces from both ends of every string column; non-string columns pass
# through unchanged and the column order is preserved.
# This is done in ONE select() rather than a withColumn() loop: each
# withColumn() call adds its own projection, so looping over a wide table
# builds an unnecessarily large query plan.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)

# Normalization

In [0]:
# Upper-cased source codes, so the comparisons below are case-insensitive.
marital_code = F.upper(F.col("cst_marital_status"))
gender_code = F.upper(F.col("cst_gndr"))

# Expand the single-letter codes into readable labels; any value that is not
# one of the listed codes (including null) falls through to "n/a".
marital_label = (
    F.when(marital_code == "M", "Married")
    .when(marital_code == "S", "Single")
    .otherwise("n/a")
)
gender_label = (
    F.when(gender_code == "M", "Male")
    .when(gender_code == "F", "Female")
    .otherwise("n/a")
)

# Overwrite both columns in place with their normalized labels.
df = df.withColumn("cst_marital_status", marital_label).withColumn(
    "cst_gndr", gender_label
)

# Remove rows with a missing (null) customer ID

In [0]:
# Keep only rows that have a customer id; rows where cst_id is null are dropped.
df = df.where(F.col("cst_id").isNotNull())

# Rename the Columns

In [0]:
# Source (bronze) column name -> target (silver) column name.
RENAME_MAP = {
    "cst_id": "customer_id",
    "cst_key": "customer_number",
    "cst_firstname": "first_name",
    "cst_lastname": "last_name",
    "cst_marital_status": "marital_status",
    "cst_gndr": "gender",
    "cst_create_date": "created_date",
}

# Apply the renames one column at a time, in the mapping's insertion order.
for source_name in RENAME_MAP:
    df = df.withColumnRenamed(source_name, RENAME_MAP[source_name])

# Add a column holding the current timestamp.
df = df.withColumn("ingest_time", F.current_timestamp())


# Writing silver table

In [0]:
# Persist the transformed data as the silver table, overwriting existing data.
# saveAsTable() returns None, so its result is deliberately NOT assigned:
# rebinding `df` here would replace the DataFrame with None and break any
# later cell that still uses it.
df.write.mode("overwrite").saveAsTable("dev_project.silver.crm_cust_info")