# Initialization

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col

# Read bronze table

In [0]:
# Load the raw CRM sales details from the bronze layer as the starting DataFrame.
df = spark.read.table("workspace.bronze.crm_sales_details")

# Transformations

**_Renaming fields_**

In [0]:
# Bronze source column name -> silver column name.
rename_fields = {
    "sls_ord_num": "order_id",
    "sls_prd_key": "product_key",
    "sls_cust_id": "customer_id",
    "sls_order_dt": "order_date",
    "sls_ship_dt": "ship_date",
    "sls_due_dt": "due_date",
    "sls_sales": "sales",
    "sls_quantity": "quantity",
    "sls_price": "price",
}

# Apply every rename in one step instead of one withColumnRenamed call per
# column. Like withColumnRenamed, names missing from the schema are ignored.
df = df.withColumnsRenamed(rename_fields)

**_Trimming string fields_**

In [0]:
# Build a trimmed replacement expression for every string-typed column.
trimmed_columns = {
    field.name: trim(col(field.name))
    for field in df.schema.fields
    if isinstance(field.dataType, StringType)
}

# Replace all of them in a single projection; calling withColumn once per
# column in a loop grows the query plan with every iteration.
if trimmed_columns:  # nothing to do when the frame has no string columns
    df = df.withColumns(trimmed_columns)

**_Casting date fields_**

In [0]:
def _parse_yyyymmdd(column_name):
    """Return a date expression for a column holding ``yyyyMMdd`` values.

    The column is cast to string first. Values whose text is not exactly
    8 characters long (including the "0" placeholder and nulls) yield null,
    because the ``when`` has no ``otherwise`` branch. 8-character values are
    parsed with ``try_to_date``, which yields null instead of failing when the
    text is not a valid date.
    """
    as_text = col(column_name).cast("string")
    return F.when(F.length(as_text) == 8, F.try_to_date(as_text, "yyyyMMdd"))


# Convert the three date fields in one projection using the shared rule.
df = df.withColumns(
    {
        date_field: _parse_yyyymmdd(date_field)
        for date_field in ("order_date", "ship_date", "due_date")
    }
)

**_Sales price corrections_**

In [0]:


# A price is replaced when it is missing or not strictly positive.
price_needs_fix = col("price").isNull() | (col("price") <= 0)

# Replacement value: sales divided by quantity. With no otherwise() branch the
# expression is null when quantity is 0 (or null), so no division happens there.
# NOTE(review): the derived value is not re-checked for being positive - confirm
# that is intended.
derived_price = F.when(col("quantity") != 0, col("sales") / col("quantity"))

# Keep the existing price unless it was flagged above.
df = df.withColumn(
    "price",
    F.when(price_needs_fix, derived_price).otherwise(col("price")),
)


# Sanity checks for data frame

In [0]:
# Render the first 10 transformed rows for a quick visual check.
display(df.limit(10))

# Write to silver table

In [0]:
# Persist the cleaned frame as a Delta table, replacing any existing contents.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("workspace.silver.crm_sales")
)

# Sanity checks for silver table

In [0]:
%sql
-- Preview 10 rows from the silver table that was just written.
SELECT *
FROM workspace.silver.crm_sales
LIMIT 10;