## **Initialization**

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType, DateType
from pyspark.sql.functions import col, trim, length

## **Reading From Bronze Layer**

In [0]:
# Load the raw CRM sales-detail rows from the bronze table; the cells below
# transform this DataFrame step by step.
df = spark.read.table("workspace.bronze.crm_sales_details")

## **Data Transformation**

### 1-Trimming String Type Data

In [0]:
# Collect the names of all string-typed columns first, then strip
# leading/trailing whitespace from each of them.
string_columns = [
    f.name for f in df.schema.fields if isinstance(f.dataType, StringType)
]
for name in string_columns:
    df = df.withColumn(name, trim(col(name)))

### 2-Date Cleaning

In [0]:
# 1-Creation of a function to clean the date columns

def clean_date(col_name):
    """Build a Column expression that converts a raw date column to a date.

    Args:
        col_name: Name of the column holding the raw date value.

    Returns:
        A Column that is NULL when the raw value equals 0 or is not exactly
        8 characters long, and otherwise the value cast to string and parsed
        with the ``yyyyMMdd`` pattern.
    """
    raw_value = col(col_name)
    is_invalid = (raw_value == 0) | (length(raw_value) != 8)
    parsed_date = F.to_date(raw_value.cast("string"), "yyyyMMdd")
    return F.when(is_invalid, None).otherwise(parsed_date)

# 2-Creation of list of date columns to be cleaned
date_columns = [
    "sls_order_dt",
    "sls_ship_dt",
    "sls_due_dt"
]

# 3-Application of the "clean_date" function to the date columns
# Columns that are already DateType are skipped: `df` is overwritten in place,
# so re-running this cell would otherwise push already-parsed dates back
# through checks that expect the raw 8-character form.
for c in date_columns:
    if not isinstance(df.schema[c].dataType, DateType):
        df = df.withColumn(c, clean_date(c))


### 3-Sales and Price Corrections

In [0]:
# Repair missing or non-positive prices by deriving them from the sale amount
# and the quantity. When the quantity is zero (or NULL) no price can be
# derived, so the result is a real SQL NULL -- not the string literal 'None',
# which would make the CASE expression mix string and numeric branches.
df = (
    df
    .withColumn(
        "sls_price",
        F.expr("""
            CASE
                WHEN sls_price IS NULL OR sls_price <= 0 THEN
                    CASE
                        WHEN sls_quantity != 0 THEN sls_sales / sls_quantity
                        ELSE NULL
                    END
                ELSE sls_price
            END
        """)
    )
)


### 4-Renaming Columns

In [0]:
# Mapping from the source column names (sls_* prefix) to the names used in
# the table written to the silver layer.
RENAME_MAP = {
    "sls_ord_num": "order_number",
    "sls_prd_key": "product_number",
    "sls_cust_id": "customer_id",
    "sls_order_dt": "order_date",
    "sls_ship_dt": "ship_date",
    "sls_due_dt": "due_date",
    "sls_sales": "sales_amount",
    "sls_quantity": "quantity",
    "sls_price": "price",
}

# Apply the renames one column at a time.
for source_name in RENAME_MAP:
    df = df.withColumnRenamed(source_name, RENAME_MAP[source_name])

## **Write Into Silver Layer**

In [0]:
# Persist the transformed DataFrame as a Delta table in the silver layer,
# replacing whatever the table held before.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("workspace.silver.crm_sales")
)

## **Sanity Check Of Data Frame**

In [0]:
# Render the in-memory DataFrame as a final visual check. This shows `df` as
# built by the cells above, not a read-back of the saved silver table.
df.display()