In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType
from pyspark.sql.functions import trim, col, length

## Reading From Bronze Layer

In [0]:
# Load the CRM sales-detail rows from the bronze layer table.
bronze_table_name = "workspace.bronze.crm_sales_details"
sales_details = spark.table(bronze_table_name)

## Transformation

In [0]:

# Trim leading/trailing spaces from every string-typed column.
#
# All columns are projected in one select() instead of calling withColumn()
# once per string column, so the query plan does not grow with each column.
# Column names and order are unchanged; non-string columns pass through as-is.
sales_details = sales_details.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in sales_details.schema.fields
    ]
)

In [0]:
# Mapping of bronze (source-system) column names to the names used in silver.
# Keys are the existing column names; values are the names they are renamed to.
column_map = {
    # identifiers
    "sls_ord_num": "order_number",
    "sls_prd_key": "product_key",
    "sls_cust_id": "customer_id",
    # dates
    "sls_order_dt": "order_date",
    "sls_ship_dt": "ship_date",
    "sls_due_dt": "due_date",
    # measures
    "sls_sales": "sales_amount",
    "sls_quantity": "quantity",
    "sls_price": "price",
}

In [0]:
# Apply the rename mapping one entry at a time, in the mapping's own order.
for source_name in column_map:
    target_name = column_map[source_name]
    sales_details = sales_details.withColumnRenamed(source_name, target_name)


In [0]:
# Date standardization steps (YYYYMMDD to ISO dates)
#
# Each listed column is replaced by a proper date parsed with the "yyyyMMdd"
# pattern. Values that are not exactly eight digits, or that are an all-zero
# placeholder, become null instead of being handed to the parser.

cols_to_fix = ['order_date', 'ship_date', 'due_date']

for col_name in cols_to_fix:
    # Skip columns that are not present in the frame.
    if col_name not in sales_details.columns:
        continue

    # Validate and parse the SAME string form of the value, so the checks behave
    # identically whether the incoming column is numeric or text (no implicit
    # string-vs-number comparison).
    raw_text = col(col_name).cast("string")

    # Parseable only when exactly eight ASCII digits and not the zero placeholder.
    # A numeric 0 casts to "0" and is rejected by the eight-digit check.
    is_parseable = raw_text.rlike("^[0-9]{8}$") & (raw_text != "00000000")

    sales_details = sales_details.withColumn(
        col_name,
        # No otherwise(): rows that do not match (including nulls) become null.
        F.when(is_parseable, F.to_date(raw_text, "yyyyMMdd")),
    )


In [0]:
# Price patching and null handling
#
# A price that is null or not positive is re-derived as sales_amount / quantity.
# If quantity is zero (or null) no price can be derived and the result is null.
# Usable prices are kept unchanged.

price_is_unusable = col("price").isNull() | (col("price") <= 0)

# when() without otherwise() yields null for rows that do not match.
derived_price = F.when(
    col("quantity") != 0,
    col("sales_amount") / col("quantity"),
)

sales_details = sales_details.withColumn(
    "price",
    F.when(price_is_unusable, derived_price).otherwise(col("price")),
)


## Writing to Silver Table

In [0]:
# Persist the transformed rows as a Delta table, overwriting existing data.
silver_writer = sales_details.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("workspace.silver.crm_sales_details")