# Import libraries and modules

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.functions import trim, col
from pyspark.sql.types import StringType


# Rename map


In [0]:
# Maps each source ("sls_"-prefixed) column name to the name it is given
# by rename_columns. Insertion order is the order the renames are applied.
RENAME_MAP = {
    # identifiers
    "sls_ord_num": "order_number",
    "sls_prd_key": "product_key",
    "sls_cust_id": "customer_id",
    # dates
    "sls_order_dt": "order_date",
    "sls_ship_dt": "ship_date",
    "sls_due_dt": "due_date",
    # measures
    "sls_sales": "sales",
    "sls_quantity": "quantity",
    "sls_price": "price",
}

# Read data from the Bronze layer and check the schema

In [0]:
# Load the bronze table `workspace.bronze.crm_sales_details` as a DataFrame.
# NOTE(review): `spark` is not defined in this notebook; presumably it is the
# session object injected by the notebook runtime — confirm.
df = spark.table("workspace.bronze.crm_sales_details")
# Print the column names and types so the incoming schema can be checked.
df.printSchema()


# Rename columns


In [0]:
def rename_columns(df, RENAME_MAP: dict):
    """Rename columns of ``df`` according to ``RENAME_MAP``.

    The renames are applied one after another, in the mapping's iteration
    order, each on the result of the previous one.

    Args:
        df: Object exposing ``withColumnRenamed(old, new)`` that returns the
            renamed frame.
        RENAME_MAP: Mapping of current column name -> new column name.

    Returns:
        The frame returned by the last ``withColumnRenamed`` call, or ``df``
        itself when the mapping is empty.
    """
    renamed = df
    for source_name in RENAME_MAP:
        renamed = renamed.withColumnRenamed(source_name, RENAME_MAP[source_name])
    return renamed
# Apply the rename map; `df` is rebound to the renamed DataFrame.
df = rename_columns(df, RENAME_MAP)
# Print the schema again so the new column names can be checked.
df.printSchema()

# Trim string columns

In [0]:
def trim_string_columns(df):
    """Apply ``trim`` to every ``StringType`` column of ``df``.

    The result is built as a single ``select`` projection instead of one
    ``withColumn`` call per column: each ``withColumn`` adds its own
    projection to the query plan, so calling it in a loop over a wide table
    produces an unnecessarily large plan.

    Args:
        df: DataFrame whose string columns should be trimmed.

    Returns:
        A new DataFrame with the same column names in the same order, where
        string columns are replaced by their trimmed values and all other
        columns are passed through unchanged.
    """
    projection = [
        # Keep the original name so downstream code sees the same schema.
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
    return df.select(*projection)
# Trim the string columns; `df` is rebound to the trimmed DataFrame.
df = trim_string_columns(df)
# Render the result for visual inspection.
df.display()
#df.printSchema()

In [0]:
# Parse the three date columns into new "<name>_dt" columns using the
# "yyyyMMdd" pattern. The original columns are left in place so the raw and
# parsed values can be compared side by side.
parsed_date_columns = {
    "order_date_dt": "order_date",
    "ship_date_dt": "ship_date",
    "due_date_dt": "due_date",
}

df2 = df
for target_name, source_name in parsed_date_columns.items():
    # Cast to string first so the value can be matched against the pattern.
    df2 = df2.withColumn(
        target_name,
        F.to_date(F.col(source_name).cast("string"), "yyyyMMdd"),
    )

df2.display()

In [0]:
# Show each raw date column next to its parsed counterpart (aliased
# "<name>_new") for the first 20 rows, without truncating cell values.
date_comparison = df2.selectExpr(
    "order_date",
    "order_date_dt AS order_date_new",
    "ship_date",
    "ship_date_dt AS ship_date_new",
    "due_date",
    "due_date_dt AS due_date_new",
)
date_comparison.show(n=20, truncate=False)


In [0]:
# Inspect the rows whose order_date is the literal value 32154 (five digits,
# so it cannot match the eight-character "yyyyMMdd" pattern used for parsing).
rows_with_32154 = df.where(F.col("order_date") == 32154)
rows_with_32154.show(n=50, truncate=False)


In [0]:
# List the rows whose order_date is not exactly eight characters long once
# cast to a string, i.e. values that cannot match the "yyyyMMdd" pattern.
# The filter alone is lazy: without an action the cell only yields the
# DataFrame's repr and no rows, so the result is shown explicitly.
df.filter(
    F.length(F.col("order_date").cast("string")) != 8
).show(50, False)



# Write to Silver Table

In [0]:
# Persist the cleaned DataFrame as a Delta table, replacing any existing data.
# The table name is fully qualified with the `workspace` catalog so the write
# targets the same table that the bronze read and the validation queries in
# this notebook reference, regardless of the session's current catalog.
# NOTE(review): this writes `df`, not `df2`, so the parsed "*_dt" date columns
# are not part of the silver table — confirm that is intended.
(
    df.write
    .mode("overwrite")
    .format("delta")
    .saveAsTable("workspace.silver.crm_sales_details")
)

In [0]:
%sql
-- Look up one specific silver row by order date, customer and order number.
-- The order number is a string literal and must be quoted; unquoted, SO69215
-- is parsed as a column reference and the query fails to resolve it.
SELECT *
FROM workspace.silver.crm_sales_details
WHERE order_date = 32154
  AND customer_id = 16864
  AND order_number = 'SO69215';

In [0]:
%sql
-- Return every silver row whose non-null order_date is not exactly eight
-- characters long when cast to a string.
SELECT *
FROM workspace.silver.crm_sales_details
WHERE LENGTH(CAST(order_date AS STRING)) != 8
  AND order_date IS NOT NULL;



In [0]:
%sql
-- Count the non-null order_date values by whether they are exactly eight
-- characters long when cast to a string (the length of a "yyyyMMdd" value).
-- The %sql magic is required for this cell to run as SQL rather than Python.
SELECT
  SUM(CASE WHEN order_date IS NOT NULL AND LENGTH(CAST(order_date AS STRING)) = 8 THEN 1 ELSE 0 END) AS valid_len_8,
  SUM(CASE WHEN order_date IS NOT NULL AND LENGTH(CAST(order_date AS STRING)) <> 8 THEN 1 ELSE 0 END) AS invalid_len
FROM workspace.silver.crm_sales_details;

