# Read the Source Tables

In [0]:
# Load the silver-layer CRM products table, then preview its first two rows.
products = spark.table("workspace.silver_pyspark.crm_products")
products.limit(2).display()

In [0]:
# Load the silver-layer ERP category table, then preview its first two rows.
px_cat = spark.table("workspace.silver_pyspark.erp_px_cat")
px_cat.limit(2).display()

# Join the tables

In [0]:
from pyspark.sql.functions import col, when, coalesce, lit
from pyspark.sql.functions import row_number
from pyspark.sql.window import Window
# Surrogate keys are numbered across the whole joined set (no partitioning),
# ordered by product start date and then product id.
window_spec = Window.orderBy(col('p.start_date'), col('p.product_id'))

# Left join: every product row is kept, with category attributes attached
# where a row with the same category_id exists in px_cat.
products_with_category = products.alias('p').join(
    px_cat.alias('px'),
    on=col('p.category_id') == col('px.category_id'),
    how='left',
)

# Add the sequential product_key, then project the dimension columns in
# their final order.
dim_prod = products_with_category.withColumn(
    'product_key', row_number().over(window_spec)
).select(
    'product_key',
    'p.product_id',
    'p.category_id',
    'p.product_number',
    'p.product_name',
    'px.category',
    'px.subcategory',
    'px.maintenance',
    'p.cost',
    'p.product_line',
    'p.start_date',
    'p.end_date',
)

In [0]:
# Render the joined product dimension for inspection before it is written.
display(dim_prod)

# Drop the Target table if it already exists

In [0]:
# Drop the gold product dimension table when it is present; a no-op otherwise.
# NOTE(review): dropping the table presumably discards its earlier Delta
# history as well — confirm this is intended.
spark.sql("DROP TABLE IF EXISTS workspace.gold_pyspark.dim_products")

# Create the Target Table and Load the Joined Output Data into It

In [0]:
# Persist the joined dimension as a Delta table, replacing any existing contents.
(
    dim_prod.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('workspace.gold_pyspark.dim_products')
)

# Sanity checks of Target table

In [0]:
# Read the persisted table back under its own name instead of rebinding
# `dim_prod`. Reusing `dim_prod` here would replace the joined DataFrame with
# a reader of the target table, so re-running the drop/write cells afterwards
# would try to load the table from the table that was just dropped.
dim_prod_saved = spark.table('workspace.gold_pyspark.dim_products')
dim_prod_saved.display()

# View Target Table Changes

In [0]:
# Show the history recorded for the target table.
display(spark.sql("DESCRIBE HISTORY workspace.gold_pyspark.dim_products"))