# Read the source tables

In [0]:
# Customer dimension from the gold layer; show two rows to confirm the read.
dim_cust = spark.read.table('workspace.gold_pyspark.dim_customers')
dim_cust.limit(2).display()

In [0]:
# Product dimension from the gold layer.
# NOTE(review): the preview size of 52 differs from the other previews (2 and 50) — confirm it is intentional.
dim_prod = spark.read.table('workspace.gold_pyspark.dim_products')
dim_prod.limit(52).display()

In [0]:
# Sales rows from the silver layer; these drive the fact table built below.
crm_sales = spark.read.table('workspace.silver_pyspark.crm_sales')
crm_sales.limit(50).display()

# Join the Source Tables

## Use crm_sales as the main (left) table

In [0]:
from pyspark.sql.functions import col, when, coalesce, lit

# Build the sales fact from every row of crm_sales, looking up customer_key
# and product_key in the two dimensions.
# Both joins are left joins, so a sales row with no matching dimension row is
# kept and simply carries a null customer_key / product_key.
fact_sales = (
    crm_sales.alias('s')
    .join(
        dim_cust.alias('c'),
        col('s.customer_number') == col('c.customer_id'),
        'left',
    )
    .join(
        dim_prod.alias('p'),
        col('s.product_number') == col('p.product_number'),
        'left',
    )
    .select(
        col('s.order_number'),
        col('c.customer_key'),
        col('p.product_key'),
        col('s.order_date'),
        col('s.ship_date'),
        col('s.due_date'),
        col('s.sales_amount'),
        col('s.quantity'),
        col('s.price'),
    )
)

In [0]:
# Render the joined result to eyeball it before it is persisted.
display(fact_sales)

# Drop the target table if it already exists

In [0]:
# Remove any previous copy of the target so the write below starts clean.
# NOTE(review): dropping the table presumably also discards its Delta history,
# which the DESCRIBE HISTORY cell at the end inspects — confirm this is intended.
spark.sql('DROP TABLE IF EXISTS workspace.gold_pyspark.fact_sales')

# Create the target table and load the joined output into it

In [0]:
# Persist the joined result as a Delta table, replacing any existing contents.
(
    fact_sales.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('workspace.gold_pyspark.fact_sales')
)

# Sanity Checks

In [0]:
# Read the persisted table back under its own name. Rebinding `fact_sales`
# here would make a re-run of the drop/write cells try to rebuild the table
# from itself instead of from the join result.
fact_sales_persisted = spark.table('workspace.gold_pyspark.fact_sales')
display(fact_sales_persisted.limit(2))

# View the table's change history

In [0]:
# Show the Delta log entries recorded for the target table.
display(spark.sql('DESCRIBE HISTORY workspace.gold_pyspark.fact_sales'))