# Import necessary modules

In [0]:
from pyspark.sql.functions import col
from delta.tables import DeltaTable

# Read Source

In [0]:
# Load every column of the cleaned e-commerce dataset, stored as Parquet in
# the "silver" storage container, by querying the path directly through SQL.
df_src = spark.sql(
    "\nSELECT\n*\n"
    "FROM parquet.`abfss://silver@globalsalestorage.dfs.core.windows.net/clean_e_commerce_data`\n"
)

## Create Fact Table

In [0]:
# Load the order, customer and product dimension tables from the gold schema.
# The queries are evaluated in the order listed and unpacked into one
# DataFrame per dimension.
df_order, df_customer, df_product = (
    spark.sql(dim_query)
    for dim_query in (
        'select * from ecom_catalog.gold.dim_order',
        'select * from ecom_catalog.gold.dim_customer',
        'select * from ecom_catalog.gold.dim_product',
    )
)

In [0]:
# Build the fact rows: attach each dimension to the source rows through its
# ID column, then project only the three dimension keys plus the source's
# price/sales/quantity/discount/profit/shipping-cost columns.
# Left joins keep every source row; a source row with no match in a dimension
# ends up with NULL for that dimension's key.
df_fact = (
    df_src
    .join(df_customer, on=df_src["Customer_ID"] == df_customer["Customer_ID"], how='left')
    .join(df_product, on=df_src["Product_ID"] == df_product["Product_ID"], how='left')
    .join(df_order, on=df_src["Order_ID"] == df_order["Order_ID"], how='left')
    .select(
        df_order["dim_order_key"],
        df_customer["dim_customer_key"],
        df_product["dim_product_key"],
        df_src["price"],
        df_src["Sales"],
        df_src["Quantity"],
        df_src["Discount"],
        df_src["Profit"],
        df_src["Shipping_Cost"],
    )
)

# Write Fact Table

In [0]:
# Persist df_fact to ecom_catalog.gold.fact_sales.
#   * Table missing -> create it as a Delta table at the explicit gold
#     storage path.
#   * Table present -> upsert: rows whose three dimension keys all match an
#     existing row overwrite it, all other rows are inserted.
# NOTE(review): the merge keys are produced by left joins and may be NULL.
# `=` never evaluates to true for NULL, so such rows would be inserted again
# on each run -- confirm whether a null-safe comparison (`<=>`) is wanted.
if not spark.catalog.tableExists("ecom_catalog.gold.fact_sales"):
    (
        df_fact.write
        .format("delta")
        .mode("overwrite")
        .option("path", "abfss://gold@globalsalestorage.dfs.core.windows.net/fact_sales")
        .saveAsTable("ecom_catalog.gold.fact_sales")
    )
else:
    delta_tbl = DeltaTable.forName(spark, "ecom_catalog.gold.fact_sales")
    (
        delta_tbl.alias("t")
        .merge(
            df_fact.alias("s"),
            "t.dim_order_key = s.dim_order_key "
            "AND t.dim_customer_key = s.dim_customer_key "
            "AND t.dim_product_key = s.dim_product_key",
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )