In [0]:
# Run utilities.py inside this notebook's session so the names it defines are
# available to the cells below. This form applies when utilities.py is a
# notebook in your workspace.
%run "./utilities.py"

# NOTE(review): `transformations` is not defined in this notebook — presumably
# it is provided by utilities.py; confirm. The instance is used further down
# to call `create_or_upsert` when writing the fact table.
transformer = transformations()



In [0]:
# Load the three silver-layer inputs for the fact table in one pass.
# The tuple order below must match the order of the target names.
orders_df, payments_df, shipments_df = (
    spark.read.table(silver_table)
    for silver_table in (
        "`e-commerce-project`.silver.orders",
        "`e-commerce-project`.silver.payment",
        "`e-commerce-project`.silver.shipments",
    )
)

In [0]:
%sql
-- Preview the silver orders table that feeds the fact_orders build.
SELECT *
FROM `e-commerce-project`.silver.orders

order_id,customer_id,payment_method,order_status,event_time,shipping_city,shipping_pincode,ip_address,order_date,product_id,quantity,price,item_value_in_order,order_total_value,is_high_value,fraud_score,event_date
ORD001,C001,COD,PLACED,2025-10-08T09:10:00Z,Mumbai,400001,203.0.113.11,2025-10-08,P001,1,25000.0,25000.0,25950.0,True,30,2025-10-08
ORD001,C001,COD,PLACED,2025-10-08T09:10:00Z,Mumbai,400001,203.0.113.11,2025-10-08,P008,1,950.0,950.0,25950.0,True,30,2025-10-08
ORD002,C002,CARD,PLACED,2025-10-09T10:20:00Z,Delhi,110001,203.0.113.12,2025-10-09,P004,1,2500.0,2500.0,4250.0,False,5,2025-10-09
ORD002,C002,CARD,PLACED,2025-10-09T10:20:00Z,Delhi,110001,203.0.113.12,2025-10-09,P011,1,1750.0,1750.0,4250.0,False,5,2025-10-09
ORD003,C003,UPI,PLACED,2025-10-10T11:30:00Z,Bengaluru,560001,203.0.113.13,2025-10-10,P002,1,55000.0,55000.0,55000.0,True,5,2025-10-10
ORD004,C001,CARD,PLACED,2025-10-11T12:00:00Z,Pune,411001,203.0.113.11,2025-10-11,P003,2,1500.0,3000.0,6300.0,False,5,2025-10-11
ORD004,C001,CARD,PLACED,2025-10-11T12:00:00Z,Pune,411001,203.0.113.11,2025-10-11,P009,1,3300.0,3300.0,6300.0,False,5,2025-10-11
ORD005,C004,COD,PLACED,2025-10-12T08:45:00Z,Chennai,600001,203.0.113.14,2025-10-12,P007,2,800.0,1600.0,5600.0,False,5,2025-10-12
ORD005,C004,COD,PLACED,2025-10-12T08:45:00Z,Chennai,600001,203.0.113.14,2025-10-12,P010,1,4000.0,4000.0,5600.0,False,5,2025-10-12
ORD006,C005,WALLET,PLACED,2025-10-13T09:15:00Z,Hyderabad,500001,203.0.113.15,2025-10-13,P006,1,5000.0,5000.0,7300.0,False,5,2025-10-13


In [0]:
# Databricks notebook: gold_fact_orders

from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from delta.tables import DeltaTable

# --------------------------------------------
# Step 1: Join orders, payments, and shipments
# --------------------------------------------
# Grain: the silver orders table holds one row per (order_id, product_id)
# line item, and fact_orders inherits that grain. `order_total_value` is the
# whole-order total repeated on every line of the order, so `order_amount`
# must not be summed across rows without first de-duplicating by order_id.
#
# Both joins are LEFT joins on order_id, so orders with no payment or no
# shipment record are kept and their payment/shipment columns are NULL.
# NOTE(review): this assumes payments and shipments carry at most one row per
# order_id; if they hold several (e.g. one per status event) the joins will
# multiply rows and the upsert keys will no longer be unique — confirm
# against the silver tables.
fact_orders = (
    orders_df.alias("o")
    .join(payments_df.alias("p"), F.col("o.order_id") == F.col("p.order_id"), "left")
    .join(shipments_df.alias("s"), F.col("o.order_id") == F.col("s.order_id"), "left")
    .select(
        F.col("o.order_id"),
        F.col("o.customer_id"),
        F.col("o.product_id"),
        F.col("o.shipping_city"),
        F.col("o.shipping_pincode"),
        F.col("o.ip_address"),
        F.col("o.order_total_value").alias("order_amount"),
        # Prefer the method recorded on the payment row; fall back to the
        # method captured on the order when no payment row matched, so the
        # column is not NULL merely because the payment record is missing.
        F.coalesce(F.col("p.method"), F.col("o.payment_method")).alias("payment_method"),
        F.col("p.payment_status"),
        F.col("s.courier"),
        F.col("s.status").alias("shipment_status"),
        F.col("s.delivery_delay_hours"),
        F.col("o.order_date"),
        F.col("o.event_date"),
        F.col("o.is_high_value"),
        F.col("o.fraud_score"),
        # Load timestamp for this run; passed to the upsert helper as the
        # "last_updated" column.
        F.current_timestamp().alias("last_updated")
    )
)

# --------------------------------------------
# Step 2: Row-count check before writing
# --------------------------------------------
# The table itself is created or merged by `transformer.create_or_upsert`
# in a separate cell; here we only surface the row count so an unexpected
# fan-out from the joins is visible before anything is written.
display(
    fact_orders.count()
)


34

In [0]:
# Persist fact_orders to the gold layer through the shared helper.
# NOTE(review): the argument meanings below are inferred from this call site
# only — confirm against the helper's definition in utilities.py.
transformer.create_or_upsert(
    spark, fact_orders,
    ["order_id", "customer_id", "product_id"],  # presumably the match-key columns
    "gold.fact_orders",  # target table; the catalog prefix appears to be added by the helper
    "last_updated",  # presumably the recency/timestamp column
)

✅ Created new Delta table: `e-commerce-project`.gold.fact_orders


In [0]:
# Print the first 20 rows of each gold dimension table, in the same order
# as before: products, customers, courier partners.
for dim_table in (
    "`e-commerce-project`.gold.dim_products",
    "`e-commerce-project`.gold.dim_customers",
    "`e-commerce-project`.gold.dim_courier_partners",
):
    spark.read.table(dim_table).show(20)

+----------+--------------------+----------------+---------+-------+--------------------+--------------------+--------+
|product_id|        product_name|product_category|    brand|  price|        last_updated|          __START_AT|__END_AT|
+----------+--------------------+----------------+---------+-------+--------------------+--------------------+--------+
|      P001|      Smartphone-s21|     Electronics|  SAMSUNG|25000.0|2025-10-18 04:02:...|2025-10-18 04:02:...|    NULL|
|      P002|              Laptop|     Electronics|     DELL|55000.0|2025-10-18 04:02:...|2025-10-18 04:02:...|    NULL|
|      P003|               Jeans|         Fashion|    LEVIS| 1500.0|2025-10-18 04:02:...|2025-10-18 04:02:...|    NULL|
|      P004|               Shoes|         Fashion|     NIKE| 2500.0|2025-10-18 04:02:...|2025-10-18 04:02:...|    NULL|
|      P005|             Blender|            Home|  PHILIPS| 1800.0|2025-10-18 04:02:...|2025-10-18 04:02:...|    NULL|
|      P006|           Microwave|       