In [0]:
# Expose an `env` text widget (default "dev") so the target environment can be
# chosen per run, then read its current value. `env` is passed to load_config
# further down to pick the environment's settings.
dbutils.widgets.text(name="env", defaultValue="dev")
env = dbutils.widgets.get("env")

In [0]:
%run "/Workspace/Users/azuredataengineer44@gmail.com/databricks-traffic/Databricks Retail Notebooks/common/config_loader"

In [0]:
# Load the settings for the selected environment and pull out the Unity Catalog
# name. `load_config` is not defined in this notebook; it is expected to be
# provided by the config_loader notebook pulled in via %run above.
config = load_config(env)
catalog = config["unity_catalog"]["catalog"]

# Read the raw orders table from the bronze schema of that catalog.
bronze_table = f"{catalog}.bronze.orders_raw"
df_bronze = spark.table(bronze_table)


In [0]:
from pyspark.sql.functions import col

# Columns carried forward from the bronze table, in the order they are selected.
order_columns = [
    "order_id",
    "order_date",
    "order_timestamp",
    "customer_id",
    "product_id",
    "quantity",
    "unit_price",
    "order_status",
    "payment_mode",
    "ingestion_ts",
    "source_file_path",
]

# Project the bronze frame down to exactly those columns.
df_orders = df_bronze.select(*order_columns)

In [0]:
from pyspark.sql.functions import expr

# Line total for each order row: quantity multiplied by unit price, evaluated
# as a Spark SQL expression.
order_amount_expr = expr("quantity * unit_price")

# Add (or overwrite, if the cell is re-run) the derived `order_amount` column.
df_orders = df_orders.withColumn("order_amount", order_amount_expr)

In [0]:
from pyspark.sql.functions import row_number
from pyspark.sql.window import Window


# Number the rows of each order_id from newest to oldest ingestion_ts.
# NOTE(review): rows sharing the same ingestion_ts have no further tie-breaker
# here, so which of them gets rank 1 is not pinned down — confirm this is acceptable.
window_spec = Window.partitionBy("order_id").orderBy(col("ingestion_ts").desc())

# Rank every row, keep only the top-ranked row per order_id, and discard the
# helper rank column again.
df_ranked = df_orders.withColumn("rn", row_number().over(window_spec))
df_deduped = df_ranked.filter(col("rn") == 1).drop("rn")


In [0]:
# Fully qualified name of the silver orders table, in the same catalog the
# bronze data was read from.
silver_table = f"{catalog}.silver.orders_fact"

# Create the silver Delta table on first run only (IF NOT EXISTS makes this a
# no-op afterwards). The schema mirrors the columns selected from bronze plus
# the derived `order_amount`; note the bronze column `source_file_path` is
# stored here under the name `source_file`.
spark.sql(f"""
          CREATE TABLE IF NOT EXISTS {silver_table} (
              order_id STRING,
  order_date DATE,
  order_timestamp TIMESTAMP,
  customer_id STRING,
  product_id STRING,
  quantity INT,
  unit_price DOUBLE,
  order_amount DOUBLE,
  order_status STRING,
  payment_mode STRING,
  ingestion_ts TIMESTAMP,
  source_file STRING
)
USING DELTA          
          """)

In [0]:
from delta.tables import DeltaTable

# Get a DeltaTable handle on the silver table by name so it can be used as a
# merge target.
silver_dt = DeltaTable.forName(sparkSession=spark, tableOrViewName=silver_table)

In [0]:
# Target columns that take their value from the identically named source column.
same_named_columns = [
    "order_id",
    "order_date",
    "order_timestamp",
    "customer_id",
    "product_id",
    "quantity",
    "unit_price",
    "order_amount",
    "order_status",
    "payment_mode",
    "ingestion_ts",
]

# Target column -> source expression for inserted rows. The only rename is
# source_file, which is fed from the source column source_file_path.
insert_values = {
    **{name: f"s.{name}" for name in same_named_columns},
    "source_file": "s.source_file_path",
}

# Build the merge of the deduplicated orders (alias "s") into the silver table
# (alias "t"), matched on order_id. Despite its name, `silver_orders_df` holds
# the merge builder, not a DataFrame.
# NOTE(review): only source rows with no matching order_id are inserted; rows
# whose order_id already exists in the target are left untouched — confirm that
# newer versions of an existing order are not meant to update silver.
silver_orders_df = (
    silver_dt.alias("t")
    .merge(df_deduped.alias("s"), "t.order_id = s.order_id")
    .whenNotMatchedInsert(values=insert_values)
)

# Run the merge against the Delta table.
silver_orders_df.execute()

In [0]:
# Preview the bronze orders table this run actually read from.
# The table name comes from `bronze_table` (built from the catalog in the loaded
# config) rather than a hard-coded `dev_catalog`, so the preview follows the
# `env` widget instead of always showing the dev catalog.
spark.sql(f"SELECT * FROM {bronze_table}").display()

In [0]:
# Preview the silver orders table this run merged into.
# The table name comes from `silver_table` (built from the catalog in the loaded
# config) rather than a hard-coded `dev_catalog`, so the preview follows the
# `env` widget instead of always showing the dev catalog.
spark.sql(f"SELECT * FROM {silver_table}").display()