In [0]:

# Read Silver Layer Data (Batch + Stream)
#
# Loads the Silver inputs for this notebook: the streaming sales table and
# the batch products, stores and promotions Delta tables.

# Storage locations of the batch Silver Delta tables.
silver_products_path = "/mnt/realtimedeai/silver/batch/products/"
silver_stores_path = "/mnt/realtimedeai/silver/batch/stores/"
silver_promotions_path = "/mnt/realtimedeai/silver/batch/promotions/"

# Streaming sales transactions are read from the DLT table. The path-based
# Delta read of /mnt/realtimedeai/silver/streaming/sales_transactions/ is
# disabled in favour of this table read.
streaming_sales_df = spark.read.table("streaming_data.streaming_sales")

# Batch dimension tables, read as Delta from the paths above.
products_df = spark.read.load(silver_products_path, format="delta")
stores_df = spark.read.load(silver_stores_path, format="delta")

# NOTE(review): promotions_df is not referenced by the other cells visible in
# this notebook; confirm it is still needed.
promotions_df = spark.read.load(silver_promotions_path, format="delta")

print("Silver tables read successfully!")


In [0]:

# Enrich Streaming Sales with Product Info

# Rename the products table's processed_time column to
# processed_time_products before joining, so it carries a table-specific
# name in the joined result.
products_for_join_df = products_df.withColumnRenamed(
    "processed_time", "processed_time_products"
)

# 1. Left join on product_id: every sales row is kept; rows without a
#    matching product get nulls in the product columns.
sales_with_product_df = streaming_sales_df.join(
    products_for_join_df,
    on="product_id",
    how="left",
)

print("Streaming Sales enriched with Product Info!")


In [0]:

# Enrich Sales (already joined with Product) with Store Info

# Rename the stores table's processed_time column to processed_time_stores
# before joining, so it carries a table-specific name in the joined result.
stores_for_join_df = stores_df.withColumnRenamed(
    "processed_time", "processed_time_stores"
)

# Left join on store_id: every sales row is kept; rows without a matching
# store get nulls in the store columns.
sales_enriched_df = sales_with_product_df.join(
    stores_for_join_df,
    on="store_id",
    how="left",
)

print("Streaming Sales further enriched with Store Info!")


In [0]:

# Add Business Columns for Gold Layer

from pyspark.sql.functions import (
    col,
    dayofmonth,
    month,
    round,  # NOTE: rebinds `round` to Spark's column function, shadowing the Python builtin
    year,
)

# Column expressions reused by the derived columns below.
transaction_ts = col("transaction_time")
# Revenue = quantity_sold * base_price, rounded to 2 decimal places.
revenue_expr = round(col("quantity_sold") * col("base_price"), 2)

# Derive the reporting columns, then drop the renamed processed_time columns
# that came from the products and stores tables.
sales_gold_df = (
    sales_enriched_df
    .withColumn("sale_datetime", transaction_ts)  # copy of transaction_time for reporting
    .withColumn("sale_year", year(transaction_ts))
    .withColumn("sale_month", month(transaction_ts))
    .withColumn("sale_day", dayofmonth(transaction_ts))
    .withColumn("total_revenue", revenue_expr)
    .drop("processed_time_products", "processed_time_stores")
)

print("Business columns added successfully!")




In [0]:

# Write Final Gold Table (Streaming Enriched Sales)

# Destination of the Gold-layer table.
gold_output_path = "/mnt/realtimedeai/gold/streaming_sales_enriched/"

# Persist as Delta; mode "overwrite" replaces any data already at the path.
sales_gold_df.write.save(
    gold_output_path,
    format="delta",
    mode="overwrite",
)

print("Gold table written successfully to:", gold_output_path)
