In [0]:
# ----------------------------------------
# Step 1: Load Gold Layer Data
# ----------------------------------------
# Location of the enriched streaming-sales Delta table in the gold layer.
gold_table_path = "/mnt/realtimedeai/gold/streaming_sales_enriched/"

# `spark.read` is a batch read: `sales_df` reflects the table as of the moment
# this cell runs, so re-run the cell to pick up newly arrived rows.
sales_df = spark.read.load(gold_table_path, format="delta")

# Render the loaded rows so the available columns can be inspected.
display(sales_df)


In [0]:
# ----------------------------------------
# Step 2: Total Sales by Store (Bar Chart)
# ----------------------------------------

# NOTE: this import rebinds the names `sum` and `round` to the Spark column
# functions for the rest of the notebook, hiding the Python builtins.
# Later cells call these bare names, so the import is kept as-is.
from pyspark.sql.functions import sum, round

# Aggregate expression: summed sale amount, rounded to 2 decimal places.
total_sales_col = round(sum("sale_amount"), 2).alias("total_sales")

# One row per store, highest total first.
sales_by_store_df = (
    sales_df.groupBy("store_id")
    .agg(total_sales_col)
    .orderBy("total_sales", ascending=False)
)

display(sales_by_store_df)


Databricks visualization. Run in Databricks to view.

In [0]:
# ----------------------------------------
# Step 3: Total Sales by Category (Bar Chart)
# ----------------------------------------

# `round` and `sum` here are the Spark column functions imported in the
# Step 2 cell, not the Python builtins.
category_totals_df = sales_df.groupBy("category").agg(
    round(sum("sale_amount"), 2).alias("total_sales")
)

# Highest-grossing categories first.
sales_by_category_df = category_totals_df.orderBy("total_sales", ascending=False)

display(sales_by_category_df)


Databricks visualization. Run in Databricks to view.

In [0]:
# ----------------------------------------
# Step 4: Top 10 Best-Selling Products
# ----------------------------------------
# Sums `sale_amount` per product (rounded to 2 decimal places) and keeps the
# ten largest totals.
# `round` and `sum` are the Spark column functions imported in the Step 2 cell.

top_products_df = (
    sales_df
    .groupBy("product_name")
    .agg(round(sum("sale_amount"), 2).alias("total_sales"))
    # Sorting on the total alone leaves ties unordered, so which products
    # survive `limit(10)` could differ between runs. `product_name` is added
    # as an ascending secondary key to make the cut-off deterministic.
    .orderBy(["total_sales", "product_name"], ascending=[False, True])
    .limit(10)
)

display(top_products_df)


Databricks visualization. Run in Databricks to view.

In [0]:
# ----------------------------------------
# Step 5: Revenue Trend by Day (Line Chart)
# ----------------------------------------
from pyspark.sql.functions import to_date

# Derive a calendar-date column from `transaction_time` to group on.
sales_with_date_df = sales_df.withColumn("sale_date", to_date("transaction_time"))

# Daily revenue, rounded to 2 decimal places and sorted chronologically.
# `round` and `sum` are the Spark column functions imported in the Step 2 cell.
sales_by_day_df = (
    sales_with_date_df.groupBy("sale_date")
    .agg(round(sum("sale_amount"), 2).alias("daily_sales"))
    .orderBy("sale_date")
)

display(sales_by_day_df)


Databricks visualization. Run in Databricks to view.

In [0]:
# Load the GPT sales summary: read the `gold.gpt_sales_summary` table and keep
# only the single row with the greatest `timestamp` value.
gpt_df = (
    spark.read.table("gold.gpt_sales_summary")
    .orderBy("timestamp", ascending=False)
    .limit(1)
)

display(gpt_df)


Databricks visualization. Run in Databricks to view.