In [0]:
"""# 04 Gold Business Aggregates

This notebook creates Gold-layer aggregates from Silver pharmacy data.

Focus areas:
- Category-level performance
- Product-level metrics (batch count, average price, average shelf life)
- Expiry risk insights
- ML & dashboard-ready KPIs
"""

In [0]:
from pyspark.sql import functions as F


In [0]:
# Load the Silver-layer table that every aggregate in this notebook reads from.
silver_df = spark.table("silver_pharmacy_events")

# Sanity check: report the row count, then preview the first five rows.
silver_row_count = silver_df.count()
print("Silver count:", silver_row_count)
silver_df.show(n=5)


In [0]:
# Per-category summary: row count, mean of price_including_gst, and the sum of
# near_expiry_flag.
# NOTE(review): summing near_expiry_flag presumes it is a numeric 0/1 column —
# confirm against the Silver schema.
category_aggregates = [
    F.count("*").alias("total_records"),
    F.avg("price_including_gst").alias("avg_price"),
    F.sum("near_expiry_flag").alias("near_expiry_count"),
]
gold_category_metrics = silver_df.groupBy("category").agg(*category_aggregates)


In [0]:
# One row per (brand, manufacturer, category) combination: number of distinct
# batch_number values, mean of price_including_gst, and mean of shelf_life_months.
product_group_keys = ["medicine_brand_name", "manufacturing_company", "category"]
gold_product_metrics = silver_df.groupBy(*product_group_keys).agg(
    F.countDistinct("batch_number").alias("batch_count"),
    F.avg("price_including_gst").alias("avg_price"),
    F.avg("shelf_life_months").alias("avg_shelf_life_months"),
)


In [0]:
# Step 1: per category, sum near_expiry_flag and count all rows.
# NOTE(review): summing near_expiry_flag presumes it is a numeric 0/1 column —
# confirm against the Silver schema.
expiry_counts_by_category = silver_df.groupBy("category").agg(
    F.sum("near_expiry_flag").alias("near_expiry_items"),
    F.count("*").alias("total_items"),
)

# Step 2: express the near-expiry items as a percentage of each category's rows.
# total_items comes from count(*) within a group, so the divisor is never zero.
near_expiry_share = F.col("near_expiry_items") / F.col("total_items")
gold_expiry_risk = expiry_counts_by_category.withColumn(
    "expiry_risk_pct", near_expiry_share * 100
)


In [0]:
# Persist each Gold aggregate as a Delta table, overwriting whatever the table
# held before. Tables are written in the order listed here.
gold_tables = {
    "gold_category_metrics": gold_category_metrics,
    "gold_product_metrics": gold_product_metrics,
    "gold_expiry_risk": gold_expiry_risk,
}

for table_name, gold_df in gold_tables.items():
    gold_df.write.format("delta").mode("overwrite").saveAsTable(table_name)


In [0]:
%sql
-- Spot-check the written Gold tables.
-- NOTE(review): some notebook front-ends render only the last statement's result
-- in a multi-statement %sql cell — confirm both result sets are visible, or split
-- this into two cells.

-- Categories ordered by row count, largest first.
SELECT *
FROM gold_category_metrics
ORDER BY total_records DESC;

-- Categories ordered by near-expiry percentage, highest first.
SELECT *
FROM gold_expiry_risk
ORDER BY expiry_risk_pct DESC;


In [0]:
"""## Gold Layer Contract

✔ Aggregated business metrics  
✔ Dashboard-ready tables  
✔ Expiry risk quantified  
✔ Category & product insights  
✔ ML feature-ready  

These tables power analytics, forecasting, and decision-making.
"""