In [0]:
# MAGIC %run ./00_config
# STEP 6: Real-Time Monitoring & Alerts (Streaming Simulation + Dashboard Link)

from pyspark.sql import functions as F
from pyspark.sql.types import *

# ---- Fallback table-name helper, defined only when `tbl` is not already in scope ----
# (the %run of 00_config above is expected to supply it)
if "tbl" not in globals():
    CATALOG = "influencer"

    def tbl(name: str) -> str:
        """Prefix `name` with the fallback catalog, e.g. "ml.x" -> "influencer.ml.x"."""
        return "{}.{}".format(CATALOG, name)

# ---- Load base data ----
# Resolve the fully-qualified names of the two input tables.
pred_tbl = tbl("ml.creator_predictions")
features_tbl = tbl("ml.creator_features")

# Fail fast when either input is absent (predictions are checked first).
if not spark.catalog.tableExists(pred_tbl) or not spark.catalog.tableExists(features_tbl):
    raise RuntimeError("❌ Required tables missing. Run Steps 3–5 first!")

preds, features = spark.table(pred_tbl), spark.table(features_tbl)

# ---- Simulate streaming updates (mock real-time data ingestion) ----
# In production, this would come via Kafka or API sources.
import time

# Half-width of the simulated relative drift: recent_eng_rate falls in
# [avg * (1 - DRIFT_AMPLITUDE), avg * (1 + DRIFT_AMPLITUDE)).
# It must exceed the 25% band used by the alert step (0.75x / 1.25x of
# avg_eng_rate); the earlier +/-15% drift could never cross those thresholds,
# so every creator was always reported as stable.
DRIFT_AMPLITUDE = 0.30

# One seed per notebook run. stream_df is lazy and is re-evaluated by every
# downstream action (the two display() calls and the two table writes); an
# unseeded rand() would give each of them a different random draw, so the
# saved tables would not match what was displayed. Seeding makes the draw
# repeatable for a given partitioning (Spark still documents rand() as
# non-deterministic in the general case), while runs still differ from each other.
DRIFT_SEED = int(time.time())

# rand() is uniform on [0, 1); rescale it to a multiplicative factor centred on 1.
drift_factor = 1 + (F.rand(seed=DRIFT_SEED) * 2 - 1) * DRIFT_AMPLITUDE

stream_df = (
    features.join(preds, on="creator_norm_id", how="inner")
    .withColumn("timestamp", F.current_timestamp())
    .withColumn("recent_eng_rate", F.col("avg_eng_rate") * drift_factor)  # simulate ±30% drift
)

# ---- Real-time KPI aggregation ----
# One row per platform: average of recent_eng_rate, average of success_prob,
# and the row count, sorted by the engagement average, highest first.
per_platform = stream_df.groupBy("platform")

agg_df = per_platform.agg(
    F.avg("recent_eng_rate").alias("current_eng_rate"),
    F.avg("success_prob").alias("avg_success_prob"),
    F.count("*").alias("num_creators"),
).sort(F.col("current_eng_rate").desc())

print("✅ Real-Time KPI Summary by Platform")
display(agg_df)

# ---- Anomaly Detection Logic – Alerts ----
# Compare the recent engagement rate with the baseline average: below 75% of
# the baseline is flagged as a drop, above 125% as a spike, anything else
# (including rows where the comparison is null) is labelled stable.
baseline_rate = F.col("avg_eng_rate")
recent_rate = F.col("recent_eng_rate")

flag_expr = (
    F.when(recent_rate < baseline_rate * 0.75, "⚠️ Drop Detected")
    .when(recent_rate > baseline_rate * 1.25, "🚀 Spike Detected")
    .otherwise("✅ Stable")
)
alerts = stream_df.withColumn("performance_flag", flag_expr)

# Number of rows per (platform, flag) pair, listed platform by platform.
alert_summary = (
    alerts.groupBy("platform", "performance_flag")
    .count()
    .sort("platform")
)

print("🚨 Real-Time Alerts Summary")
display(alert_summary)

# ---- Write summary tables to ML schema for dashboard consumption ----
agg_tbl = tbl("ml.platform_kpi_summary")
alert_tbl = tbl("ml.platform_alert_summary")

# Each target is a Delta table that is fully replaced (data and schema) on every run.
# Both writes complete before either confirmation is printed.
for summary_df, target_name in ((agg_df, agg_tbl), (alert_summary, alert_tbl)):
    delta_writer = summary_df.write.format("delta").mode("overwrite")
    delta_writer.option("overwriteSchema", "true").saveAsTable(target_name)

print(f"✅ KPI Summary saved to: {agg_tbl}")
print(f"✅ Alerts saved to: {alert_tbl}")

# ---- Databricks Dashboard Link ----
# Hard-coded address of the dashboard; it is printed as plain text and also
# rendered as a clickable card in the notebook output.
DASHBOARD_URL = "https://dbc-6dce4afe-7b9d.cloud.databricks.com/editor/notebooks/2054052152661904/dashboards/640cad98-1d7a-47a5-888f-a47cfa858ff3?o=1957348823009"

print("\n📊 View Live Dashboard Here:")
print(DASHBOARD_URL)

dashboard_card_html = f"""
<div style="padding:10px;border-radius:10px;background-color:#f0f8ff;">
    <h3>📈 Access the Real-Time Monitoring Dashboard</h3>
    <p>Click below to open your live Databricks Dashboard:</p>
    <a href="{DASHBOARD_URL}" target="_blank" style="font-size:16px;color:#1f77b4;font-weight:bold;">
        🔗 Open Influencer Analytics Dashboard
    </a>
</div>
"""
displayHTML(dashboard_card_html)


✅ Real-Time KPI Summary by Platform


platform,current_eng_rate,avg_success_prob,num_creators
youtube,0.0,0.0474258731775667,1
twitter,0.0,0.0474258731775667,1
tiktok,0.0,0.0474258731775667,1
instagram,0.0,0.0474258731775667,1


🚨 Real-Time Alerts Summary


platform,performance_flag,count
instagram,✅ Stable,1
tiktok,✅ Stable,1
twitter,✅ Stable,1
youtube,✅ Stable,1


✅ KPI Summary saved to: influencer.ml.platform_kpi_summary
✅ Alerts saved to: influencer.ml.platform_alert_summary

📊 View Live Dashboard Here:
https://dbc-6dce4afe-7b9d.cloud.databricks.com/editor/notebooks/2054052152661904/dashboards/640cad98-1d7a-47a5-888f-a47cfa858ff3?o=1957348823009
