# In [None]:
# Databricks notebook

# 🔧 Cell 1: Setup
import mlflow, mlflow.sklearn
from pyspark.sql import SparkSession
from scripts.etl_retail import RetailETL
from scripts.anomaly_retail import RetailAnomalyDetector

# Get (or create) the Spark session used by the rest of the notebook.
spark = (
    SparkSession.builder
    .appName("RetailPipeline")
    .getOrCreate()
)

# 📦 Cell 2: ETL
# Run the retail ETL job. NOTE(review): presumably this populates the
# table displayed in Cell 4 — confirm against scripts/etl_retail.py.
etl = RetailETL()
etl.run()

# 🧠 Cell 3: Anomaly Detection
# Bind the run object with `as run`: the run is ended when the `with` block
# exits, so mlflow.active_run() returns None afterwards and cannot be used
# to look up the run id in Cell 5.
with mlflow.start_run(run_name="Retail Anomaly") as run:
    detector = RetailAnomalyDetector(spark, method="isolation_forest")
    anomalies = detector.run()

    # len() covers pandas DataFrames / lists; Spark DataFrames define no
    # __len__ and have to be counted with .count() instead.
    if hasattr(anomalies, "__len__"):
        num_anomalies = len(anomalies)
    else:
        num_anomalies = anomalies.count()

    mlflow.log_param("method", detector.method)
    mlflow.log_metric("num_anomalies", num_anomalies)
    mlflow.sklearn.log_model(detector.model, "model")
    # Attach the detector source to the run; the path is resolved relative
    # to the current working directory.
    mlflow.log_artifact("scripts/anomaly_retail.py")

# 📊 Cell 4: Display
display(spark.table("main.etl.retail_monthly_sales"))
display(anomalies)

# 🔍 Cell 5: MLflow Tracking
# Use the run captured above; no run is active at this point.
print(f"Run: {run.info.run_id}")
