# In [None]:
# 🔧 Cell 1
import mlflow, mlflow.sklearn
from pyspark.sql import SparkSession
from scripts.etl_gold import GoldETL
from scripts.anomaly_gold import GoldAnomalyDetector

# Obtain the Spark session for this pipeline (getOrCreate returns an existing
# session when there is one), labelled with the app name "GoldPipeline".
session_builder = SparkSession.builder.appName("GoldPipeline")
spark = session_builder.getOrCreate()

# 📦 Cell 2: ETL
# Construct the ETL job with no arguments and execute it; its return value
# is not used here.
gold_etl = GoldETL()
gold_etl.run()

# 🧠 Cell 3: Anomaly Detection
# Bind the run object with `as run`: once this `with` block exits the run is
# ended and mlflow.active_run() returns None, so later cells must use the
# captured handle rather than asking MLflow for the active run.
with mlflow.start_run(run_name="Gold Anomaly") as run:
    det = GoldAnomalyDetector(spark, method="isolation_forest")
    anomalies = det.run()

    # Record the detection method, the anomaly count, the fitted model and the
    # detector source file against this run.
    mlflow.log_param("method", det.method)
    # NOTE(review): len() assumes det.run() returns a sized object (e.g. a
    # pandas DataFrame or list); a Spark DataFrame would need .count() — confirm.
    mlflow.log_metric("num_anomalies", len(anomalies))
    mlflow.sklearn.log_model(det.model, "model")
    # NOTE(review): relative path, resolved against the current working
    # directory — confirm the notebook runs from the repository root.
    mlflow.log_artifact("scripts/anomaly_gold.py")

# 📊 Cell 4: Display
# `display` is not imported in this file; presumably the notebook runtime
# provides it — TODO confirm.
display(spark.table("main.etl.gold_prices_monthly_agg"))
display(anomalies)

# 🔍 Cell 5: MLflow Tracking
# Use the handle captured in Cell 3; the run is no longer active here.
print(f"Run ID: {run.info.run_id}")
