In [None]:
# 🔧 Cell 1
import mlflow, mlflow.sklearn
from pyspark.sql import SparkSession
from scripts.etl_nasdaq import NasdaqETL
from scripts.anomaly_nasdaq import NasdaqAnomalyDetector

# Obtain the notebook's Spark session: getOrCreate() reuses an already-running
# session when one exists, otherwise it starts one named "NasdaqPipeline".
spark = (
    SparkSession.builder
    .appName("NasdaqPipeline")
    .getOrCreate()
)

# 📦 Cell 2: ETL
# Build a NasdaqETL with its default constructor arguments and execute it.
# The return value of run() is discarded, so the cell is run purely for its
# side effects.
# NOTE(review): presumably this (re)writes the table read in Cell 4
# ("main.etl.nasdaq_listings_enriched") — confirm in scripts/etl_nasdaq.py.
NasdaqETL().run()

# 🧠 Cell 3: Anomaly Detection
# Every log_* call inside the `with` block is recorded against a single MLflow
# run named "Nasdaq Anomaly"; the run is ended automatically when the block
# exits (normally or via an exception).
with mlflow.start_run(run_name="Nasdaq Anomaly"):
    # `det` and `anomalies` stay notebook-global on purpose: Cell 4 displays
    # `anomalies` after this block has finished.
    det = NasdaqAnomalyDetector(spark, method="lof")
    anomalies = det.run()

    # Record which detection method produced this run.
    mlflow.log_param(key="method", value=det.method)

    # NOTE(review): len() assumes det.run() returns a sized object (e.g. a
    # pandas DataFrame or list); a Spark DataFrame has no __len__ — confirm.
    mlflow.log_metric(key="num_anomalies", value=len(anomalies))

    # Persist the fitted estimator under the run's "model" artifact path.
    mlflow.sklearn.log_model(det.model, "model")

    # Attach the detector source file to the run. The path is relative, so it
    # is resolved against the current working directory.
    mlflow.log_artifact(local_path="scripts/anomaly_nasdaq.py")

# 📊 Cell 4: Display
# `display` is not imported in this file; presumably it is the helper injected
# by the notebook runtime (Databricks / IPython) — confirm before running this
# as a plain script.
# Render the table "main.etl.nasdaq_listings_enriched" as loaded through Spark.
display(spark.table("main.etl.nasdaq_listings_enriched"))
# Render the result that det.run() returned in Cell 3.
display(anomalies)

# 🔍 Cell 5: MLflow Tracking
# The `with mlflow.start_run(...)` block in Cell 3 has already ended its run by
# the time this cell executes, so mlflow.active_run() returns None here and
# `.info` would raise AttributeError. mlflow.last_active_run() returns the
# currently active run if there is one, otherwise the most recently finished
# run started from this Python process, which is the run we want to report.
last_run = mlflow.last_active_run()
if last_run is None:
    # No run has been started in this session (e.g. Cell 3 was skipped).
    print("Run ID: <no MLflow run found in this session>")
else:
    print(f"Run ID: {last_run.info.run_id}")
