# In [None]:
# 🔧 Cell 1
import mlflow, mlflow.sklearn
from pyspark.sql import SparkSession
from scripts.etl_gas import GasETL
from scripts.anomaly_gas import GasAnomalyDetector

# Get the Spark session for this pipeline (created under the app name
# "GasPipeline" if no session exists yet).
spark = (
    SparkSession.builder
    .appName("GasPipeline")
    .getOrCreate()
)

# 📦 Cell 2: ETL
# Build the ETL job with its default configuration and execute it.
# NOTE(review): presumably this populates the table displayed in Cell 4 —
# confirm against scripts/etl_gas.py.
gas_etl = GasETL()
gas_etl.run()

# 🧠 Cell 3: Anomaly Detection
# Bind the run object (`as run`) so its ID remains reachable after the
# `with` block has ended the run; see Cell 5.
with mlflow.start_run(run_name="Gas Anomaly") as run:
    det = GasAnomalyDetector(spark, method="ocsvm")
    anomalies = det.run()
    mlflow.log_param("method", det.method)
    # NOTE(review): len() needs a sized result (e.g. a pandas DataFrame or a
    # list). If det.run() returns a Spark DataFrame this must be
    # anomalies.count() instead — confirm against scripts/anomaly_gas.py.
    mlflow.log_metric("num_anomalies", len(anomalies))
    mlflow.sklearn.log_model(det.model, "model")
    # Attach the detector source to the run alongside the logged model.
    mlflow.log_artifact("scripts/anomaly_gas.py")

# 📊 Cell 4: Display
# `display` is not imported here; it is expected to be supplied by the
# notebook environment.
display(spark.table("main.etl.natural_gas_prices_monthly_agg"))
display(anomalies)

# 🔍 Cell 5: MLflow Tracking
# The run was ended when the `with` block exited, so mlflow.active_run()
# would return None here. Read the ID from the captured run object instead.
print(f"Run ID: {run.info.run_id}")
