In [0]:
"""# 08 MLflow Experiment Tracking

This notebook tracks ML experiments using MLflow.

Tracked items:
- Model parameters
- Evaluation metrics
- Model artifacts (where the workspace supports it; model logging is skipped on Databricks Community Edition)

Use cases:
- Model comparison
- Reproducibility
- Production readiness
"""

In [0]:
import mlflow
import mlflow.spark

from pyspark.sql import functions as F
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import RegressionEvaluator, BinaryClassificationEvaluator


In [0]:
# Select the MLflow experiment by name so the runs started later in this
# notebook are recorded under it.
mlflow.set_experiment(experiment_name="/Shared/pharmacy_analytics_ml")


In [0]:
# Columns of the feature table that are packed into the model input vector.
feature_cols = ["stock_count", "avg_price", "avg_shelf_life", "near_expiry_items"]

# Feature table, read through the notebook's Spark session.
ml_df = spark.table("ml_pharmacy_features")

# Combines `feature_cols` into a single vector column named "features".
assembler = VectorAssembler(outputCol="features", inputCols=feature_cols)


In [0]:
# Train a linear regression on the assembled features, evaluate it on a
# hold-out split, and record the run's configuration and metrics in MLflow.
with mlflow.start_run(run_name="linear_regression_stock_risk"):

    # Named once so the values that drive the split are exactly the values
    # recorded on the run.
    split_weights = [0.8, 0.2]
    split_seed = 42

    # Build the feature vector and expose the target column under the name
    # the estimator and evaluators are configured with ("label").
    reg_df = (
        assembler.transform(ml_df)
        .select("features", "stock_risk_score")
        .withColumnRenamed("stock_risk_score", "label")
    )

    train, test = reg_df.randomSplit(split_weights, seed=split_seed)

    lr = LinearRegression(featuresCol="features", labelCol="label")
    model = lr.fit(train)

    preds = model.transform(test)

    # Same evaluator configuration for both metrics; only metricName differs.
    rmse, r2 = (
        RegressionEvaluator(
            labelCol="label",
            predictionCol="prediction",
            metricName=metric_name,
        ).evaluate(preds)
        for metric_name in ("rmse", "r2")
    )

    # Record everything needed to reproduce and compare this run, not just
    # the model family: inputs, split configuration, and the estimator
    # settings that were actually in effect for the fit.
    mlflow.log_params({
        "model_type": "LinearRegression",
        "feature_cols": ",".join(assembler.getInputCols()),
        "label_col": "stock_risk_score",
        "split_weights": ",".join(str(w) for w in split_weights),
        "split_seed": split_seed,
        "max_iter": lr.getMaxIter(),
        "reg_param": lr.getRegParam(),
        "elastic_net_param": lr.getElasticNetParam(),
        "fit_intercept": lr.getFitIntercept(),
    })
    mlflow.log_metrics({"rmse": rmse, "r2": r2})

    # IMPORTANT: Do NOT log model in CE
    mlflow.log_param(
        "artifact_note",
        "Model trained in Databricks Community Edition; artifact not registered"
    )

    print("Regression experiment logged successfully")


In [0]:
"""## View Experiment Results

Navigate to:
**Experiments → /Shared/pharmacy_analytics_ml**

Compare:
- RMSE & R² for regression
- AUC for classification (when a classification run has been logged)
- Logged parameters (model artifacts are not logged in Community Edition)
"""

In [0]:
"""## MLflow Contract

✔ Experiments tracked  
✔ Metrics logged  
✔ Models versioned (requires logging/registering the model; skipped in Community Edition)  
✔ Runs comparable  
✔ Production-ready workflow  

MLflow enables governance and reproducibility.
"""