# Module 5: Logging Experiments with MLflow

**Course**: End-to-End Machine Learning (DataCamp)  
**Case Study**: CardioCare Heart Disease Prediction  
**Author**: Seif

---

## Overview

In this module, you'll learn to:
- Create and set an MLflow experiment
- Start runs and log parameters, metrics, and models
- Retrieve runs programmatically (`get_run`, `search_runs`)
- Compare runs in the MLflow UI

Why MLflow?
- Keeps experiments organized and reproducible
- Essential in clinical settings where results must be auditable

---

In [None]:
# Imports
import os
import time
import numpy as np
import pandas as pd
import mlflow
import mlflow.sklearn

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

## 1) Create an experiment

Set or create an experiment with `mlflow.set_experiment` so all runs are grouped together.

In [None]:
# All runs logged by this notebook are grouped under this experiment name;
# the retrieval cell further down reuses the same variable to search for them.
experiment_name = "cardiocare-heart-disease"

# Make the experiment active for the runs started in later cells.
mlflow.set_experiment(experiment_name=experiment_name)
print(f"Active experiment: {experiment_name}")

## 2) Run an experiment and log params/metrics

We'll train a simple Logistic Regression on a synthetic binary dataset (stand-in for CardioCare data), and log parameters and metrics to MLflow.

In [None]:
# Create a toy binary classification dataset (stand-in for the CardioCare data)
X, y = make_classification(
    n_samples=1000, n_features=10, n_informative=6, n_redundant=2,
    random_state=42, class_sep=1.2
)

# Split settings are named so the values used here are exactly the values
# logged to MLflow below.
test_size = 0.2
split_seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=split_seed
)

# Train and log with MLflow
with mlflow.start_run() as run:
    model = LogisticRegression(max_iter=1000, C=1.0, n_jobs=1)
    model.fit(X_train, y_train)

    # Predictions and metrics on the held-out test split
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Log the model and split settings in one batched call. Hyperparameters
    # are read back from the estimator so the record cannot drift from the
    # model that was actually trained. The "model" and "C" keys are kept
    # because the retrieval cell reads the params.C column.
    mlflow.log_params({
        "model": "LogisticRegression",
        "C": model.C,
        "max_iter": model.max_iter,
        "test_size": test_size,
        "split_random_state": split_seed,
    })
    mlflow.log_metrics({"accuracy": acc, "f1": f1})

    # Optionally log the model artifact
    mlflow.sklearn.log_model(model, artifact_path="model")

    run_id = run.info.run_id
    print("Run ID:", run_id)
    print({"accuracy": acc, "f1": f1})

## 3) Retrieve runs programmatically

- `mlflow.get_run(run_id)` returns a single run, including its logged params and metrics
- `mlflow.search_runs(experiment_names=[...])` returns a pandas DataFrame with one row per run in the named experiments

In [None]:
# Example: list the most recent runs, then load the newest one by its run ID
runs_df = mlflow.search_runs(
    experiment_names=[experiment_name],
    order_by=["start_time DESC"],
    max_results=5,
)

# The metrics.* / params.* columns are only present when the returned runs
# logged those keys, so a plain column selection raises KeyError on an empty
# or differently-logged experiment. reindex fills absent columns with NaN.
summary_columns = ["run_id", "metrics.accuracy", "metrics.f1", "params.C"]
print("Recent runs:\n", runs_df.reindex(columns=summary_columns))

if not runs_df.empty:
    # Rows are ordered newest first, so row 0 is the latest run.
    some_run_id = runs_df.iloc[0]["run_id"]
    run_info = mlflow.get_run(some_run_id)
    print("\nFetched run:", some_run_id)
    print("Params:", run_info.data.params)
    print("Metrics:", run_info.data.metrics)

## 4) Compare runs in the MLflow UI

Start the UI locally to filter, sort, and compare runs:

```powershell
mlflow ui
```

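By default the UI reads from the `mlruns/` folder in the directory where you run the command (so start it from the notebook's working directory) and is served at http://127.0.0.1:5000. You can also point to a remote tracking server by setting the `MLFLOW_TRACKING_URI` environment variable.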

In [None]:
# Logistic Regression: log coefficients and intercept to MLflow
# This cell trains a small Logistic Regression model and logs its learned parameters.
import mlflow
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

# Small synthetic dataset for demonstration
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=6,
    n_redundant=2,
    random_state=42,
    class_sep=1.25
)
# Stratified split: train and test keep the same class proportions as y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

model = LogisticRegression(max_iter=1000, solver="liblinear", random_state=42)

# Runs from this cell go to their own experiment, separate from earlier cells.
mlflow.set_experiment("Logistic Regression Heart Disease Prediction")

# Start a run, fit the model, and log coefficients/intercept
with mlflow.start_run() as run:
    model.fit(X_train, y_train)

    # Collect the learned parameters and log them in a single batched call
    # instead of one tracking call per coefficient.
    # Row 0 of coef_ / element 0 of intercept_ is used, which assumes a
    # binary problem (one coefficient row) as produced by the dataset above.
    # Note: MLflow params are stored as strings, but floats are accepted and cast.
    learned_params = {
        f"coef_{idx}": float(coef) for idx, coef in enumerate(model.coef_[0])
    }
    learned_params["intercept"] = float(model.intercept_[0])
    mlflow.log_params(learned_params)

    # Log a couple of simple metrics for context
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    mlflow.log_metrics({"accuracy": acc, "f1": f1})

    # Take the ID from the run bound by the `with` statement rather than
    # looking the active run up again.
    run_id = run.info.run_id
    print(f"MLflow run_id: {run_id}")
    print({"accuracy": acc, "f1": f1})

In [None]:
# Evaluate model using k-fold cross-validation and print confusion matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold, cross_val_score

# Pick the evaluation data in order of preference: the heart_disease_df_*
# variables if an earlier cell defined them, then the notebook's X / y,
# and finally a freshly generated synthetic dataset.
try:
    X_data, y_data = heart_disease_df_X, heart_disease_df_y
except NameError:
    try:
        X_data, y_data = X, y
    except NameError:
        from sklearn.datasets import make_classification
        X_data, y_data = make_classification(
            n_samples=1000,
            n_features=10,
            n_informative=6,
            n_redundant=2,
            class_sep=1.25,
            random_state=42,
        )

# Keep whatever `model` an earlier cell left behind; only build a default
# Logistic Regression when no such variable exists.
try:
    model
except NameError:
    model = LogisticRegression(solver="liblinear", max_iter=1000, random_state=42)

# 5-fold cross-validation with shuffling, scored by balanced accuracy
kf = KFold(n_splits=5, shuffle=True, random_state=42)
score = cross_val_score(
    estimator=model,
    X=X_data,
    y=y_data,
    cv=kf,
    scoring='balanced_accuracy',
)
print("Cross-validation balanced_accuracy scores:", score)
print("Mean ", score.mean(), "Std ", score.std())

# Demo only: the model is refit on all of the data and then predicts that
# same data, so this confusion matrix reflects training-set performance and
# is more optimistic than the cross-validated scores above.
model.fit(X_data, y_data)
y_pred = model.predict(X_data)

# Print confusion matrix
cm = confusion_matrix(y_true=y_data, y_pred=y_pred)
print("Confusion matrix (on full data):\n", cm)