# Experiment Analysis — Titanic MLOps Pipeline

This notebook explores MLflow experiment results:
- Compare model performance across runs
- Visualise Optuna hyperparameter search
- Identify the best model for promotion to Production

In [None]:
import sys
from pathlib import Path

# Locate the repository root so the `src` package resolves regardless of
# whether the kernel was launched from notebooks/ or from the root itself.
working_dir = Path.cwd()
if working_dir.name == "notebooks":
    PROJECT_ROOT = working_dir.parent
else:
    PROJECT_ROOT = working_dir
# Put the root first on sys.path so it wins over any other `src` package.
sys.path.insert(0, str(PROJECT_ROOT))

import pandas as pd
import matplotlib.pyplot as plt
import mlflow

from src.utils.config_loader import load_config

# Read the project configuration and point the MLflow client at the tracking
# store it names; echo the URI so the reader can see which store is in use.
# NOTE(review): a relative URI is presumably resolved against the kernel's
# working directory rather than PROJECT_ROOT — confirm against config.yaml.
cfg = load_config(PROJECT_ROOT / "config" / "config.yaml")
tracking_uri = cfg["mlflow"]["tracking_uri"]
mlflow.set_tracking_uri(tracking_uri)
print(f"MLflow tracking: {tracking_uri}")

## 1. Load All Experiment Runs

In [None]:
# Look up the experiment named in the config. MLflow returns None when no
# experiment with that name exists in the tracking store.
experiment = mlflow.get_experiment_by_name(cfg["mlflow"]["experiment_name"])

if experiment is None:
    print("No experiment found — run `python train.py` first!")
else:
    # One DataFrame row per run, highest accuracy first; later cells rely on
    # this ordering when they take the first row as the best run.
    runs = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=["metrics.accuracy DESC"],
    )
    print(f"Total runs: {len(runs)}")
    # Jupyter only auto-renders the last *top-level* expression of a cell; a
    # bare expression nested in this else-branch would be silently discarded,
    # so the preview has to be displayed explicitly.
    display(runs.head(10))

## 2. Compare Baseline Models

In [None]:
# Build a model-type x metric table from the baseline (non-HPO) runs.
if experiment is not None:
    metrics_cols = ["metrics.accuracy", "metrics.precision", "metrics.recall",
                    "metrics.f1", "metrics.roc_auc"]

    # search_runs only creates a column for a tag/param/metric that at least
    # one run logged, so "tags.stage" may be absent (e.g. an experiment with
    # no runs yet). Fall back to an empty frame with the same columns.
    if "tags.stage" in runs.columns:
        baseline = runs[runs["tags.stage"] == "baseline"].copy()
    else:
        baseline = runs.iloc[0:0].copy()

    comparison = (
        baseline
        # reindex instead of [...] so a metric nobody logged becomes a NaN
        # column rather than a KeyError.
        .reindex(columns=["params.model_type"] + metrics_cols)
        # `runs` is ordered by accuracy DESC, so keeping the first row per
        # model type keeps its best run. The Styler below also requires a
        # unique index and raises when a model type appears twice.
        .drop_duplicates(subset="params.model_type", keep="first")
        .set_index("params.model_type")
    )
    comparison.columns = [c.replace("metrics.", "") for c in comparison.columns]

    if comparison.empty:
        print('No runs tagged stage="baseline" found.')
    else:
        # Highlight the best value of every metric column.
        display(comparison.style.highlight_max(axis=0, color="lightgreen"))

In [None]:
# Grouped bar chart of the baseline comparison table: one group per model
# type, one bar per metric. Skipped when there are no baseline runs to draw.
has_baseline = experiment is not None and len(baseline) > 0
if has_baseline:
    fig, ax = plt.subplots(figsize=(10, 5))
    comparison.plot.bar(ax=ax, rot=0)
    ax.set(title="Baseline Model Comparison", ylabel="Score")
    ax.legend(loc="lower right")
    fig.tight_layout()
    plt.show()

## 3. Optuna HPO Analysis

In [None]:
# Plot the Optuna search history: CV accuracy of every trial, in trial order.
if experiment is not None:
    # search_runs only creates a column for a tag/metric that at least one run
    # logged. Without this check a store that holds no HPO runs raises
    # KeyError here instead of reaching the friendly message below.
    if "tags.optuna_trial" in runs.columns:
        trial_runs = runs[runs["tags.optuna_trial"].notna()].copy()
    else:
        trial_runs = runs.iloc[0:0].copy()

    if len(trial_runs) == 0:
        print("No HPO trials found — run `python optimize.py` first!")
    elif "metrics.cv_accuracy" not in trial_runs.columns:
        print("HPO trials found, but none logged `cv_accuracy` — nothing to plot.")
    else:
        # MLflow stores tag values as strings; cast to int so trials sort
        # numerically (1, 2, 10) rather than lexically (1, 10, 2).
        trial_runs["trial"] = trial_runs["tags.optuna_trial"].astype(int)
        trial_runs = trial_runs.sort_values("trial")
        best_cv_accuracy = trial_runs["metrics.cv_accuracy"].max()

        fig, ax = plt.subplots(figsize=(12, 5))
        ax.plot(trial_runs["trial"], trial_runs["metrics.cv_accuracy"], "b-o", markersize=3)
        # Dashed reference line at the best score reached by any trial.
        ax.axhline(best_cv_accuracy, color="r", linestyle="--",
                   label=f"Best: {best_cv_accuracy:.4f}")
        ax.set_xlabel("Trial")
        ax.set_ylabel("CV Accuracy")
        ax.set_title("Optuna Optimisation History")
        ax.legend()
        plt.tight_layout()
        plt.show()

## 4. Best Model Summary

In [None]:
def _format_metric(value, digits=4):
    """Return ``value`` as a fixed-point string, or "N/A" when it is unusable.

    Missing keys, None, NaN and non-numeric values all yield "N/A", so the
    summary never applies a float format spec to a placeholder string.
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return "N/A"
    # NaN is the only float that is not equal to itself.
    if number != number:
        return "N/A"
    return f"{number:.{digits}f}"


# Summarise the top-ranked run. A metric column only exists if some run logged
# it, so every lookup goes through .get() and the tolerant formatter above.
if experiment is not None and len(runs) > 0:
    best_run = runs.iloc[0]  # Already sorted by accuracy DESC
    print(f"Best run ID:  {best_run['run_id']}")
    print(f"Model type:   {best_run.get('params.model_type', 'N/A')}")
    print(f"Accuracy:     {_format_metric(best_run.get('metrics.accuracy'))}")
    print(f"F1 Score:     {_format_metric(best_run.get('metrics.f1'))}")
    print(f"ROC AUC:      {_format_metric(best_run.get('metrics.roc_auc'))}")

---

### Next Steps

1. **Promote to Production** — Use `mlflow_utils.transition_model_stage()` to move the best model to the Production stage
2. **Serve** — Run `python serve.py` to start the prediction API
3. **Monitor** — Check `/health` and `/model/info` endpoints for model status