# ☁️ NimbusOps – Monitoring Notebook

This notebook shows how to:

1. Connect to the MLflow tracking backend used by NimbusOps  
2. List experiments and runs  
3. Inspect metrics for recent training runs  
4. Visualize accuracy and AUC over time  

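By default, it uses the local `mlruns` directory in the project root
(NimbusOps/) as the MLflow tracking URI. Set the `MLFLOW_TRACKING_URI`
environment variable to point the notebook at a different tracking backend.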


In [None]:
from pathlib import Path
import os

import mlflow
import mlflow.tracking
import pandas as pd
import matplotlib.pyplot as plt

def _find_project_root() -> Path:
    """Locate the project root directory.

    `__file__` is not defined inside a Jupyter kernel, so it can only be used
    when this code runs as a plain script (e.g. after exporting the notebook).
    In a notebook we instead walk up from the current working directory until
    we find the directory that contains `src/train.py`.

    Returns:
        The resolved project root path.
    """
    try:
        # Script/module execution: keep the original "one level above this file" rule.
        return Path(__file__).resolve().parents[1]
    except NameError:
        pass  # Running inside a notebook kernel: no __file__ available.

    cwd = Path.cwd().resolve()
    for candidate in (cwd, *cwd.parents):
        if (candidate / "src" / "train.py").is_file():
            return candidate

    # NOTE(review): no marker found; assumes the kernel was started in a
    # directory one level below the project root (e.g. a notebooks/ folder).
    return cwd.parent


PROJECT_ROOT = _find_project_root()
DEFAULT_TRACKING_URI = PROJECT_ROOT / "mlruns"

# An explicit MLFLOW_TRACKING_URI in the environment takes precedence over the
# local default under the project root.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI", str(DEFAULT_TRACKING_URI))
mlflow.set_tracking_uri(tracking_uri)

print("Project root:", PROJECT_ROOT)
print("Using MLflow tracking URI:", tracking_uri)

## 1. List MLflow experiments

NimbusOps training (`src/train.py`) logs to an experiment named
`nimbusops-breast-cancer` by default.


In [None]:
client = mlflow.tracking.MlflowClient()

# `MlflowClient.list_experiments()` was removed in MLflow 2.0 in favour of
# `search_experiments()`. Prefer the new API and fall back to the old one only
# on clients that do not provide it.
if hasattr(client, "search_experiments"):
    experiments = client.search_experiments()
else:
    experiments = client.list_experiments()

# Passing the column list explicitly keeps the frame's columns stable even
# when the tracking backend has no experiments yet.
experiment_fields = ["name", "experiment_id", "artifact_location", "lifecycle_stage"]
exp_df = pd.DataFrame(
    [{field: getattr(e, field) for field in experiment_fields} for e in experiments],
    columns=experiment_fields,
)

exp_df

## 2. Load runs for NimbusOps experiment

We focus on the `nimbusops-breast-cancer` experiment and pull recent runs.


In [None]:
experiment_name = "nimbusops-breast-cancer"
experiment = client.get_experiment_by_name(experiment_name)

if experiment is None:
    raise RuntimeError(f"Experiment '{experiment_name}' not found yet. "
                       "Run src/train.py at least once.")

print("Using experiment:", experiment.name, "id:", experiment.experiment_id)

# Fetch at most the 100 most recently started runs of this experiment.
runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string="",
    max_results=100,
    order_by=["attributes.start_time DESC"],
)

# Flatten each run into one record; params and metrics get a prefix so the
# plotting cell can select metric columns by name.
rows = []
for r in runs:
    row = {
        "run_id": r.info.run_id,
        # start_time is converted from integer milliseconds.
        "start_time": pd.to_datetime(r.info.start_time, unit="ms"),
    }
    row.update({f"param_{k}": v for k, v in r.data.params.items()})
    row.update({f"metric_{k}": v for k, v in r.data.metrics.items()})
    rows.append(row)

if rows:
    # Oldest run first, so the last row is the most recent run.
    runs_df = (
        pd.DataFrame(rows)
        .sort_values("start_time")
        .reset_index(drop=True)
    )
else:
    # An experiment with no runs would otherwise yield a frame without a
    # "start_time" column and make sort_values() raise KeyError. Build an
    # empty, correctly typed frame so later cells can report "no runs".
    runs_df = pd.DataFrame(
        {
            "run_id": pd.Series(dtype="object"),
            "start_time": pd.Series(dtype="datetime64[ns]"),
        }
    )

runs_df

## 3. Visualize metrics over time

We plot every logged metric (for example accuracy and AUC) for recent runs, one panel per metric, to see how the model is evolving.


In [None]:
plt.style.use("default")

# Every column that the run-loading cell prefixed with "metric_" gets its own panel.
metric_cols = [name for name in runs_df.columns if name.startswith("metric_")]
n_metrics = len(metric_cols)

if n_metrics > 0:
    # squeeze=False always returns a 2-D array of axes, so the single-metric
    # case needs no special handling.
    fig, axes_grid = plt.subplots(
        n_metrics, 1, figsize=(8, 4 * n_metrics), squeeze=False
    )

    for ax, col in zip(axes_grid[:, 0], metric_cols):
        metric_name = col.replace("metric_", "")
        ax.plot(runs_df["start_time"], runs_df[col], marker="o")
        ax.set(
            title=metric_name.upper(),
            xlabel="run start time",
            ylabel=metric_name,
        )
        ax.grid(True, linestyle="--", alpha=0.5)

    fig.tight_layout()
    plt.show()
else:
    print("No metrics logged yet. Run src/train.py to create some runs.")

## 4. Inspect a single run

Pick the last run and inspect its parameters and metrics.


In [None]:
if runs_df.empty:
    print("No runs found yet.")
else:
    # The last row of runs_df is the run to inspect; re-fetch it from the
    # tracking backend to get its full params and metrics.
    last_run_id = runs_df["run_id"].iloc[-1]
    last_run = client.get_run(last_run_id)

    params_table = pd.DataFrame(
        last_run.data.params.items(), columns=["param", "value"]
    )
    metrics_table = pd.DataFrame(
        last_run.data.metrics.items(), columns=["metric", "value"]
    )

    print("Run ID:", last_run_id)
    print("\nParams:")
    display(params_table)

    print("\nMetrics:")
    display(metrics_table)