# Set Up Notebook Environment
This notebook shows how to trigger the end-to-end AutoML pipeline with a tighter time budget and document the outputs.

In [None]:
import os
import subprocess
import sys
from pathlib import Path

import pandas as pd

# Every relative path in this notebook is resolved against the kernel's working directory.
REPO_ROOT = Path.cwd()

# Cap the AutoML search at ~30 seconds per dataset unless a budget is already exported.
if "FLAML_TIME_BUDGET" not in os.environ:
    os.environ["FLAML_TIME_BUDGET"] = "30"

# Demonstrate Code with Inline Comments
The cell below kicks off the orchestrator script and captures its return details for quick inspection.

In [None]:
# Launch the consolidated training + analysis pipeline with the kernel's own interpreter:
# a bare "python" is looked up on PATH and may be a different installation (or missing).
pipeline_cmd = [sys.executable, "scripts/run_all.py"]

# Child environment: a copy of the current one (so FLAML_TIME_BUDGET carries over) with the
# repository root placed first on PYTHONPATH; existing PYTHONPATH entries are kept after it.
runtime_env = os.environ.copy()
runtime_env["PYTHONPATH"] = os.pathsep.join(
    entry for entry in (str(REPO_ROOT), runtime_env.get("PYTHONPATH", "")) if entry
)

result = subprocess.run(
    pipeline_cmd,
    cwd=REPO_ROOT,
    env=runtime_env,
    capture_output=True,
    text=True,
    check=False,  # Report the exit status below instead of raising
)

print(f"Return code: {result.returncode}")
print(result.stdout[-1000:])  # Tail only; the full log can be long
if result.stderr:
    # stderr is captured too; without this tail a failing run shows no error message.
    print("stderr:\n" + result.stderr[-1000:])

# Pair Code Cells with Markdown Explanations
The previous cell invokes `scripts/run_all.py` using the repository root as the working directory. The copied environment carries `FLAML_TIME_BUDGET` (30 seconds, unless a value was already set in the environment), so the AutoML search for each dataset stays short. We capture stdout and the return code to verify whether the orchestrator succeeded without scrolling through the entire log.

# Run and Verify Output
This step inspects the first leaderboard artifact it finds (the top-level `reports/` folder first, then the per-run folders under `runs/`) so we can confirm that the pipeline persisted its metrics.

In [None]:
# Lookup order: the top-level boards under reports/ first, then each run's own board
# (per-run matches are appended in sorted path order).
candidate_paths = [
    REPO_ROOT / "reports" / file_name
    for file_name in ("leaderboard_multi.csv", "leaderboard.csv")
]
candidate_paths.extend(sorted((REPO_ROOT / "runs").glob("*/reports/leaderboard.csv")))

# Take the first candidate that is actually present on disk.
chosen_leaderboard = next((candidate for candidate in candidate_paths if candidate.exists()), None)
if chosen_leaderboard is None:
    raise FileNotFoundError("No leaderboard CSV was generated. Make sure the orchestrator completed.")

print(f"Inspecting leaderboard at: {chosen_leaderboard.relative_to(REPO_ROOT)}")
df = pd.read_csv(chosen_leaderboard)
display(df.head())

# Run Additional Python Entry Points
If you need to call other scripts in this repository, reuse the helper below. It keeps the working directory anchored at the repository root and propagates the same environment (including `PYTHONPATH` and the 30-second FLAML budget).

In [None]:
def run_python(relative_path: str, *extra_args: str) -> subprocess.CompletedProcess:
    """Execute a repository Python script with consistent environment settings.

    The script is started with the notebook kernel's interpreter
    (``sys.executable``), with ``REPO_ROOT`` as its working directory and
    ``runtime_env`` as its environment. Output is captured rather than
    streamed; the last 500 characters of stdout and stderr are echoed.

    Args:
        relative_path: Path of the script, relative to ``REPO_ROOT``.
        *extra_args: Extra command-line arguments forwarded to the script.

    Returns:
        The ``subprocess.CompletedProcess`` of the run. A non-zero exit code
        does not raise (``check=False``); inspect ``returncode`` instead.
    """
    # Use the kernel's interpreter: a bare "python" is resolved through PATH and
    # may point at a different installation than the one running this notebook.
    command = [sys.executable, relative_path, *extra_args]
    print(f"Running: {' '.join(command)}")
    completed = subprocess.run(
        command,
        cwd=REPO_ROOT,
        env=runtime_env,
        capture_output=True,
        text=True,
        check=False,
    )
    print(f" → exit code {completed.returncode}")
    if completed.stdout:
        print(completed.stdout[-500:])
    if completed.stderr:
        print("stderr:\n" + completed.stderr[-500:])
    return completed

# Example: uncomment to run individual components with the same settings
# run_python("scripts/run_automl_suite.py")
# run_python("Project/analysis/explain_shap.py")
# run_python("Project/trainers/train_boosters.py")

# Summarize Leaderboard Metrics
To quickly compare frameworks, the next cell aggregates the leaderboard loaded above by computing the mean and standard deviation of the key metrics per framework, and per dataset and framework when a `dataset` column is present.

In [None]:
def _summarize_by(frame: pd.DataFrame, keys, metrics: list) -> pd.DataFrame:
    """Return mean/std of ``metrics`` grouped by ``keys`` with flat ``<metric>_<stat>`` columns.

    Args:
        frame: Leaderboard rows to aggregate.
        keys: Column name, or list of column names, to group by.
        metrics: Metric columns to aggregate.

    Returns:
        One row per group, values rounded to 4 decimals. The standard deviation
        is NaN for groups that contain a single row.
    """
    rollup = frame.groupby(keys)[metrics].agg(["mean", "std"]).round(4)
    # agg() with a list of functions yields (metric, stat) column tuples; join them into one label.
    rollup.columns = ["_".join(filter(None, level)).strip("_") for level in rollup.columns]
    return rollup


# Only aggregate the metric columns this particular leaderboard actually contains.
metrics_to_track = [col for col in ["f1_macro", "accuracy", "roc_auc_ovr", "avg_precision_ovr"] if col in df.columns]
if not metrics_to_track:
    raise ValueError("No common metric columns found in the leaderboard.")
if "framework" not in df.columns:
    # Fail with an explicit message instead of a bare KeyError from groupby.
    raise ValueError("Leaderboard has no 'framework' column to group by.")

summary = _summarize_by(df, "framework", metrics_to_track)
display(summary)

if "dataset" in df.columns:
    dataset_rollup = _summarize_by(df, ["dataset", "framework"], metrics_to_track)
    display(dataset_rollup)
else:
    print("Dataset column not present; per-framework summary shown above.")