# 📊 03 — Evaluation

**Purpose:** Generate predictions and evaluate metrics for trained models.

**Sections:**
1. Inline Setup
2. Fast Data Loading from Tar Archive (optional; copies data to /content for prediction)
3. Select Checkpoint
4. Generate Predictions
5. Evaluate Metrics
6. Confusion Matrix Visualization

**Prerequisites:** Trained checkpoint exists on Drive (from 02_training.ipynb)


## 🔧 Section 1: Inline Setup


In [None]:
# --- INLINE SETUP ---
# Configuration constants used by this cell and the later cells, followed by
# the Google Drive mount.
import os
import subprocess
import sys

# Git repository and branch used by the clone/pull step of this cell.
REPO_URL = "https://github.com/ClaudiaCPach/CNNs-distracted-driving"
REPO_DIRNAME = "CNNs-distracted-driving"
BRANCH = "main"
PROJECT_ROOT = "/content/" + REPO_DIRNAME

# Locations under the mounted Drive folder.
DRIVE_PATH = "/content/drive/MyDrive/TFM"
DRIVE_DATA_ROOT = DRIVE_PATH + "/data"
OUT_ROOT = DRIVE_PATH + "/outputs"
CKPT_ROOT = DRIVE_PATH + "/checkpoints"

# Path under /content, and the dataset root (the Drive data folder by default).
FAST_DATA = "/content/data"
DATASET_ROOT = DRIVE_DATA_ROOT

# Mount Drive at /content/drive without forcing a remount.
from google.colab import drive
drive.mount("/content/drive", force_remount=False)

def sh(cmd):
    """Run ``cmd`` through ``/bin/bash`` and fail loudly on a non-zero exit.

    Args:
        cmd: Shell command line, handed to bash as a single string.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    completed = subprocess.run(cmd, shell=True, executable="/bin/bash")
    if completed.returncode:
        raise RuntimeError(f"Command failed: {cmd}")

# Update the existing checkout, or clone the repository on first use.
if os.path.isdir(PROJECT_ROOT):
    sh(f"cd {PROJECT_ROOT} && git pull --rebase origin {BRANCH}")
else:
    sh(f"git clone --branch {BRANCH} {REPO_URL} {PROJECT_ROOT}")

# Editable install of the project, then timm. Both go through `sh` so that a
# failed install raises immediately instead of being ignored (an IPython
# `!pip ...` line does not stop the cell on failure).
sh(f"pip install -q -e {PROJECT_ROOT}")
sh("pip install -q timm")

# Export the configured paths as environment variables; a later cell reloads
# `ddriver.config` after changing DATASET_ROOT.
os.environ.update(
    {
        "DRIVE_PATH": DRIVE_PATH,
        "DATASET_ROOT": DATASET_ROOT,
        "OUT_ROOT": OUT_ROOT,
        "CKPT_ROOT": CKPT_ROOT,
        "FAST_DATA": FAST_DATA,
    }
)

# Put the checkout and its src/ folder at the front of sys.path (src/ first).
# The membership check stops re-runs of this cell from piling up duplicates.
for _path in (PROJECT_ROOT, os.path.join(PROJECT_ROOT, "src")):
    if _path not in sys.path:
        sys.path.insert(0, _path)
print("âœ… Inline setup complete")


## ⚡ Section 2: Fast Data Loading from Tar Archive (Optional)

Run this if you need faster I/O for prediction. Skip if reading from Drive is acceptable.

**⚡ Speed Boost:** Uses tar archives for fast loading (~5 min instead of ~2 hours).


In [None]:
# ⚡ Extract HYBRID CROPS from tar archive (run if needed)
# Uses tar archive for fast loading (~5 min vs 2+ hours)
#
# After this cell, DATASET_ROOT and HYBRID_ROOT_LOCAL both point at the local
# copy under /content, and ddriver.config is reloaded to pick that up.

import os
import shutil
from pathlib import Path
import importlib
from ddriver.data.fastcopy import fast_copy_from_tar

HYBRID_VARIANT = "face"  # face | face_hands — match what you trained on

LOCAL_ROOT = Path("/content/data/hybrid")
DRIVE_ROOT = Path(OUT_ROOT) / "hybrid"
LOCAL_VARIANT_DIR = LOCAL_ROOT / HYBRID_VARIANT

# Tar archive path (created in 01_data_preparation.ipynb)
TAR_PATH = DRIVE_ROOT / f"hybrid_{HYBRID_VARIANT}.tar"

# Check if already extracted
if LOCAL_VARIANT_DIR.exists() and any(LOCAL_VARIANT_DIR.rglob("*.jpg")):
    jpg_count = sum(1 for _ in LOCAL_VARIANT_DIR.rglob("*.jpg"))
    print(f"âœ… Already in /content ({jpg_count} jpgs). Skipping.")
elif not TAR_PATH.exists():
    raise FileNotFoundError(
        f"Tar archive not found: {TAR_PATH}\n"
        "Run 01_data_preparation.ipynb Section 4 to create tar archives first."
    )
else:
    print(f"ðŸ“¦ Extracting {HYBRID_VARIANT} crops from tar archive...")
    # NOTE(review): remove_tar_after=True presumably deletes only a temporary
    # local copy of the archive, not the Drive original — confirm in
    # ddriver.data.fastcopy before relying on it.
    result = fast_copy_from_tar(
        tar_path_on_drive=TAR_PATH,
        dest_dir=LOCAL_VARIANT_DIR,
        remove_tar_after=True,
        verbose=True,
    )
    print(f"   âœ… Extracted {result.get('n_files', '?')} files")

# Copy CSVs (these are small, direct copy is fine).
# This runs even when the images were already extracted: the environment
# variables set below point later cells at LOCAL_ROOT, so the manifest and
# split CSVs must be present there too.
LOCAL_ROOT.mkdir(parents=True, exist_ok=True)
for fname in [f"manifest_{HYBRID_VARIANT}.csv", f"train_{HYBRID_VARIANT}.csv",
              f"val_{HYBRID_VARIANT}.csv", f"test_{HYBRID_VARIANT}.csv"]:
    src = DRIVE_ROOT / fname
    if src.exists():
        shutil.copy2(src, LOCAL_ROOT / fname)
        print(f"   Copied {fname}")
    elif not (LOCAL_ROOT / fname).exists():
        # Say so now rather than letting prediction fail later on a missing file.
        print(f"   WARNING: {fname} not found at {src} or in {LOCAL_ROOT}")

os.environ["HYBRID_ROOT_LOCAL"] = str(LOCAL_ROOT)
os.environ["DATASET_ROOT"] = str(LOCAL_ROOT)
# Reload so ddriver.config is re-executed after the DATASET_ROOT change.
from ddriver import config as _cfg
importlib.reload(_cfg)
print(f"âœ… DATASET_ROOT = {os.environ['DATASET_ROOT']}")


## 📦 Section 3: Select Checkpoint


In [None]:
# Select checkpoint to evaluate
from pathlib import Path

RUN_TAG = "effb0_face_v1"  # <<<< CHANGE to match your training run

run_base = Path(CKPT_ROOT) / "runs" / RUN_TAG
# Keep directories only. A trailing "/" in a glob pattern restricts matches to
# directories only on Python >= 3.11; on older versions stray files would be
# listed too and could be selected as the "newest" run.
runs = sorted(p for p in run_base.glob("*") if p.is_dir())
if not runs:
    raise FileNotFoundError(f"No run folders found under {run_base}")

print("Available runs:")
for idx, run_dir in enumerate(runs):
    print(f"  [{idx}] {run_dir.name}")

# Runs are sorted by folder name, so -1 picks the last name in sort order.
# NOTE(review): "newest" assumes run folder names sort chronologically
# (e.g. timestamps) — confirm against the training notebook.
RUN_IDX = -1  # -1 = newest
target_run = runs[RUN_IDX]
print(f"\nSelected run: {target_run}")

# Choose checkpoint
CHECKPOINT_NAME = "best.pt"  # or "last.pt"
LATEST_CKPT = target_run / CHECKPOINT_NAME
if not LATEST_CKPT.exists():
    raise FileNotFoundError(LATEST_CKPT)

print(f"Using checkpoint: {LATEST_CKPT}")


## 🔮 Section 4: Generate Predictions


In [None]:
# Generate predictions
# Builds the manifest/split CSV paths for the chosen configuration and runs
# the prediction CLI (src.ddriver.cli.predict) in a subprocess.
import os
import shlex
import subprocess
import textwrap
from pathlib import Path
import importlib


def _q(value):
    """Return ``value`` as a shell-safe token (paths/tags may contain spaces)."""
    return shlex.quote(str(value))


# Configuration
PRED_SPLIT = "test"           # or "val"
USE_HYBRID = True             # Match what model was trained on
ROI_VARIANT = "face"          # face | face_hands
MODEL_NAME = "efficientnet_b0"
BATCH_SIZE = 32
NUM_WORKERS = 2
IMAGE_SIZE = 224

# Control split selection (for 5-run experimental plan)
# Set to None for natural runs, or "facesubset" / "fhsubset" / "both" for control runs
# NOTE: Control splits only work with full-frame (USE_HYBRID=False)
USE_CONTROL_SPLIT = None      # None | "facesubset" | "fhsubset" | "both"

# Validate settings
if USE_CONTROL_SPLIT and USE_HYBRID:
    raise ValueError("Control splits require full-frame evaluation. Set USE_HYBRID=False to use control splits.")

PRED_TAG = f"{RUN_TAG}_{PRED_SPLIT}"

# Build paths
if USE_HYBRID:
    # Prefer the local copy if HYBRID_ROOT_LOCAL is set, otherwise read from Drive.
    hybrid_root = Path(os.environ.get("HYBRID_ROOT_LOCAL", Path(OUT_ROOT) / "hybrid"))
    manifest_pred = hybrid_root / f"manifest_{ROI_VARIANT}.csv"
    train_pred = hybrid_root / f"train_{ROI_VARIANT}.csv"
    val_pred = hybrid_root / f"val_{ROI_VARIANT}.csv"
    test_pred = hybrid_root / f"test_{ROI_VARIANT}.csv"

    # Ensure DATASET_ROOT points to hybrid
    os.environ["DATASET_ROOT"] = str(hybrid_root)
    from ddriver import config as _cfg
    importlib.reload(_cfg)
    print(f"ðŸ“¦ Using HYBRID crops: {ROI_VARIANT}")
else:
    manifest_pred = Path(OUT_ROOT) / "manifests" / "manifest.csv"
    # Handle control splits for 5-run experimental plan
    if USE_CONTROL_SPLIT:
        control_root = Path(OUT_ROOT) / "splits" / "control"
        train_pred = control_root / f"train_{USE_CONTROL_SPLIT}.csv"
        val_pred = control_root / f"val_{USE_CONTROL_SPLIT}.csv"
        test_pred = control_root / f"test_{USE_CONTROL_SPLIT}.csv"
        print(f"ðŸ“¦ Using FULL images with CONTROL SPLIT: {USE_CONTROL_SPLIT}")
    else:
        train_pred = Path(OUT_ROOT) / "splits" / "train.csv"
        val_pred = Path(OUT_ROOT) / "splits" / "val.csv"
        test_pred = Path(OUT_ROOT) / "splits" / "test.csv"
        print("ðŸ“¦ Using FULL images")

# Every interpolated path/tag goes through _q() so that spaces or shell
# metacharacters cannot split or alter the command; values without such
# characters are left unchanged by shlex.quote. The trailing backslashes are
# consumed by the Python string literal itself, so the shell receives the
# whole `python -m ...` invocation on one line.
predict_cmd = textwrap.dedent(f"""
cd {_q(PROJECT_ROOT)}
python -m src.ddriver.cli.predict \
    --model-name {_q(MODEL_NAME)} \
    --checkpoint {_q(LATEST_CKPT)} \
    --split {_q(PRED_SPLIT)} \
    --batch-size {BATCH_SIZE} \
    --num-workers {NUM_WORKERS} \
    --image-size {IMAGE_SIZE} \
    --out-tag {_q(PRED_TAG)} \
    --manifest-csv {_q(manifest_pred)} \
    --train-csv {_q(train_pred)} --val-csv {_q(val_pred)} --test-csv {_q(test_pred)}
""")

print("Running prediction:\n", predict_cmd)
# Output is captured and only shown once the subprocess has finished.
result = subprocess.run(predict_cmd, shell=True, text=True, capture_output=True)
if result.returncode != 0:
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    raise RuntimeError("Prediction failed")
print(result.stdout)
print("\nâœ… Predictions saved to OUT_ROOT/preds/")


## 📊 Section 5: Evaluate Metrics


In [None]:
# Evaluate metrics
# Runs the metrics CLI (src.ddriver.eval.metrics) on the predictions CSV
# written by the prediction cell, using the same manifest/split selection.
import os  # needed for os.environ below; imported here so the cell is self-contained
import shlex
import subprocess
import textwrap
from pathlib import Path


def _q(value):
    """Return ``value`` as a shell-safe token (paths/tags may contain spaces)."""
    return shlex.quote(str(value))


if USE_HYBRID:
    # Prefer the local copy if HYBRID_ROOT_LOCAL is set, otherwise read from Drive.
    hybrid_root = Path(os.environ.get("HYBRID_ROOT_LOCAL", Path(OUT_ROOT) / "hybrid"))
    manifest_path = hybrid_root / f"manifest_{ROI_VARIANT}.csv"
    split_csv_path = hybrid_root / f"{PRED_SPLIT}_{ROI_VARIANT}.csv"
else:
    manifest_path = Path(OUT_ROOT) / "manifests" / "manifest.csv"
    # Handle control splits for 5-run experimental plan
    if USE_CONTROL_SPLIT:
        control_root = Path(OUT_ROOT) / "splits" / "control"
        split_csv_path = control_root / f"{PRED_SPLIT}_{USE_CONTROL_SPLIT}.csv"
    else:
        split_csv_path = Path(OUT_ROOT) / "splits" / f"{PRED_SPLIT}.csv"

preds_csv_path = Path(OUT_ROOT) / "preds" / PRED_SPLIT / f"{PRED_TAG}.csv"
# The confusion-matrix cell locates the metrics output via METRICS_TAG.
METRICS_TAG = PRED_TAG

print(f"ðŸ“Š Evaluating: {preds_csv_path}")

# Interpolated paths/tags are shell-quoted; values without spaces or shell
# metacharacters are left unchanged. The trailing backslashes are consumed by
# the Python string literal, so the shell sees a single-line invocation.
metrics_cmd = textwrap.dedent(f"""
cd {_q(PROJECT_ROOT)}
python -m src.ddriver.eval.metrics \
    --manifest {_q(manifest_path)} \
    --split-csv {_q(split_csv_path)} \
    --predictions {_q(preds_csv_path)} \
    --out-tag {_q(METRICS_TAG)} \
    --per-driver \
    --per-camera
""")

print("Running metrics:\n", metrics_cmd)
# Output is captured and only shown once the subprocess has finished.
result = subprocess.run(metrics_cmd, shell=True, text=True, capture_output=True)
if result.returncode != 0:
    print("STDOUT:", result.stdout)
    print("STDERR:", result.stderr)
    raise RuntimeError("Metrics failed")
print(result.stdout)
print("\nâœ… Metrics saved to OUT_ROOT/metrics/")


## 🎯 Section 6: Confusion Matrix Visualization


In [None]:
# Visualize confusion matrix
# Reads metrics.json from the last metrics run folder for METRICS_TAG and
# saves two heatmaps (raw counts and row-normalized) next to it.
import json
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

metrics_root = Path(OUT_ROOT) / "metrics" / METRICS_TAG
# Keep directories only. A trailing "/" in a glob pattern restricts matches to
# directories only on Python >= 3.11; on older versions a stray file could be
# picked as the latest run.
runs = sorted(p for p in metrics_root.glob("*") if p.is_dir())
if not runs:
    raise FileNotFoundError(f"No metrics runs found under {metrics_root}")
# Last folder in name order.
# NOTE(review): assumes run folder names sort chronologically — confirm.
latest_metrics = runs[-1]
print("Reading from:", latest_metrics)

metrics = json.loads((latest_metrics / "metrics.json").read_text())
cm_info = metrics.get("confusion_matrix")
if not cm_info:
    raise ValueError("confusion_matrix missing from metrics.json")

# The same label list is used for both rows and columns.
labels = cm_info["rows_cols_labels"]
cm_df = pd.DataFrame(cm_info["matrix"], index=labels, columns=labels)

# Counts heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt="d", cmap="Blues")
plt.title(f"Confusion Matrix â€” {METRICS_TAG}")
plt.ylabel("True class")
plt.xlabel("Predicted class")
plt.tight_layout()
plt.savefig(latest_metrics / "confusion_matrix_counts.png")
plt.show()

# Normalized heatmap: divide each row by its sum; zero row sums are replaced
# by 1 so empty rows stay at 0 instead of dividing by zero.
cm_norm = cm_df.div(cm_df.sum(axis=1).replace(0, 1), axis=0)
plt.figure(figsize=(8, 6))
sns.heatmap(cm_norm, annot=True, fmt=".2f", cmap="Blues")
plt.title(f"Normalized Confusion Matrix â€” {METRICS_TAG}")
plt.ylabel("True class")
plt.xlabel("Predicted class")
plt.tight_layout()
plt.savefig(latest_metrics / "confusion_matrix_normalized.png")
plt.show()

print(f"âœ… Saved confusion matrices to {latest_metrics}")


## ✅ Evaluation Complete!

**Outputs saved to Drive:**
- `OUT_ROOT/preds/{split}/{PRED_TAG}.csv` — Predictions CSV
- `OUT_ROOT/metrics/{METRICS_TAG}/{timestamp}/metrics.json` — All metrics
- `OUT_ROOT/metrics/{METRICS_TAG}/{timestamp}/confusion_matrix_*.png` — Heatmaps

**Next steps:**
- Run **04_modality_analysis.ipynb** for per-class comparison across modalities
- Run **05_gradcam.ipynb** for attention visualizations
