
# 🚀 Colab Setup — **CNNs-distracted-driving** (hardcoded + config-aware)

This version is **simplified and hardcoded** for your repo and URL, and it **respects your `src/ddriver/config.py`**.
- Repo name fixed to **`CNNs-distracted-driving`**
- Repo URL fixed to **`https://github.com/ClaudiaCPach/CNNs-distracted-driving`**
- Uses your `config.py` convention: when running in Colab, we **set env vars** (`DRIVE_PATH`, `DATASET_ROOT`, `OUT_ROOT`, `CKPT_ROOT`, `FAST_DATA`) so your code reads correct paths via `ddriver.config`.
- Optional `FAST_DATA` at `/content/data` for faster I/O (if you later copy data there).

> Run cells **top → bottom** the first time. Re-run **Update repo** to pull new commits after you push.


In [None]:

# 🔧 0) (Optional) quick GPU check
# IPython shell escape: run `nvidia-smi`; if it exits non-zero, the `||` fallback
# echoes a note instead.
!nvidia-smi || echo "No GPU detected ‚Äî CPU runtime is okay for setup steps."


In [None]:

# 🔧 1) Fixed config for your repo + Drive layout
REPO_URL       = "https://github.com/ClaudiaCPach/CNNs-distracted-driving"
REPO_DIRNAME   = "CNNs-distracted-driving"   # hardcoded
BRANCH         = "main"
PROJECT_ROOT   = f"/content/{REPO_DIRNAME}"  # where the repo will live in Colab

# Your persistent Google Drive base folder (matches your project docs):
DRIVE_PATH     = "/content/drive/MyDrive/TFM"

# Your dataset lives under TFM/data/auc.distracted.driver.dataset_v2 (as per your structure).
# DATASET_ROOT is the parent `data` folder, not .../data/auc.distracted.driver.dataset_v2.
DATASET_ROOT   = f"{DRIVE_PATH}/data"
# Base folder for generated artifacts; later cells read manifests/, splits/,
# preds/ and metrics/ beneath it and export it as the OUT_ROOT env var.
OUT_ROOT       = f"{DRIVE_PATH}/outputs"
# Base folder for training checkpoints (runs/<RUN_TAG>/... is looked up under it).
CKPT_ROOT      = f"{DRIVE_PATH}/checkpoints"

# Optional: a fast, ephemeral workspace inside the VM
FAST_DATA      = "/content/data"   # leave as-is; you can rsync into this later for speed


In [None]:

# 🔌 2) Mount Google Drive
from google.colab import drive
# Mount point is /content/drive, the prefix that DRIVE_PATH is built on.
drive.mount('/content/drive', force_remount=False)
print("‚úÖ Drive mounted.")


In [None]:

# üìÅ 3) Clone or update the repo (no name inference ‚Äî all hardcoded)
import os, subprocess

def sh(cmd):
    """Echo a shell command, run it under bash, and fail loudly on error.

    Args:
        cmd: Command line handed as-is to ``/bin/bash`` (shell syntax such as
            ``&&`` and ``cd`` is honoured).

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    print(f"\n$ {cmd}")
    exit_status = subprocess.run(cmd, shell=True, executable="/bin/bash").returncode
    if exit_status == 0:
        return
    raise RuntimeError(f"Command failed with exit code {exit_status}: {cmd}")

# Fresh runtime: clone the branch. Existing checkout: fetch, switch, then rebase onto origin.
if not os.path.isdir(PROJECT_ROOT):
    print(f"‚¨áÔ∏è Cloning {REPO_URL} ‚Üí {PROJECT_ROOT}")
    sh(f"git clone --branch {BRANCH} {REPO_URL} {PROJECT_ROOT}")
else:
    print(f"üìÅ Repo already present at {PROJECT_ROOT}. Pulling latest on branch {BRANCH}...")
    update_steps = [
        f"cd {PROJECT_ROOT}",
        f"git fetch origin {BRANCH}",
        f"git checkout {BRANCH}",
        f"git pull --rebase origin {BRANCH}",
    ]
    # Chained with && so a failing step stops the remaining ones.
    sh(" && ".join(update_steps))

print("PROJECT_ROOT =", PROJECT_ROOT)


In [None]:

# üì¶ 4) Install the repo (editable) + requirements (uses pyproject.toml if present)
import os, subprocess

def sh(cmd):
    """Print ``cmd`` with a ``$`` prompt, execute it through bash, and check the result.

    Args:
        cmd: Shell command string; it is interpreted by ``/bin/bash``.

    Raises:
        RuntimeError: When bash reports a non-zero exit code for ``cmd``.
    """
    print(f"\n$ {cmd}")
    completed = subprocess.run(cmd, shell=True, executable="/bin/bash")
    if completed.returncode:
        raise RuntimeError(f"Command failed with exit code {completed.returncode}: {cmd}")

print("üîÑ Upgrading pip/setuptools/wheel...")
sh("python -m pip install --upgrade pip setuptools wheel")

# The editable install is attempted only when the checkout has a pyproject.toml.
has_pyproject = os.path.exists(os.path.join(PROJECT_ROOT, "pyproject.toml"))
if not has_pyproject:
    print("‚ö†Ô∏è No pyproject.toml found. Skipping editable install.")
else:
    print("üì¶ Editable install from pyproject.toml ...")
    sh(f"cd {PROJECT_ROOT} && pip install -e .")

# requirements.txt is optional; install from it only if the file is there.
req_path = os.path.join(PROJECT_ROOT, "requirements.txt")
if not os.path.exists(req_path):
    print("‚ÑπÔ∏è No requirements.txt found ‚Äî continuing.")
else:
    print("üìù Installing requirements.txt...")
    sh(f"pip install -r {req_path}")


In [None]:

# üå≥ 5) Configure environment for your ddriver.config (Colab branch)
# Your config.py reads env vars and falls back to sensible defaults when in Colab.
import os

# One mapping drives the process environment, the .env file and the summary below.
colab_env = {
    "DRIVE_PATH": DRIVE_PATH,
    "DATASET_ROOT": DATASET_ROOT,
    "OUT_ROOT": OUT_ROOT,
    "CKPT_ROOT": CKPT_ROOT,
    "FAST_DATA": FAST_DATA,
}
os.environ.update(colab_env)

# Also write a .env (harmless in Colab; helpful if code calls load_dotenv())
env_text = "".join(f"{key}={value}\n" for key, value in colab_env.items())
dotenv_path = os.path.join(PROJECT_ROOT, ".env")
with open(dotenv_path, "w") as env_file:
    env_file.write(env_text)

print("‚úÖ Environment variables set for ddriver.config")
print("\nSummary:")
# Echo what actually landed in os.environ rather than the local variables.
for key in colab_env:
    print(f"{key} = {os.environ[key]}")


In [None]:

# üîó 6) (Optional) Symlink dataset into repo for familiar paths (scripts that assume PROJECT_ROOT/data/...)
# Not required when using ddriver.config, but convenient for ad-hoc browsing.
import os

LOCAL_DATA_DIR = f"{PROJECT_ROOT}/data"
os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

# NOTE(review): the link is named after the dataset folder but points at
# DATASET_ROOT (the parent `data` folder per the config cell) — confirm the
# intended target.
dataset_link = os.path.join(LOCAL_DATA_DIR, "auc.distracted.driver.dataset_v2")
# Anything already at that path (including a dangling symlink) is left untouched.
if os.path.islink(dataset_link) or os.path.exists(dataset_link):
    print("‚ÑπÔ∏è Dataset link already exists.")
else:
    try:
        os.symlink(DATASET_ROOT, dataset_link)
        print(f"üîó Symlinked {dataset_link} ‚Üí {DATASET_ROOT}")
    except OSError as e:
        # Best effort only: report the failure and carry on.
        print(f"‚ÑπÔ∏è Symlink skipped or failed: {e}")


In [None]:

# üîç 7) Quick sanity checks
import os, glob

def preview_dir(path, n=10):
    """Print up to ``n`` entries of a directory, sorted by name.

    Args:
        path: Directory to list.
        n: Maximum number of entries to print; zero or a negative value
            prints the header only.

    Listing failures (missing folder, permissions, ...) are reported with a
    ``Could not list:`` line instead of being raised, so a sanity-check cell
    keeps running.
    """
    print(f"Listing up to {n} items under: {path}")
    try:
        names = sorted(os.listdir(path))
    except OSError as e:
        print("Could not list:", e)
        return
    # Clamp at zero so a negative n cannot turn into an end-relative slice.
    for name in names[:max(n, 0)]:
        print("  -", name)

print("\nTop-level DATASET_ROOT:")
dataset_root = os.environ["DATASET_ROOT"]
preview_dir(dataset_root, n=10)

# The folder name really does contain a space after "cam2_".
cam1_train = os.path.join(dataset_root, "v2_cam1_cam2_ split_by_driver", "Camera 1", "train")
print("\nCamera 1/train class folders (first 10):")
preview_dir(cam1_train, n=10)

# Count regular files in a few class folders; folders that are absent are skipped.
for cls in ("c0", "c1", "c2"):
    cls_dir = os.path.join(cam1_train, cls)
    if not os.path.isdir(cls_dir):
        continue
    num_imgs = sum(1 for entry in glob.glob(os.path.join(cls_dir, "*")) if os.path.isfile(entry))
    print(f"  ‚Ä¢ {cls}: {num_imgs} files")


In [None]:

# ‚úÖ 8) Import smoke test (uses your package + config.py)
import sys, os
# Make both the repo root and src/ (which holds the ddriver package) importable.
for extra_path in (PROJECT_ROOT, os.path.join(PROJECT_ROOT, "src")):
    sys.path.append(extra_path)

try:
    import ddriver
    print("ddriver imported OK from:", ddriver.__file__)
    # Confirm config picks up Colab env:
    try:
        from ddriver import config
        print("Loaded ddriver.config successfully.")
        # Echo the resolved paths from config, one attribute per line.
        resolved = (
            ("config.DATASET_ROOT =", "DATASET_ROOT"),
            ("config.OUT_ROOT     =", "OUT_ROOT"),
            ("config.CKPT_ROOT    =", "CKPT_ROOT"),
            ("config.FAST_DATA    =", "FAST_DATA"),
        )
        for label, attr in resolved:
            print(label, getattr(config, attr))
    except Exception as e:
        # A broken config is reported but does not fail the cell.
        print("Note: ddriver.config not imported:", e)
except Exception as e:
    print("‚ö†Ô∏è Import failed ‚Äî check package name/setup.")
    print(e)


# 📋 9) Generate Manifest and Split CSVs

This step creates the CSV files that tell your code where all the images are and which ones go to train/val/test.

**What this does:**
- Scans all your images in the dataset folder
- Creates a big list (manifest.csv) with info about every image
- Creates three smaller lists (train.csv, val.csv, test.csv) that say which images belong where
- Saves everything to your Google Drive so it's permanent

**Why we need this:**
- Your training code needs to know which images to use
- The manifest remembers which driver each image belongs to (for VAL split)
- The split CSVs organize images into train/val/test groups


In [None]:
# Run the manifest generator
# This is like asking a librarian to catalog all your books and create reading lists

import os
import subprocess
import sys

# Keeps the notebook kernel itself able to import from the repo root.
sys.path.insert(0, PROJECT_ROOT)

# The manifest script runs in a child Python process, which does not see the
# kernel's sys.path edits. Hand the repo root and its src/ folder (where the
# ddriver package lives) to the child through PYTHONPATH so the module is found
# even when the editable install was skipped.
child_env = dict(os.environ)
child_paths = [os.path.join(PROJECT_ROOT, "src"), PROJECT_ROOT]
if child_env.get("PYTHONPATH"):
    child_paths.append(child_env["PYTHONPATH"])
child_env["PYTHONPATH"] = os.pathsep.join(child_paths)

# Run the manifest script
# --write-split-lists means "also create train.csv, val.csv, test.csv files"
manifest_cmd = f"cd {PROJECT_ROOT} && python -m ddriver.data.manifest --write-split-lists"

print("üî® Generating manifest and split CSVs...")
print(f"Running: {manifest_cmd}\n")

# Output is captured, so nothing is shown until the script has finished.
result = subprocess.run(
    manifest_cmd,
    shell=True,
    capture_output=True,
    text=True,
    env=child_env,
)

# Show what happened
print(result.stdout)
if result.stderr:
    print("Warnings/Errors:")
    print(result.stderr)

if result.returncode == 0:
    print("\n‚úÖ Manifest and split CSVs generated successfully!")
    print(f"   Manifest: {os.environ['OUT_ROOT']}/manifests/manifest.csv")
    print(f"   Train split: {os.environ['OUT_ROOT']}/splits/train.csv")
    print(f"   Val split: {os.environ['OUT_ROOT']}/splits/val.csv")
    print(f"   Test split: {os.environ['OUT_ROOT']}/splits/test.csv")
else:
    print(f"\n‚ùå Error generating manifest (exit code {result.returncode})")
    raise RuntimeError("Manifest generation failed")


In [None]:
# Quick check: Did the CSVs get created?
# This is like checking that the librarian actually wrote down all the book lists

import pandas as pd
from pathlib import Path

# All four CSVs hang off the OUT_ROOT environment variable.
out_root = Path(os.environ['OUT_ROOT'])
splits_dir = out_root / "splits"
manifest_path = out_root / "manifests" / "manifest.csv"
train_path = splits_dir / "train.csv"
val_path = splits_dir / "val.csv"
test_path = splits_dir / "test.csv"

print("üìä Checking CSV files...\n")

csv_targets = {
    "Manifest": manifest_path,
    "Train": train_path,
    "Val": val_path,
    "Test": test_path,
}
# Report row count and columns for each file that exists; flag the ones that don't.
for name, path in csv_targets.items():
    if not path.exists():
        print(f"‚ùå {name}: File not found at {path}")
        continue
    df = pd.read_csv(path)
    print(f"‚úÖ {name}: {len(df)} rows, columns: {list(df.columns)}")

# Show a sample from the manifest
if manifest_path.exists():
    print("\nüìÑ Sample from manifest (first 3 rows):")
    sample = pd.read_csv(manifest_path).head(3)
    preview_cols = ['path', 'class_id', 'driver_id', 'camera', 'split']
    print(sample[preview_cols].to_string())


# 🧪 10) Test dataset.py and datamod.py

Now let's make sure the code that loads images actually works!

**What we're testing:**
1. **dataset.py** - Can it load a single image and give us the right info?
2. **datamod.py** - Can it create data loaders that give us batches of images?

**Why test this:**
- If these don't work, training will fail
- Better to catch problems now than later
- We want to see that images load correctly and labels are right


In [None]:
# Test 1: Can dataset.py load a single image?
# This is like testing if a worker can fetch one book from the library

from ddriver.data.dataset import AucDriverDataset
from torchvision import transforms as T
from pathlib import Path

# Smoke test for AucDriverDataset: build the VAL dataset, fetch item 0 and print
# its fields. Any exception is printed with a traceback and then re-raised.
# The CSV paths are built from the OUT_ROOT environment variable.
# NOTE(review): `os` is not imported in this cell; it relies on an earlier cell's import.
manifest_csv = Path(os.environ['OUT_ROOT']) / "manifests" / "manifest.csv"
val_split_csv = Path(os.environ['OUT_ROOT']) / "splits" / "val.csv"

print("üß™ Test 1: Testing AucDriverDataset (dataset.py)")
print(f"   Manifest: {manifest_csv}")
print(f"   Using Val split: {val_split_csv}\n")

try:
    # Create a simple dataset (no fancy transforms, just load the image)
    simple_transforms = T.ToTensor()  # Just convert to tensor, no augmentation
    
    val_dataset = AucDriverDataset(
        manifest_csv=manifest_csv,
        split_csv=val_split_csv,
        transforms=simple_transforms
    )
    
    print(f"‚úÖ Dataset created! It has {len(val_dataset)} images in VAL split")
    
    # Try to load the first image
    print("\nüìñ Loading first image from VAL split...")
    # The sample is read below by the keys 'image', 'label', 'driver_id', 'camera', 'path'.
    sample = val_dataset[0]
    
    print(f"‚úÖ Image loaded successfully!")
    print(f"   Image shape: {sample['image'].shape} (should be [3, height, width])")
    print(f"   Label: {sample['label']} (should be 0-9)")
    print(f"   Driver ID: {sample['driver_id']} (VAL should have driver IDs)")
    print(f"   Camera: {sample['camera']} (should be 'cam1' or 'cam2')")
    print(f"   Path: {sample['path'][:80]}...")  # Show first 80 chars
    
    # Check that label is valid (0-9); an out-of-range label is reported, not raised
    if 0 <= sample['label'] <= 9:
        print(f"   ‚úÖ Label is valid (0-9)")
    else:
        print(f"   ‚ùå Label {sample['label']} is NOT in range 0-9!")
    
    # Check that VAL has driver IDs; a missing ID only produces a warning line
    if sample['driver_id'] is not None:
        print(f"   ‚úÖ VAL split has driver ID (as expected)")
    else:
        print(f"   ‚ö†Ô∏è  VAL split missing driver ID (might be okay if this image wasn't in your DRIVER_RANGES)")
    
    print("\n‚úÖ Test 1 PASSED: dataset.py works!")
    
except Exception as e:
    # Print the failure and full traceback, then re-raise so the cell is marked as failed.
    print(f"\n‚ùå Test 1 FAILED: {e}")
    import traceback
    traceback.print_exc()
    raise


# 🧵 11) Full pipeline (train → predict → metrics)

Now that data loading is working, these next cells show how to:
1. Register the model you want (e.g., `resnet18` from timm)
2. Run training from the command line helper
3. Generate predictions from a checkpoint
4. Evaluate metrics and save all logs to Drive

> You can change the `RUN_TAG`, model name, epochs, etc. in the code below.


In [None]:
# Register models you want to use (run once per runtime)
# This example uses timm's resnet18.

# IPython shell escape: quietly install timm into the runtime.
!pip -q install timm

from ddriver.models import registry

# NOTE(review): this registers the model inside the notebook kernel only; the
# train/predict cells launch separate Python processes — confirm the CLI
# registers the model on its own.
registry.register_timm_backbone("resnet18")
# Print at most the first 10 names the registry reports.
print("Available models:", registry.available_models()[:10])


## 🚂 11.1 Train a model (adjust these knobs)

- Choose a `RUN_TAG` so logs/checkpoints go into `TFM/checkpoints/runs/<tag>/...`
- Set epochs/batch size to something small for a dry run (1 epoch, 16 batch)
- This command uses the CLI helper (`python -m src.ddriver.cli.train ...`)
- Logs + checkpoints are saved automatically to Google Drive


In [None]:
import shlex
import subprocess, textwrap

RUN_TAG = "cam_mix_dryrun"      # change me for each experiment
MODEL_NAME = "resnet18"         # must be registered above
EPOCHS = 1                       # start tiny to make sure it works
BATCH_SIZE = 16
NUM_WORKERS = 2
IMAGE_SIZE = 224
LR = 1e-3

# Build the CLI call as an argument list (no shell): each value is passed
# verbatim, so a tag or path with spaces/metacharacters cannot be mis-split.
train_args = [
    "python", "-m", "src.ddriver.cli.train",
    "--model-name", MODEL_NAME,
    "--epochs", str(EPOCHS),
    "--batch-size", str(BATCH_SIZE),
    "--num-workers", str(NUM_WORKERS),
    "--image-size", str(IMAGE_SIZE),
    "--lr", str(LR),
    "--out-tag", RUN_TAG,
]
# Printable form only; the list above is what actually runs.
train_cmd = shlex.join(train_args)

print("Running training command:\n", f"(cwd: {PROJECT_ROOT}) {train_cmd}")
# cwd= replaces the separate `cd` line: a missing PROJECT_ROOT now raises
# immediately instead of letting the module run from the wrong directory.
result = subprocess.run(train_args, cwd=PROJECT_ROOT, text=True)
if result.returncode != 0:
    raise RuntimeError("Training command failed. See logs above.")
print("\n‚úÖ Training run complete!")


## 📦 11.2 Pick the latest checkpoint file

This cell looks inside `CKPT_ROOT/runs/<RUN_TAG>/` and grabs the newest `epoch_*.pt`. Use this path in the prediction step.


In [None]:
import re
from pathlib import Path


def _natural_key(path):
    """Sort key that compares the digit runs in ``path.name`` as integers.

    With plain string sorting ``epoch_10.pt`` comes before ``epoch_9.pt``;
    with this key it comes after.
    """
    # re.split with a capture group alternates text, digits, text, ...;
    # odd positions are always the captured digit runs.
    pieces = re.split(r"(\d+)", path.name)
    return [int(piece) if idx % 2 else piece for idx, piece in enumerate(pieces)]


run_base = Path(CKPT_ROOT) / "runs" / RUN_TAG
# Keep directories only, ordered by name with numbers compared numerically.
# Assumes run-folder names increase over time (e.g. timestamps) — TODO confirm.
all_runs = sorted((p for p in run_base.glob("*") if p.is_dir()), key=_natural_key)
if not all_runs:
    raise FileNotFoundError(f"No run folders found under {run_base}")
latest_run = all_runs[-1]

# Highest epoch number wins, not the last name in string order.
checkpoints = sorted(latest_run.glob("epoch_*.pt"), key=_natural_key)
if not checkpoints:
    raise FileNotFoundError(f"No checkpoints found under {latest_run}")
LATEST_CKPT = checkpoints[-1]

print("Latest run folder:", latest_run)
print("Using checkpoint:", LATEST_CKPT)


## 🔮 11.3 Generate predictions CSV

- Uses the checkpoint above
- Choose which split to predict on (`val` or `test`)
- Saves CSV under `OUT_ROOT/preds/<split>/<out_tag>.csv`


In [None]:
import shlex
import subprocess

PRED_SPLIT = "val"           # or "test"
PRED_TAG = f"{RUN_TAG}_{PRED_SPLIT}"

# Argument list (no shell): the checkpoint path and tags are passed verbatim,
# so spaces or shell metacharacters in them cannot break the command.
predict_args = [
    "python", "-m", "src.ddriver.cli.predict",
    "--model-name", MODEL_NAME,
    "--checkpoint", str(LATEST_CKPT),
    "--split", PRED_SPLIT,
    "--batch-size", str(BATCH_SIZE),
    "--num-workers", str(NUM_WORKERS),
    "--image-size", str(IMAGE_SIZE),
    "--out-tag", PRED_TAG,
]
# Printable form only; the list above is what actually runs.
predict_cmd = shlex.join(predict_args)

print("Running prediction command:\n", f"(cwd: {PROJECT_ROOT}) {predict_cmd}")
# cwd= replaces the separate `cd` line, so a missing PROJECT_ROOT fails fast.
result = subprocess.run(predict_args, cwd=PROJECT_ROOT, text=True)
if result.returncode != 0:
    raise RuntimeError("Prediction command failed. See logs above.")
print("\n‚úÖ Predictions completed! Check OUT_ROOT/preds/")


## 📊 11.4 Evaluate metrics

- Uses `src/ddriver/metrics.py`
- Reads the manifest + split CSV + predictions CSV
- Saves results under `OUT_ROOT/metrics/<tag>/<timestamp>/`
- Shows accuracy + macro F1 + per-driver/camera (optional)


In [None]:
import shlex
import subprocess
from pathlib import Path

# Inputs for the metrics CLI, all located under OUT_ROOT.
manifest_path = Path(OUT_ROOT) / "manifests" / "manifest.csv"
split_csv_path = Path(OUT_ROOT) / "splits" / f"{PRED_SPLIT}.csv"
preds_csv_path = Path(OUT_ROOT) / "preds" / PRED_SPLIT / f"{PRED_TAG}.csv"
METRICS_TAG = PRED_TAG

# Argument list (no shell): paths are passed verbatim, so spaces or shell
# metacharacters in them cannot break the command.
metrics_args = [
    "python", "-m", "src.ddriver.metrics",
    "--manifest", str(manifest_path),
    "--split-csv", str(split_csv_path),
    "--predictions", str(preds_csv_path),
    "--out-tag", METRICS_TAG,
    "--per-driver",
    "--per-camera",
]
# Printable form only; the list above is what actually runs.
metrics_cmd = shlex.join(metrics_args)

print("Running metrics command:\n", f"(cwd: {PROJECT_ROOT}) {metrics_cmd}")
# cwd= replaces the separate `cd` line, so a missing PROJECT_ROOT fails fast.
result = subprocess.run(metrics_args, cwd=PROJECT_ROOT, text=True)
if result.returncode != 0:
    raise RuntimeError("Metrics command failed. See logs above.")
print("\n‚úÖ Metrics saved under OUT_ROOT/metrics/")


In [None]:
# Test 2: Can datamod.py create data loaders and load batches?
# This is like testing if the teacher can organize students into groups and give them work

from ddriver.data.datamod import build_dataloaders, make_cfg_from_config
import torch

# Smoke test for build_dataloaders: build a config, create the loaders, pull one
# batch from "train" and one from "val", and print what came back. Shape and
# driver-ID problems are only reported; exceptions are printed and re-raised.
print("üß™ Test 2: Testing build_dataloaders (datamod.py)\n")

try:
    # Create config using the helper that uses ddriver.config paths
    # This is the easy way - it automatically finds your CSVs!
    cfg = make_cfg_from_config(
        batch_size=4,  # Small batch for testing (faster)
        num_workers=2,  # Use 2 workers (Colab might have limited CPUs)
        image_size=224,  # Standard image size
    )
    
    print("‚úÖ Config created using ddriver.config paths:")
    print(f"   Manifest: {cfg.manifest_csv}")
    print(f"   Train: {cfg.train_split_csv}")
    print(f"   Val: {cfg.val_split_csv}")
    print(f"   Test: {cfg.test_split_csv}\n")
    
    # Build the data loaders
    print("üî® Building data loaders...")
    # The result is indexed by split name ("train", "val") below.
    loaders = build_dataloaders(cfg)
    
    print("‚úÖ Data loaders created!")
    print(f"   Available splits: {list(loaders.keys())}\n")
    
    # Test train loader: only the first batch is fetched
    print("üì¶ Testing TRAIN loader...")
    train_loader = loaders["train"]
    train_batch = next(iter(train_loader))
    
    print(f"   ‚úÖ Train batch loaded!")
    print(f"   Batch size: {train_batch['image'].shape[0]} images")
    print(f"   Image shape: {train_batch['image'].shape} (should be [batch_size, 3, 224, 224])")
    print(f"   Labels: {train_batch['label'].tolist()} (should be list of 0-9)")
    print(f"   Driver IDs: {train_batch['driver_id']} (train should mostly be None)")
    print(f"   Cameras: {train_batch['camera']}")
    
    # Check image shape is correct (a mismatch is reported, not raised)
    expected_shape = (cfg.batch_size, 3, cfg.image_size, cfg.image_size)
    if train_batch['image'].shape == expected_shape:
        print(f"   ‚úÖ Image shape is correct: {train_batch['image'].shape}")
    else:
        print(f"   ‚ùå Image shape wrong! Got {train_batch['image'].shape}, expected {expected_shape}")
    
    # Test val loader: again only the first batch
    print("\nüì¶ Testing VAL loader...")
    val_loader = loaders["val"]
    val_batch = next(iter(val_loader))
    
    print(f"   ‚úÖ Val batch loaded!")
    print(f"   Batch size: {val_batch['image'].shape[0]} images")
    print(f"   Image shape: {val_batch['image'].shape}")
    print(f"   Labels: {val_batch['label'].tolist()}")
    print(f"   Driver IDs: {val_batch['driver_id']} (VAL should have driver IDs!)")
    
    # Check that VAL has driver IDs: one non-None entry in the batch is enough
    val_has_ids = any(did is not None for did in val_batch['driver_id'])
    if val_has_ids:
        print(f"   ‚úÖ VAL batch has driver IDs (as expected)")
    else:
        print(f"   ‚ö†Ô∏è  VAL batch missing driver IDs (check your DRIVER_RANGES in manifest.py)")
    
    # Test that images are normalized (should be in range roughly -2 to 2 after ImageNet normalization)
    # The range is printed for eyeballing only; nothing is asserted on it.
    img_min, img_max = train_batch['image'].min().item(), train_batch['image'].max().item()
    print(f"\n   Image value range: [{img_min:.3f}, {img_max:.3f}]")
    print(f"   (Should be roughly -2 to 2 after ImageNet normalization)")
    
    print("\n‚úÖ Test 2 PASSED: datamod.py works! Data loaders are ready for training!")
    
except Exception as e:
    # Print the failure and full traceback, then re-raise so the cell is marked as failed.
    print(f"\n‚ùå Test 2 FAILED: {e}")
    import traceback
    traceback.print_exc()
    raise


### ✅ You're all set!

**What just happened:**
1. ✅ Mounted Google Drive
2. ✅ Cloned/updated your repo
3. ✅ Installed the package
4. ✅ Set up paths (works on Colab and Mac!)
5. ✅ Generated manifest.csv and train/val/test split CSVs
6. ✅ Tested that dataset.py can load images
7. ✅ Tested that datamod.py can create data loaders
8. ✅ (Optional) Registered a model + ran training → prediction → metrics pipeline

**Your CSVs are saved in Google Drive:**
- `OUT_ROOT/manifests/manifest.csv` - Big list of all images
- `OUT_ROOT/splits/train.csv` - Training images
- `OUT_ROOT/splits/val.csv` - Validation images (with driver IDs!)
- `OUT_ROOT/splits/test.csv` - Test images

**Next steps:**
- Adjust the training/prediction cells (epochs, batch size, tags) to run bigger experiments
- All paths use `ddriver.config` so it works on Colab and Mac
- Re-run **Clone/Update** cell after pushing new commits
- Optional: copy some data into `/content/data` to use `FAST_DATA` for speed



### ✅ You’re set!
- Your repo + URL are **hardcoded**.
- `ddriver.config` will see the Colab env vars and resolve paths there.
- Re-run **Clone/Update** after pushing new commits.
- Optional: copy some data into `/content/data` to use `FAST_DATA` for speed, then call `ddriver.config.dataset_dir(prefer_fast=True)` in your scripts.


In [None]:
# ---- Colab cell: append metrics + params to Google Sheet ----
!pip -q install gspread

import json, os
from pathlib import Path
import gspread
from ddriver import config

from google.colab import auth
auth.authenticate_user()
gc = gspread.authorize(gspread.auth.default())

import gspread
from google.colab import auth
import google.auth

auth.authenticate_user()
creds, _ = google.auth.default()
gc = gspread.authorize(creds)

SHEET_NAME = "TFM Logs"   # change if needed
WORKSHEET  = "Sheet1"     # or whatever tab name

# Point to your latest run folder (paste it from the console printout)
run_dir = config.OUT_ROOT / "runs" / "val_run_test" / "2025-11-23_2-50"

metrics = json.loads((run_dir / "metrics.json").read_text())
inputs  = json.loads((run_dir / "inputs.json").read_text())
params_path = run_dir / "params.json"
params = json.loads(params_path.read_text()) if params_path.exists() else {}

ws = gc.open(SHEET_NAME).worksheet(WORKSHEET)

row = [
  str(run_dir),                        # Run folder
  inputs.get("predictions",""),        # Predictions file
  inputs.get("split_source",""),       # Split source
  metrics["num_examples"],             # Support
  round(metrics["overall"]["accuracy"], 4),
  round(metrics["overall"]["macro_avg"]["f1"], 4),
  json.dumps(params, sort_keys=True)[:500],  # params preview (trim)
]
ws.append_row(row, value_input_option="USER_ENTERED")
print("Appended to Google Sheet ‚úÖ")
