# 🚀 00 — Colab Setup

**Purpose:** Core environment setup for all CNNs-distracted-driving notebooks.

Run this notebook **first** in any new Colab session, OR use the inline setup cells in other notebooks.

**What this does:**
1. Check GPU availability
2. Mount Google Drive
3. Clone/update the repo
4. Install the package
5. Configure environment variables
6. Verify imports work

**Optional:** Copy full dataset images to `/content` for faster I/O during training.


In [None]:
# üîß 0) Quick GPU check
!nvidia-smi || echo "No GPU detected ‚Äî CPU runtime is okay for setup steps."


In [None]:
# 🔧 1) Fixed config for your repo + Drive layout
import os

# --- Repository coordinates ---
REPO_URL = "https://github.com/ClaudiaCPach/CNNs-distracted-driving"
REPO_DIRNAME = "CNNs-distracted-driving"
BRANCH = "main"
# The checkout lives directly under /content.
PROJECT_ROOT = "/content/" + REPO_DIRNAME

# --- Persistent storage (Google Drive base folder and its sub-folders) ---
DRIVE_PATH = "/content/drive/MyDrive/TFM"
DRIVE_DATA_ROOT = "/".join((DRIVE_PATH, "data"))
OUT_ROOT = "/".join((DRIVE_PATH, "outputs"))
CKPT_ROOT = "/".join((DRIVE_PATH, "checkpoints"))

# --- Fast, ephemeral workspace on the VM's own disk ---
FAST_DATA = "/content/data"

# Drive is the canonical dataset root to begin with.
DATASET_ROOT = DRIVE_DATA_ROOT


In [None]:
# 🔌 2) Mount Google Drive
from google.colab import drive
# Attach Google Drive at /content/drive; the Drive-based paths in the config
# cell all live under this mount point. force_remount=False asks Colab not to
# force a remount.
drive.mount('/content/drive', force_remount=False)
print("✅ Drive mounted.")


In [None]:
# 📁 3) Clone or update the repo
import os, subprocess

def sh(cmd):
    """Echo a shell command, run it under bash, and fail loudly on error.

    Args:
        cmd: The full command line as one string; it is interpreted by
            /bin/bash, so `&&`, `cd`, etc. work as in a terminal.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    print(f"\n$ {cmd}")
    exit_status = subprocess.call(cmd, shell=True, executable="/bin/bash")
    if exit_status == 0:
        return
    raise RuntimeError(f"Command failed with exit code {exit_status}: {cmd}")

# Refresh an existing checkout in place, otherwise clone a fresh copy.
# `sh` hands the whole string to bash, so every interpolated value is
# shell-quoted; values made only of safe characters come out unchanged.
import shlex

_root_q = shlex.quote(PROJECT_ROOT)
_branch_q = shlex.quote(BRANCH)

if os.path.isdir(PROJECT_ROOT):
    print(f"📁 Repo already present at {PROJECT_ROOT}. Pulling latest on branch {BRANCH}...")
    sh(
        f"cd {_root_q} && git fetch origin {_branch_q} "
        f"&& git checkout {_branch_q} && git pull --rebase origin {_branch_q}"
    )
else:
    print(f"⬇️ Cloning {REPO_URL} → {PROJECT_ROOT}")
    sh(f"git clone --branch {_branch_q} {shlex.quote(REPO_URL)} {_root_q}")

print("PROJECT_ROOT =", PROJECT_ROOT)


In [None]:
# 📦 4) Install the repo (editable) + requirements
import os, subprocess

def sh(cmd):
    """Print a shell command, execute it with bash, and raise if it fails.

    Args:
        cmd: Command line given as a single string and run through /bin/bash.

    Raises:
        RuntimeError: When the command's exit code is anything other than 0.
    """
    print(f"\n$ {cmd}")
    status = subprocess.call(cmd, shell=True, executable="/bin/bash")
    if status:
        raise RuntimeError(f"Command failed with exit code {status}: {cmd}")

# Run pip through the interpreter that is executing this notebook
# (`sys.executable -m pip`) for every install step, rather than mixing
# `python -m pip` with whichever bare `pip` happens to be first on PATH.
# Paths are shell-quoted because `sh` passes the string to bash.
import shlex
import sys

_pip = f"{shlex.quote(sys.executable)} -m pip"

print("🔄 Upgrading pip/setuptools/wheel...")
sh(f"{_pip} install --upgrade pip setuptools wheel")

# Editable install only makes sense when the repo ships a pyproject.toml.
has_pyproject = os.path.exists(os.path.join(PROJECT_ROOT, "pyproject.toml"))
if has_pyproject:
    print("📦 Editable install from pyproject.toml ...")
    sh(f"cd {shlex.quote(PROJECT_ROOT)} && {_pip} install -e .")
else:
    print("⚠️ No pyproject.toml found. Skipping editable install.")

# requirements.txt is optional; install it when present.
req_path = os.path.join(PROJECT_ROOT, "requirements.txt")
if os.path.exists(req_path):
    print("📝 Installing requirements.txt...")
    sh(f"{_pip} install -r {shlex.quote(req_path)}")
else:
    print("ℹ️ No requirements.txt found — continuing.")


In [None]:
# 🌳 5) Configure environment for ddriver.config
import os

# Single source of truth for the variables exported below, so the process
# environment, the .env file, and the printed summary cannot drift apart.
env_vars = {
    "DRIVE_PATH": DRIVE_PATH,
    "DATASET_ROOT": DATASET_ROOT,
    "OUT_ROOT": OUT_ROOT,
    "CKPT_ROOT": CKPT_ROOT,
    "FAST_DATA": FAST_DATA,
}
os.environ.update(env_vars)

# Write .env file for code that uses load_dotenv().
# One KEY=value line per variable, in the order declared above; the encoding
# is pinned so the file does not depend on the platform default.
env_text = "".join(f"{key}={value}\n" for key, value in env_vars.items())
with open(os.path.join(PROJECT_ROOT, ".env"), "w", encoding="utf-8") as f:
    f.write(env_text)

print("✅ Environment variables set for ddriver.config")
print("\nSummary:")
for k in env_vars:
    # Read back from os.environ to show what other code will actually see.
    print(f"{k} = {os.environ[k]}")


In [None]:
# ✅ 6) Import smoke test
import sys, os
# Make the checkout importable: the repo root and its src/ directory.
# Guarded so re-running the cell does not pile up duplicate sys.path entries.
for _extra_path in (PROJECT_ROOT, os.path.join(PROJECT_ROOT, "src")):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

# Best-effort smoke test: report problems instead of raising, so the rest of
# the notebook can still be run and debugged.
try:
    import ddriver
except Exception as e:
    print("⚠️ Import failed — check package name/setup.")
    print(e)
else:
    print("ddriver imported OK from:", getattr(ddriver, "__file__", None))
    try:
        from ddriver import config
        print("Loaded ddriver.config successfully.")
        print("config.DATASET_ROOT =", config.DATASET_ROOT)
        print("config.OUT_ROOT     =", config.OUT_ROOT)
        print("config.CKPT_ROOT    =", config.CKPT_ROOT)
        print("config.FAST_DATA    =", config.FAST_DATA)
    except Exception as e:
        # Covers both a failing import and a config missing one of the
        # attributes printed above.
        print("Note: ddriver.config not imported:", e)


## ⚡ Optional: Copy Full Dataset to /content for Faster I/O

Run this cell to copy the original images to `/content/data` for faster training.
Skip if you're using hybrid crops instead.


In [None]:
# ⚡ Copy + compress full dataset to /content/data (optional, faster I/O)
# Re-encodes JPEGs once (quality 80, short side 320px)

import importlib
import os
from pathlib import Path

from ddriver.data.fastcopy import CompressionSpec, copy_splits_with_compression

# Source tree on Drive and destination tree on the VM's local disk.
SRC_ROOT = Path(DRIVE_DATA_ROOT) / "auc.distracted.driver.dataset_v2"
DST_ROOT = Path(FAST_DATA) / "auc.distracted.driver.dataset_v2"

# One CSV per split, all stored under <OUT_ROOT>/splits/<name>.csv.
splits_dir = Path(OUT_ROOT) / "splits"
split_csvs = {
    name: splits_dir / f"{name}.csv"
    for name in ("train", "val", "test", "train_small")
}

# Re-encoding parameters: short side 320 px, JPEG quality 80.
compression_spec = CompressionSpec(
    target_short_side=320,
    jpeg_quality=80,
)

# NOTE(review): skip_existing=True presumably makes re-runs skip files already
# present under DST_ROOT — confirm against ddriver.data.fastcopy.
summary = copy_splits_with_compression(
    split_csvs=split_csvs,
    src_root=SRC_ROOT,
    dst_root=DST_ROOT,
    compression=compression_spec,
    skip_existing=True,
)

print(f"\n📉 FAST_DATA copy stats: processed {summary['processed']} of {summary['total']} files "
      f"(skipped {summary['skipped']} already present).")
print(f"Compressed dataset root: {summary['dst_root']}")

# Update DATASET_ROOT to point to local copy
DATASET_ROOT = FAST_DATA
os.environ["DATASET_ROOT"] = str(DATASET_ROOT)
try:
    # Reload so a ddriver.config imported earlier sees the updated environment.
    from ddriver import config as _ddriver_config
    importlib.reload(_ddriver_config)
    print("\n⚡ Copy complete. DATASET_ROOT now points to the local FAST_DATA copy:")
    print("   ddriver.config.DATASET_ROOT =", _ddriver_config.DATASET_ROOT)
except Exception as exc:
    # Best-effort: the env var is already updated; only the reload failed.
    print("\n⚡ Copy complete. DATASET_ROOT env updated, "
          "but ddriver.config could not be reloaded:", exc)


## ✅ Setup Complete!

You can now:
- Run **01_data_preparation.ipynb** to generate manifests or extract hybrid crops
- Run **02_training.ipynb** to train models
- Run **03_evaluation.ipynb** to generate predictions and metrics
- Run **04_modality_analysis.ipynb** for per-class comparison
- Run **05_gradcam.ipynb** for Grad-CAM visualizations

Each notebook has its own inline setup cells, so you can also run them independently.
