
# 🚀 Colab Setup for **ddriver** (Repo + Google Drive data)

This notebook wires up your GitHub repo to Colab **and** mounts your Google Drive dataset exactly like your local setup.  
It supports:
- Pulling the latest code from GitHub (or updating if already cloned).
- Installing your package in editable mode (`pip install -e .` via `pyproject.toml`).
- Mounting Google Drive and setting environment variables for datasets, outputs, and checkpoints.
- Creating a symlink inside your repo's `data/` folder that points at the dataset stored on Drive.
- Quick sanity checks (GPU, folder counts, import smoke test).

> Tip: Run cells **top to bottom** the first time. Whenever you push new commits, re-run the **Clone or update** cell (step 3), followed by the **Install** cell (step 4).


In [None]:

# 🔧 0) Quick GPU sanity check (optional)
# The leading `!` hands the line to a shell. If `nvidia-smi` exits non-zero
# (for example on a runtime without a GPU), the `||` branch prints a note that
# a CPU runtime is still fine for the setup steps below.
!nvidia-smi || echo "No GPU detected — CPU runtime is okay for setup steps."


In [None]:

# 🔧 1) User config — adjust these values to match your own repo
# Git remote to clone, and the branch to check out (e.g. "main" or "dev").
REPO_URL = "https://github.com/your-username/your-ddriver-repo.git"
BRANCH = "main"

# Project folder on Google Drive. The default project layout is:
#   My Drive/TFM/{checkpoints,data,notebooks,outputs}
DRIVE_TFM_DIR = "/content/drive/MyDrive/TFM"

# Folder name to use for the local clone. Leave as None to derive it from
# REPO_URL; set a string (e.g. "ddriver") when the folder name should differ
# from the GitHub repo name.
PROJECT_DIRNAME_OVERRIDE = None


In [None]:

# 🔌 2) Mount Google Drive (the first run asks you to authorize access)
from google.colab import drive

# Drive is attached at /content/drive; no remount is requested on re-runs.
drive.mount(mountpoint="/content/drive", force_remount=False)
print("✅ Drive mounted.")


In [None]:

# 📦 3) Clone or update your repo into /content
import os, re, subprocess, sys, json, pathlib, shutil

def sh(cmd):
    """Echo *cmd*, run it through bash, and fail loudly on a non-zero exit.

    Args:
        cmd: Shell command line, passed verbatim to /bin/bash.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    print(f"\n$ {cmd}")
    exit_code = subprocess.run(cmd, shell=True, executable="/bin/bash").returncode
    if exit_code == 0:
        return
    raise RuntimeError(f"Command failed with exit code {exit_code}: {cmd}")

# Derive the local folder name for the clone: an explicit override wins,
# otherwise use the last path component of REPO_URL.
import shlex  # cell-local import: quotes values before they reach the shell

if PROJECT_DIRNAME_OVERRIDE:
    repo_dirname = PROJECT_DIRNAME_OVERRIDE
else:
    # Accept ".../repo.git", ".../repo" and ".../repo/": URLs copied from the
    # browser address bar usually come without the ".git" suffix.
    m = re.search(r"/([^/]+?)(?:\.git)?/?$", REPO_URL.strip())
    if not m:
        raise ValueError("Could not infer repo folder name from REPO_URL. Set PROJECT_DIRNAME_OVERRIDE.")
    repo_dirname = m.group(1)

PROJECT_ROOT = f"/content/{repo_dirname}"

# Quote every value interpolated into the shell command lines so spaces or
# shell metacharacters in a folder/branch name cannot break the command.
root_q = shlex.quote(PROJECT_ROOT)
branch_q = shlex.quote(BRANCH)
url_q = shlex.quote(REPO_URL.strip())

if os.path.isdir(PROJECT_ROOT):
    # Existing checkout: fetch the branch, switch to it, then rebase onto the remote.
    print(f"📁 Repo already present at {PROJECT_ROOT}. Pulling latest on branch {BRANCH}...")
    sh(
        f"cd {root_q} && git fetch origin {branch_q} "
        f"&& git checkout {branch_q} && git pull --rebase origin {branch_q}"
    )
else:
    # Fresh runtime: clone the requested branch straight into PROJECT_ROOT.
    print(f"⬇️ Cloning {REPO_URL} → {PROJECT_ROOT}")
    sh(f"git clone --branch {branch_q} {url_q} {root_q}")

print("PROJECT_ROOT =", PROJECT_ROOT)


In [None]:

# 🧰 4) Install your package (editable) + optional requirements.txt
import os, subprocess, glob, textwrap, sys

def sh(cmd):
    """Print a shell command, execute it with bash, and raise on failure.

    Args:
        cmd: Command line to run; it is interpreted by /bin/bash.

    Raises:
        RuntimeError: If the exit status of the command is not 0.
    """
    print(f"\n$ {cmd}")
    status = subprocess.call(cmd, shell=True, executable="/bin/bash")
    if status:
        raise RuntimeError(f"Command failed with exit code {status}: {cmd}")

import shlex  # cell-local import: quotes paths interpolated into shell commands

# Invoke pip through the interpreter running this kernel so that every install
# below targets the same environment (the original mixed `python -m pip` and a
# bare `pip`, which are not guaranteed to agree).
pip_cmd = f"{shlex.quote(sys.executable)} -m pip"

print("🔄 Upgrading pip/setuptools/wheel...")
sh(f"{pip_cmd} install --upgrade pip setuptools wheel")

print("📦 Editable install from pyproject.toml (if present)...")
has_pyproject = os.path.exists(os.path.join(PROJECT_ROOT, "pyproject.toml"))
if has_pyproject:
    # Editable install: the installed package points at the checkout in PROJECT_ROOT.
    sh(f"cd {shlex.quote(PROJECT_ROOT)} && {pip_cmd} install -e .")
else:
    print("⚠️ No pyproject.toml found. Skipping editable install.")

# Optional extra dependencies pinned by the repo.
req_path = os.path.join(PROJECT_ROOT, "requirements.txt")
if os.path.exists(req_path):
    print("📝 Installing requirements.txt...")
    sh(f"{pip_cmd} install -r {shlex.quote(req_path)}")
else:
    print("ℹ️ No requirements.txt found — continuing.")


In [None]:

# 🌳 5) Set environment & symlink data so your code finds it the same way as on local
import os, json, pathlib, shutil, subprocess

# Drive → dataset, outputs, checkpoints
DATASET_ROOT = f"{DRIVE_TFM_DIR}/data/auc.distracted.driver.dataset_v2"
OUT_ROOT = f"{DRIVE_TFM_DIR}/outputs"
CHECKPOINTS = f"{DRIVE_TFM_DIR}/checkpoints"

# Project-local working data dir (lives on the Colab VM, safe to recreate).
LOCAL_DATA_DIR = f"{PROJECT_ROOT}/data"
os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

# Make sure the output folders exist, but only when the Drive project folder is
# really there: creating paths under /content/drive while Drive is NOT mounted
# would leave stray local folders at the mount point.
if os.path.isdir(DRIVE_TFM_DIR):
    for drive_dir in (OUT_ROOT, CHECKPOINTS):
        os.makedirs(drive_dir, exist_ok=True)
else:
    print(f"⚠️ {DRIVE_TFM_DIR} not found — is Drive mounted? Not creating outputs/checkpoints folders.")

if not os.path.isdir(DATASET_ROOT):
    print(f"⚠️ Dataset folder not found at {DATASET_ROOT} — the symlink will dangle until it exists.")

# Create PROJECT_ROOT/data/auc.distracted.driver.dataset_v2 as a symlink to the
# dataset on Drive (the link lives INSIDE data/, it does not replace data/).
symlink_target = DATASET_ROOT
symlink_path = os.path.join(LOCAL_DATA_DIR, "auc.distracted.driver.dataset_v2")
try:
    # A link left over from an earlier run may point at a previous
    # DRIVE_TFM_DIR; drop it so it is recreated with the current target.
    if os.path.islink(symlink_path) and os.readlink(symlink_path) != symlink_target:
        os.unlink(symlink_path)
    # lexists() is also True for a dangling link, so an existing link or a real
    # file/folder at that path is left untouched.
    if not os.path.lexists(symlink_path):
        os.symlink(symlink_target, symlink_path)
        print(f"🔗 Symlinked {symlink_path} → {symlink_target}")
except OSError as e:
    print(f"⚠️ Symlink failed ({e}). Will rely on env vars instead.")

# Export env vars so your code (and .env reads) behave consistently
os.environ["PROJECT_ROOT"] = PROJECT_ROOT
os.environ["DATASET_ROOT"] = DATASET_ROOT
os.environ["OUT_ROOT"] = OUT_ROOT
os.environ["CHECKPOINTS"] = CHECKPOINTS

# Also write a lightweight .env file at repo root for libraries that read it
env_text = f"""PROJECT_ROOT={PROJECT_ROOT}
DATASET_ROOT={DATASET_ROOT}
OUT_ROOT={OUT_ROOT}
CHECKPOINTS={CHECKPOINTS}
"""
# Explicit encoding so the file content does not depend on the VM's locale.
with open(os.path.join(PROJECT_ROOT, ".env"), "w", encoding="utf-8") as f:
    f.write(env_text)

print("✅ Environment configured.")
print("\nSummary:")
for k in ["PROJECT_ROOT", "DATASET_ROOT", "OUT_ROOT", "CHECKPOINTS"]:
    print(f"{k} = {os.environ[k]}")


In [None]:

# 🔍 6) Quick dataset sanity checks (non-exhaustive, adjust as you like)
import os, glob, itertools

def preview_dir(path, n=5):
    """Print up to *n* entries found directly under *path*, sorted by name.

    This is a best-effort sanity check: if the directory cannot be listed,
    the error is printed instead of raised.

    Args:
        path: Directory to list.
        n: Maximum number of entries to print; values <= 0 print no entries.
    """
    print(f"Listing up to {n} items under: {path}")
    try:
        names = sorted(os.listdir(path))
    except OSError as e:
        print("Could not list:", e)
        return
    # Slice rather than break-after-print, so that n <= 0 prints nothing
    # (a break placed after the print always emits at least one entry).
    for name in names[:max(n, 0)]:
        print("  -", name)

# Peek at the top of the dataset tree as seen through DATASET_ROOT.
print("\nTop-level dataset root:")
preview_dir(os.environ["DATASET_ROOT"])

# Then at the class folders of the Camera 1 training split.
print("\nCamera 1/train class folders (first 10):")
cam1_train = os.path.join(
    os.environ["DATASET_ROOT"],
    "v2_cam1_cam2_split_by_driver",
    "Camera 1",
    "train",
)
preview_dir(cam1_train, n=10)

# Count the regular files in a few class folders; folders that are absent are skipped.
for cls in ("c0", "c1", "c2"):
    cls_dir = os.path.join(cam1_train, cls)
    if not os.path.isdir(cls_dir):
        continue
    num_imgs = sum(1 for p in glob.iglob(os.path.join(cls_dir, "*")) if os.path.isfile(p))
    print(f"  • {cls}: {num_imgs} files")


In [None]:

# ✅ 7) Import smoke test for your package
try:
    import sys, importlib, os
    # Make the checkout importable for both a flat layout (package at repo
    # root) and a src/ layout. Skip entries already on sys.path so re-running
    # this cell does not pile up duplicates.
    for candidate in (PROJECT_ROOT, os.path.join(PROJECT_ROOT, "src")):
        if os.path.isdir(candidate) and candidate not in sys.path:
            sys.path.append(candidate)
    # The editable install ran after this kernel started; clear the import
    # system's finder caches so files created since then are visible.
    importlib.invalidate_caches()
    import ddriver
    print("ddriver imported OK from:", ddriver.__file__)
    # If you have ddriver.config, try reading envs
    try:
        from ddriver import config
        print("Loaded ddriver.config successfully.")
    except Exception as e:
        # Optional module: report and carry on.
        print("Note: ddriver.config not imported:", e)
except Exception as e:
    # Deliberately broad: this is a smoke test, so any failure while importing
    # the package is reported rather than raised.
    print("⚠️ Import failed — check that your package name is 'ddriver' and that setup is correct.")
    print(e)



### ✅ You’re set!
- Push new code to GitHub, then **re-run the _Clone/Update_ and _Install_ cells** to grab it here.
- Your dataset, outputs, and checkpoints live under Google Drive: `My Drive/TFM/...`.
- Your repo is at `/content/<repo-name>`.

If anything breaks, re-run the notebook from the top. If that does not help, use `Runtime → Restart runtime` and then run all cells again.
