# Unified Experiments Runner

Run experiment combinations across datasets and architectures.

- Datasets: BraTS, MSD Liver, TotalSegmentator
- Architectures: UNet, UNETR, SegResNet

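On Colab, this notebook mounts Google Drive, clones (or updates) the repository under Drive, and installs the required packages. Use the runner cell to execute every dataset/architecture combination, or edit its `DATASETS` and `ARCHITECTURES` lists to run only a subset.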


In [None]:
# Detect Colab, optionally mount Google Drive, and switch into the repository checkout.
#
# Outside Colab nothing is mounted or cloned: the cell only records IN_COLAB=False
# and puts <cwd>/src on sys.path.
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path

# Clone URL kept in one place so the initial clone and the recovery reclone cannot drift apart.
REPO_URL = 'https://github.com/Thabhelo/3d_medical_segemntation.git'

try:
    import google.colab  # type: ignore
    IN_COLAB = True
except Exception:
    # Environment probe only: any import failure is treated as "not running in Colab".
    IN_COLAB = False

print(f"Running in Colab: {IN_COLAB}")

if IN_COLAB:
    from google.colab import drive  # type: ignore
    drive.mount('/content/drive')

    # Persist repository under Drive (current name with typo)
    repo_dir = Path('/content/drive/MyDrive/3d_medical_segemntation')
    # What subprocess.run(..., check=True) raises here: a non-zero git exit status
    # (CalledProcessError) or the executable/path being unusable (OSError).
    git_errors = (subprocess.CalledProcessError, OSError)

    if not repo_dir.exists():
        subprocess.run(['git', 'clone', '-q', REPO_URL, str(repo_dir)], check=True)
    else:
        # Try fast-forward; if it fails, hard reset to origin/main; if still broken, reclone
        try:
            for git_args in (['fetch', 'origin'], ['checkout', 'main'], ['pull', '--ff-only']):
                subprocess.run(['git', '-C', str(repo_dir), *git_args], check=True)
        except git_errors as update_err:
            # Say so explicitly: the hard reset below discards local changes in the checkout.
            print(f'Fast-forward update failed ({update_err}); hard-resetting to origin/main.')
            try:
                subprocess.run(['git', '-C', str(repo_dir), 'reset', '--hard', 'origin/main'], check=True)
            except git_errors as reset_err:
                # Backup and reclone cleanly
                print(f'Hard reset failed ({reset_err}); backing up the checkout and recloning.')
                backup = repo_dir.with_name(repo_dir.name + '_backup_' + datetime.now().strftime('%Y%m%d_%H%M%S'))
                try:
                    repo_dir.rename(backup)
                except OSError as rename_err:
                    # Still attempt the clone (best effort), but leave a trace: if the old
                    # directory is still in place the clone below is likely to fail.
                    print(f'Warning: could not move {repo_dir} to {backup}: {rename_err}')
                subprocess.run(['git', 'clone', '-q', REPO_URL, str(repo_dir)], check=True)
    os.chdir(repo_dir)

# Re-running this cell must not stack duplicate entries on sys.path.
src_path = str(Path.cwd() / 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)
print(f"CWD: {Path.cwd()}")


In [None]:
# Install dependencies (Colab) — Python 3.12 compatible
#
# Relies on IN_COLAB set by the setup cell. Outside Colab nothing is installed.
if IN_COLAB:
    import os, subprocess
    import sys

    # Presence of /proc/driver/nvidia is the signal used to choose CUDA 12.1 wheels over CPU-only wheels.
    cuda_idx = ['--index-url','https://download.pytorch.org/whl/cu121'] if os.path.exists('/proc/driver/nvidia') else ['--index-url','https://download.pytorch.org/whl/cpu']
    # Invoke pip through the running interpreter so packages land in the environment
    # this kernel imports from, rather than whichever `pip` happens to be first on PATH.
    pip_install = [sys.executable, '-m', 'pip', 'install', '-q']
    cmds = [
        [*pip_install, '--upgrade', 'pip', 'setuptools', 'wheel'],
        [*pip_install, 'torch==2.4.0', 'torchvision==0.19.0', *cuda_idx],
        [*pip_install, 'monai-weekly', 'numpy>=1.26.4', 'scipy>=1.12', 'nibabel', 'SimpleITK', 'PyYAML', 'tqdm', 'tensorboard', 'matplotlib>=3.7', 'seaborn>=0.12', 'scikit-learn>=1.3', 'pandas>=2.0'],
    ]
    for c in cmds:
        print('Running:', ' '.join(c))
        # check=True: stop at the first failed install instead of continuing with a broken environment.
        subprocess.run(c, check=True)
    print('Dependencies installed.')
else:
    print('Local environment detected; ensure requirements are installed.')


In [None]:
# Sanity check: report interpreter / PyTorch / GPU info, dataset presence, and git state
import os, sys, subprocess
from pathlib import Path
import torch

python_version = sys.version.split()[0]
print('Python:', python_version)
print('PyTorch:', torch.__version__)
cuda_ok = torch.cuda.is_available()
print('CUDA available:', cuda_ok)
if cuda_ok:
    print('GPU:', torch.cuda.get_device_name(0))

# Locate the datasets root: Drive when on Colab, ~/Downloads/datasets otherwise.
# Colab is assumed when google.colab is already imported or a /content directory exists.
is_colab = True if 'google.colab' in sys.modules else os.path.exists('/content')
if is_colab:
    DATASETS_DIR = Path('/content/drive/MyDrive/datasets')
else:
    DATASETS_DIR = Path.home() / 'Downloads/datasets'
print('Datasets dir:', DATASETS_DIR)
for dataset_name in ('BraTS', 'MSD', 'TotalSegmentator'):
    status = 'FOUND' if (DATASETS_DIR / dataset_name).exists() else 'MISSING'
    print(f'  {dataset_name}:', status)

# Report the checked-out commit and the origin URL; both are queried before either is printed.
try:
    commit, remote = (
        subprocess.check_output(['git', *git_args]).decode().strip()
        for git_args in (('rev-parse', '--short', 'HEAD'), ('remote', 'get-url', 'origin'))
    )
    print('Repo commit:', commit)
    print('Remote:', remote)
except Exception as err:
    # Best effort: not being inside a git checkout (or having no git) is not fatal here.
    print('Git info unavailable:', err)


In [None]:
# Runner: iterate over datasets x architectures
#
# Launches scripts/train_model.py once per (dataset, architecture) pair using the
# current interpreter. A failing run does not stop the sweep; failures are collected
# in `failed_runs` as (dataset, architecture, exit_code) and summarised at the end.
from pathlib import Path
import itertools
import subprocess
import sys

# Configurable selections
DATASETS = ['brats', 'msd_liver', 'totalsegmentator']
ARCHITECTURES = ['unet', 'unetr', 'segresnet']
MAX_EPOCHS = 2
BATCH_SIZE = 2
NUM_WORKERS = 2
OUTPUT_BASE = Path('results/colab_runs')

OUTPUT_BASE.mkdir(parents=True, exist_ok=True)

print('Dataset root auto-detected by train_model.py based on environment.')

runs = list(itertools.product(DATASETS, ARCHITECTURES))
print(f'Total runs: {len(runs)}')

failed_runs = []
for ds, arch in runs:
    out_dir = OUTPUT_BASE / f'{ds}_{arch}'
    cmd = [
        sys.executable,
        'scripts/train_model.py',
        '--dataset', ds,
        '--architecture', arch,
        '--max_epochs', str(MAX_EPOCHS),
        '--batch_size', str(BATCH_SIZE),
        '--num_workers', str(NUM_WORKERS),
        '--output_dir', str(out_dir),
    ]
    print('\n=== Running:', ' '.join(cmd))
    # check=False keeps the sweep going after a failure; the exit status is
    # inspected below so the failure is still reported.
    result = subprocess.run(cmd, check=False)
    if result.returncode != 0:
        failed_runs.append((ds, arch, result.returncode))
        print(f'!!! Run failed with exit code {result.returncode}: {ds} / {arch}')

print('\nAll runs completed.')
if failed_runs:
    print(f'{len(failed_runs)} of {len(runs)} runs failed:')
    for ds, arch, returncode in failed_runs:
        print(f'  {ds} / {arch} (exit code {returncode})')
