# TotalSegmentator Training - Parallel Execution

This notebook trains all three architectures on the TotalSegmentator dataset.
Run it in a separate Colab tab so that it executes in parallel with the MSD Liver training.

- Models: UNet, UNETR, SegResNet
- Epochs: 100 each
- Estimated time: ~4-5 hours total


In [None]:
# Colab bootstrap: attach Google Drive, enter the project checkout, sync the code.
import os
import subprocess
from pathlib import Path

from google.colab import drive

# The checkout lives on Drive, so Drive has to be mounted before anything else.
drive.mount('/content/drive')

# Location of the checkout on Drive. This cell does not clone it; per the
# message below, the main notebook is expected to have done that already.
repo_dir = Path('/content/drive/MyDrive/3d_medical_segemntation')
if not repo_dir.exists():
    print("Repository not found! Make sure the main notebook has cloned it.")
    raise FileNotFoundError(f"Repository not found at {repo_dir}")

# Make the checkout the working directory so repo-relative paths resolve.
os.chdir(repo_dir)
current_dir = Path.cwd()
print(f"Working directory: {current_dir}")

# Fast-forward only: a diverged checkout makes git fail (and check=True raise)
# instead of silently creating a merge commit.
pull_cmd = ['git', 'pull', '--ff-only']
subprocess.run(pull_cmd, check=True)
print("Code updated to latest version")


In [None]:
# Install dependencies (Python 3.12 compatible)
import shlex
import subprocess
import sys

# Run pip as "<kernel interpreter> -m pip" rather than whatever `pip` is first
# on PATH, so packages land in the same interpreter that later launches the
# training script via sys.executable.
pip_install = [sys.executable, '-m', 'pip', 'install', '-q']

print("Installing dependencies...")
cmds = [
    # Refresh the packaging toolchain first.
    pip_install + ['--upgrade', 'pip', 'setuptools', 'wheel'],
    # PyTorch pinned to the CUDA 12.1 wheels from the PyTorch index.
    pip_install + ['torch==2.4.0', 'torchvision==0.19.0', '--index-url', 'https://download.pytorch.org/whl/cu121'],
    # MONAI and the remaining scientific/IO/plotting stack.
    pip_install + ['monai-weekly', 'numpy>=1.26.4', 'scipy>=1.12', 'nibabel', 'SimpleITK', 'PyYAML', 'tqdm', 'tensorboard', 'matplotlib>=3.7', 'seaborn>=0.12', 'scikit-learn>=1.3', 'pandas>=2.0'],
]

for cmd in cmds:
    # shlex.join quotes specifiers such as 'numpy>=1.26.4', so the echoed line
    # can be pasted into a shell without '>' being treated as a redirection.
    print('Running:', shlex.join(cmd))
    # check=True aborts the cell on the first failed install.
    subprocess.run(cmd, check=True)

print("Dependencies installed successfully!")


In [None]:
# Verify environment and GPU
import sys
from pathlib import Path  # imported here so this cell does not depend on names left behind by another cell

import torch

print(f"Python: {sys.version.split()[0]}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; report it in GiB.
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

# Check TotalSegmentator dataset structure
dataset_path = Path('/content/drive/MyDrive/datasets/TotalSegmentator')
print(f"\nTotalSegmentator dataset: {'FOUND' if dataset_path.exists() else 'MISSING'}")
if dataset_path.exists():
    # Subject folders are the sub-directories whose name starts with 's'.
    # iterdir() yields entries in arbitrary order, so sort to inspect the same
    # sample subject on every run.
    subject_dirs = sorted(
        d for d in dataset_path.iterdir() if d.is_dir() and d.name.startswith('s')
    )
    print(f"Subject directories found: {len(subject_dirs)}")
    if subject_dirs:
        sample = subject_dirs[0]
        print(f"Sample directory ({sample.name}) contents:")
        for item in sorted(sample.iterdir()):
            if item.is_file():
                size_mb = item.stat().st_size / (1024 * 1024)
                print(f"  FILE: {item.name} ({size_mb:.1f} MB)")
            else:
                print(f"  DIR:  {item.name}/")

        # Check if there's a combined_labels file (from our postprocessing)
        combined = sample / 'combined_labels.nii.gz'
        print(f"\nCombined labels file: {'FOUND' if combined.exists() else 'MISSING'}")

        # Look for any .nii.gz files directly inside the sample directory
        # (non-recursive); sorted so the preview below is stable.
        nii_files = sorted(sample.glob('*.nii.gz'))
        print(f"NIfTI files in {sample.name}: {len(nii_files)}")
        for nii in nii_files[:5]:  # Show first 5
            print(f"  {nii.name}")


In [None]:
# BATCH 3: Train TotalSegmentator dataset
# This runs in parallel with MSD Liver training in the other tab
import subprocess
import sys

print('='*80)
print('BATCH 3: Training TotalSegmentator Dataset (PARALLEL EXECUTION)')
print('Estimated time: ~4-5 hours for 3 models')
print('='*80)

# Launch the batch script with the kernel's own interpreter. '-u' keeps the
# child's output unbuffered so log lines show up here as they are produced.
train_cmd = [sys.executable, '-u', 'scripts/run_batch_3_totalsegmentator.py']

# The context manager closes the stdout pipe and waits for the child on exit,
# so the pipe is not leaked if streaming stops early.
with subprocess.Popen(
    train_cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # interleave stderr with stdout in one stream
    text=True,
    bufsize=1,                 # line-buffered on the reading side
) as proc:
    try:
        for line in proc.stdout:
            print(line, end='', flush=True)
    except KeyboardInterrupt:
        # Interrupting the cell should also stop the training run rather than
        # leave it running in the background.
        proc.terminate()
        raise

# returncode is set once the with-block has waited for the child.
print(f"\nCompleted with exit code: {proc.returncode}")


In [None]:
# Report, per architecture, whether a best-checkpoint file exists and its size.
from pathlib import Path

BYTES_PER_MB = 1024 * 1024

print("TotalSegmentator Training Results:")
print("="*50)

for model_name in ('unet', 'unetr', 'segresnet'):
    # Path is relative to the current working directory.
    best_ckpt = Path(f'results/colab_runs/totalsegmentator_{model_name}/best.pth')
    if not best_ckpt.exists():
        print(f"❌ {model_name:12} - Not completed")
        continue
    ckpt_mb = best_ckpt.stat().st_size / BYTES_PER_MB
    print(f"✅ {model_name:12} - {ckpt_mb:.1f} MB")

print("\nTo see detailed results, run the evaluation script in the main notebook.")
