# 3D Medical Segmentation - Setup and Exploratory Data Analysis (EDA)

Environment-aware setup for 3D medical image segmentation. Works on both Google Colab and Linux environments.

**Features:**
- Automatic environment detection (Colab vs Linux)
- Environment-appropriate dataset paths
- Existing results detection and management
- GPU/CPU fallback support

In [None]:
# If running in Colab, mount Drive
# Environment detection depends only on whether the Colab package can be
# imported; the Drive mount itself is best-effort and handled separately.
try:
    from google.colab import drive  # type: ignore
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True
    try:
        drive.mount('/content/drive')
    except Exception as exc:
        # A failed mount (e.g. declined authorisation) must not relabel a
        # Colab runtime as non-Colab, so report it and keep the flag set.
        print(f'WARNING: could not mount Google Drive: {exc}')
print('IN_COLAB =', IN_COLAB)

In [None]:
# Install dependencies (environment-aware)
import os
import subprocess
import sys
from pathlib import Path

# The repository URL below spells the project "segemntation", so a plain
# `git clone` produces that directory name, while other checkouts use the
# corrected spelling. Accept both wherever a local checkout is looked up.
PROJECT_DIR_NAMES = ('3d_medical_segmentation', '3d_medical_segemntation')


def _pip_install(*args):
    """Run ``pip install`` with the interpreter that backs this kernel.

    Using ``sys.executable -m pip`` guarantees packages land in the running
    environment rather than in whichever ``python``/``pip`` is first on PATH.

    Args:
        *args: Arguments forwarded verbatim to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    subprocess.run([sys.executable, '-m', 'pip', 'install', *args], check=True)


# Check if we're in Colab
if 'google.colab' in str(get_ipython()):
    # Colab-specific setup: keep the checkout on Drive so it survives restarts
    repo_url = 'https://github.com/Thabhelo/3d_medical_segemntation.git'
    drive_root = '/content/drive/MyDrive'
    repo_dir = f'{drive_root}/3d_medical_segmentation'

    if not os.path.exists(repo_dir):
        subprocess.run(['git', 'clone', repo_url, repo_dir], check=True)
    if repo_dir not in sys.path:
        sys.path.append(repo_dir)

    # Pin compatible binary stack to avoid numpy/ABI mismatch
    _pip_install('-q', '-U', 'pip')
    _pip_install('-q', '--force-reinstall', 'numpy==1.26.4')
    _pip_install('-q', 'scipy>=1.12,<1.14', 'torch==2.3.0', 'torchvision==0.18.0', 'monai[all]==1.3.0')
    _pip_install('-q', '-e', repo_dir)
    print('Colab dependencies prepared.')
else:
    # Linux/local environment setup
    current_dir = Path.cwd()
    if current_dir.name in PROJECT_DIR_NAMES:
        # We're in the project directory
        if 'src' not in sys.path:
            sys.path.append('src')
        print('Using local project directory.')
    else:
        # Try to find the project in Downloads under either spelling
        downloads_root = Path.home() / 'Downloads'
        downloads_path = downloads_root / PROJECT_DIR_NAMES[0]
        for dir_name in PROJECT_DIR_NAMES:
            if (downloads_root / dir_name).exists():
                downloads_path = downloads_root / dir_name
                break

        if downloads_path.exists():
            os.chdir(downloads_path)
            if 'src' not in sys.path:
                sys.path.append('src')
            print(f'Using project from Downloads: {downloads_path}')
        else:
            print('Project directory not found. Please ensure the project is in Downloads/3d_medical_segmentation/')

    # Check if dependencies are available
    try:
        import torch
        import monai
        print('Dependencies already available.')
    except ImportError:
        print('Installing dependencies...')
        # requirements.txt is resolved relative to the current working directory
        _pip_install('-r', 'requirements.txt')
        print('Dependencies installed.')

In [None]:
# Resolve dataset and project paths for the detected runtime
import os
import sys
from pathlib import Path

# Make sure 'src' is on the import path before loading the runtime helpers
if 'src' not in sys.path:
    sys.path.append('src')

from src.utils.runtime import get_runtime_info, get_dataset_root, get_output_root

# Dump whatever the runtime helper reports, one entry per line
runtime_info = get_runtime_info()
print("Runtime Environment:")
for key in runtime_info:
    value = runtime_info[key]
    print(f"  {key}: {value}")
print()

# Dataset location comes from the helper; the project root is simply
# the directory the notebook is currently running in
project_root = Path.cwd()
data_root = get_dataset_root()

print('data_root:', data_root)
print('project_root:', project_root)

In [None]:
# Quick EDA: list datasets and sample files
import os
import sys

# No `is_colab` helper is imported into this notebook, so resolve the
# environment here: reuse the IN_COLAB flag set by the Drive-mount cell when
# that cell has been run, otherwise check whether the Colab package is loaded.
try:
    in_colab = bool(IN_COLAB)
except NameError:
    in_colab = 'google.colab' in sys.modules

# Directory names each dataset may be stored under; the first match wins
name_synonyms = {
    'BraTS': ['BraTS', 'brats', 'BRATS', 'BraTS2021'],
    'MSD': ['MSD', 'MSD_Liver', 'Task03_Liver'],
    'TotalSegmentator': ['TotalSegmentator']
}

MAX_PREVIEW_DIRS = 3   # directories sampled per dataset
MAX_PREVIEW_FILES = 3  # file names listed per sampled directory

print("Dataset Analysis:")
print("=" * 50)

for canonical, options in name_synonyms.items():
    # Pick the first synonym that exists under data_root, if any
    p = None
    for opt in options:
        candidate = data_root / opt
        if candidate.exists():
            p = candidate
            break

    print(f"\nDataset: {canonical}")
    print(f"  Path: {p if p else (data_root / options[0])}")

    if p is not None:
        print(f"  Status: ✓ Found")
        # Count every file, previewing only the first few non-empty
        # directories so large datasets still show a sample
        file_count = 0
        previewed_dirs = 0
        for root, dirs, files in os.walk(p):
            file_count += len(files)
            if files and previewed_dirs < MAX_PREVIEW_DIRS:
                previewed_dirs += 1
                print(f"    {root}")
                for f in files[:MAX_PREVIEW_FILES]:
                    print(f"      - {f}")
        print(f"  Total files: {file_count}")
    else:
        if in_colab:
            print("  Status: ✗ MISSING - Please sync to Google Drive")
        else:
            print("  Status: ✗ MISSING - Please download to ~/Downloads/datasets/")

print(f"\nEnvironment: {'Colab' if in_colab else 'Linux'}")
print(f"Dataset root: {data_root}")
print(f"Results root: {get_output_root()}")


In [None]:
# Report the compute device and any results saved by earlier runs
import torch
from pathlib import Path

# Prefer CUDA when present, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    print("No GPU available, will use CPU")
else:
    print(f"GPU available: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")

# Summarise the output root: one line per run directory with the number
# of *.pth files found directly inside it
results_dir = get_output_root()
if not results_dir.exists():
    print(f"\nNo existing results directory found at: {results_dir}")
else:
    print(f"\nExisting results found in: {results_dir}")
    result_dirs = [d for d in results_dir.iterdir() if d.is_dir()]
    if not result_dirs:
        print("  No result directories found")
    else:
        print("Available result directories:")
        for result_dir in sorted(result_dirs):
            checkpoints = list(result_dir.glob("*.pth"))
            if not checkpoints:
                print(f"  - {result_dir.name}: No checkpoints")
            else:
                print(f"  - {result_dir.name}: {len(checkpoints)} checkpoints")

print(f"\nDevice for training: {device}")
