# Colorectal Histology Classification - Training on Google Colab

This notebook trains the ResNet-18 model for colorectal tissue type classification (8 classes).

**Before starting:**
1. Change runtime to GPU: Runtime → Change runtime type → T4 GPU
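2. Upload the dataset to Google Drive in the folder `colorectal_dataset`, so that the class folders sit under `MyDrive/colorectal_dataset/Kather_texture_2016_image_tiles_5000/` (the nested path that Cell 5 reads)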
3. Upload `project_for_colab.zip` when prompted

In [None]:
# Cell 1: Check GPU
!nvidia-smi
import torch
print(f"\n{'='*60}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
print(f"{'='*60}\n")

In [None]:
# Cell 2: Mount Google Drive
# The dataset link (Cell 5) and the optional backup (Cell 10) both use paths
# under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)
print("[OK] Google Drive mounted!")

In [None]:
# Cell 3: Upload and Extract Project
from google.colab import files
import os

# Clean up any existing extraction
!rm -rf /content/medical-image-classification

print("Please upload project_for_colab.zip:")
uploaded = files.upload()

# Extract to specific directory
!mkdir -p /content/medical-image-classification
!unzip -q project_for_colab.zip -d /content/medical-image-classification
%cd /content/medical-image-classification

print("\n[OK] Project extracted!")
print("\nProject structure:")
!ls -la
print("\nCurrent directory:")
!pwd

In [None]:
# Cell 4: Install Dependencies
print("Installing dependencies...")
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q tensorboard pillow numpy pandas scikit-learn matplotlib seaborn tqdm
!pip install -q opencv-python-headless scikit-image albumentations
print("[OK] All dependencies installed!")

In [None]:
# Cell 5: Setup Dataset
print("Setting up colorectal dataset...")

# Create directories
!mkdir -p data/colorectal

# Remove any existing link
!rm -rf data/colorectal/Kather_texture_2016_image_tiles_5000

# Link to Google Drive dataset
# Note: The colorectal dataset has a specific nested structure
!ln -s /content/drive/MyDrive/colorectal_dataset data/colorectal/Kather_texture_2016_image_tiles_5000

# Verify dataset
print("\nDataset structure:")
!ls data/colorectal/Kather_texture_2016_image_tiles_5000/Kather_texture_2016_image_tiles_5000/

# Count images
from pathlib import Path
dataset_path = Path('data/colorectal/Kather_texture_2016_image_tiles_5000/Kather_texture_2016_image_tiles_5000')
total_count = 0
classes = []

print(f"\n{'='*60}")
for class_dir in sorted(dataset_path.iterdir()):
    if class_dir.is_dir():
        count = len(list(class_dir.glob('*.tif')))
        total_count += count
        classes.append(class_dir.name)
        print(f"{class_dir.name}: {count} images")

print(f"\nTotal images: {total_count}")
print(f"Number of classes: {len(classes)}")
print(f"{'='*60}")
print("\n[OK] Dataset ready!")

In [None]:
# Cell 6: Verify Setup
print("Verifying setup...\n")

# Check config
!python -c "from config import get_config; config = get_config('colorectal'); print('Config loaded:', config['dataset']['name'])"

# Check hyperparameters
!cat results/phase1/best_hyperparameters.json

print("\n[OK] Setup verified!")

In [None]:
# Cell 7: START TRAINING!
print("="*60)
print("STARTING COLORECTAL TISSUE CLASSIFICATION")
print("="*60)
print("\nThis will take approximately 1.5-2 hours on Colab GPU (8 classes).")
print("You can close this tab - training will continue in background.\n")

!python train.py --dataset colorectal --use_optimized --device cuda

print("\n" + "="*60)
print("[OK] TRAINING COMPLETE!")
print("="*60)

In [None]:
# Cell 8: Check Results
print("Training Results:\n")

# List checkpoints
print("Saved checkpoints:")
!ls -lh models/checkpoints/

# Show training history if available
import json
if os.path.exists('results/phase1/training_history.json'):
    with open('results/phase1/training_history.json', 'r') as f:
        history = json.load(f)
    print(f"\nFinal Training Accuracy: {history['train_acc'][-1]:.2f}%")
    print(f"Final Validation Accuracy: {history['val_acc'][-1]:.2f}%")
else:
    print("\nNo training history found.")

In [None]:
# Cell 9: Download Trained Model
# Imported here so the cell does not depend on `os` leaking in from Cell 3.
from google.colab import files
import os

print("Downloading trained model...\n")

model_path = 'models/checkpoints/best_model.pth'
history_path = 'results/phase1/training_history.json'

# Download best model
model_downloaded = os.path.exists(model_path)
if model_downloaded:
    files.download(model_path)
    print("[OK] best_model.pth downloaded!")
else:
    print("[X] best_model.pth not found!")

# Download training history
if os.path.exists(history_path):
    files.download(history_path)
    print("[OK] training_history.json downloaded!")

# Only claim success (and show the follow-up steps, which need the model file)
# when the model was actually found and handed to the browser.
if model_downloaded:
    print("\n[OK] All files downloaded!")
    print("\nNext steps:")
    print("1. Place best_model.pth in your local models/checkpoints_colorectal/ directory")
    print("2. Run evaluation: python evaluate.py --dataset colorectal --model_path models/checkpoints_colorectal/best_model.pth")
else:
    print("\n[X] No trained model to download - check the training output in Cell 7.")

In [None]:
# Cell 10 (Optional): Backup to Google Drive
# Copies every checkpoint and the training history into Drive, and reports
# success only if something was actually copied.
import glob
import os
import shutil

drive_root = '/content/drive/MyDrive'
backup_dir = os.path.join(drive_root, 'colorectal_results')
history_path = 'results/phase1/training_history.json'

print("Backing up results to Google Drive...")

if not os.path.isdir(drive_root):
    # Without this check the backup folder would be created on the local VM
    # disk and the "backup" would be lost together with the runtime.
    print("\n[X] Google Drive is not mounted - run Cell 2 first.")
else:
    # Create backup directory in Drive
    os.makedirs(backup_dir, exist_ok=True)

    # Copy all checkpoints (this includes best_model.pth, so it is copied once)
    checkpoint_paths = sorted(glob.glob('models/checkpoints/*.pth'))
    for checkpoint_path in checkpoint_paths:
        shutil.copy(checkpoint_path, backup_dir)
    if not checkpoint_paths:
        print("No checkpoints")
    elif 'models/checkpoints/best_model.pth' not in checkpoint_paths:
        print("[X] best_model.pth not found!")

    # Copy training history
    history_copied = os.path.exists(history_path)
    if history_copied:
        shutil.copy(history_path, backup_dir)
    else:
        print("No history file")

    if checkpoint_paths or history_copied:
        print("\n[OK] Results backed up to Google Drive: MyDrive/colorectal_results/")
    else:
        print("\n[X] Nothing was backed up - no checkpoints or history found.")