# AI Lung Cancer Detection - Google Colab Training

This notebook trains the ResNet50 model on GPU with balanced sampling to handle class imbalance.

**Steps:**
1. Mount Google Drive
2. Upload your dataset and code
3. Install dependencies
4. Train the model with GPU acceleration
5. Download the trained checkpoint

## 1. Setup Environment

In [None]:
# Report the PyTorch build and whether a CUDA GPU is visible to this runtime
import torch

cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Details are only queried when a CUDA device is actually present
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")

In [None]:
# Attach Google Drive at /content/drive; later cells read the project ZIP
# from it and copy the trained checkpoint back to it.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

## 2. Upload Project Files

**Option A: Upload ZIP to Google Drive**
1. Compress the `AI_Cancer_Detection/ai/` folder into `ai.zip` (the archive must contain the `ai/` folder at its top level)
2. Upload to Google Drive: `My Drive/AI_Cancer_Detection/`
3. Run the cell below

**Option B: Clone from GitHub** (if you have a repo)

In [None]:
# Option A: Extract the project archive from Google Drive
import os
import zipfile


def extract_project(zip_path, extract_path):
    """Unpack the project ZIP so that the project ends up at `extract_path`.

    The archive is extracted into the parent directory of `extract_path`, so it
    must contain a top-level folder named like the last component of
    `extract_path` (e.g. `ai/` for `/content/ai`).

    Args:
        zip_path: Location of the project ZIP file.
        extract_path: Directory where the project is expected after extraction.

    Returns:
        True if `extract_path` exists as a directory after extraction,
        False if the ZIP is missing or did not produce that directory.
    """
    if not os.path.exists(zip_path):
        print(f"❌ ZIP file not found at {zip_path}")
        print("Please upload your project ZIP to Google Drive first")
        return False

    # Derive the extraction root from extract_path instead of hard-coding it,
    # so the reported location and the real location cannot drift apart.
    extract_root = os.path.dirname(extract_path.rstrip('/')) or '/'
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_root)

    # Only report success when the expected project folder really exists
    if os.path.isdir(extract_path):
        print(f"✅ Extracted to {extract_path}")
        return True

    print(f"❌ Extracted {zip_path} into {extract_root}, but {extract_path} was not created")
    print(f"Make sure the ZIP contains a top-level `{os.path.basename(extract_path.rstrip('/'))}/` folder")
    return False


# Update this path to your ZIP file location in Google Drive
zip_path = '/content/drive/MyDrive/AI_Cancer_Detection/ai.zip'
extract_path = '/content/ai'

extract_project(zip_path, extract_path)

In [None]:
# Option B: Clone from GitHub instead of extracting the ZIP (uncomment if using).
# Replace YOUR_USERNAME with your GitHub account name. The clone target is
# /content/ai, the project directory used by the cells below.
# NOTE(review): if the repository root is AI_Cancer_Detection/ with the code in an
# ai/ subfolder, the project would end up at /content/ai/ai — confirm the repo layout.
# !git clone https://github.com/YOUR_USERNAME/AI_Cancer_Detection.git /content/ai

In [None]:
# Verify project structure
!ls -la /content/ai/
!ls -la /content/ai/src/
!ls -la /content/ai/data/raw/

## 3. Install Dependencies

In [None]:
# Install required packages
!pip install -q albumentations==1.3.1
!pip install -q timm==0.9.12
!pip install -q scikit-learn==1.3.2
!pip install -q tensorboard==2.15.1
!pip install -q pyyaml==6.0.1
!pip install -q Pillow==10.1.0
!pip install -q matplotlib==3.8.2
!pip install -q seaborn==0.13.0

print("\n‚úÖ All dependencies installed")

## 4. Configure Training

In [None]:
# Make the extracted project folder the current working directory
import os

project_root = '/content/ai'
os.chdir(project_root)
print(f"Working directory: {os.getcwd()}")

In [None]:
# Load the training config and echo its key settings
import yaml

config_path = '/content/ai/configs/config.yaml'
with open(config_path, 'r') as config_file:
    config = yaml.safe_load(config_file)

# (label, section, key) for every value shown in the summary, in display order
summary_fields = (
    ("Model", 'model', 'architecture'),
    ("Batch size", 'data', 'batch_size'),
    ("Epochs", 'training', 'epochs'),
    ("Learning rate", 'training', 'learning_rate'),
    ("Image size", 'data', 'image_size'),
)

print("Current Configuration:")
for label, section, key in summary_fields:
    print(f"  {label}: {config[section][key]}")

In [None]:
# Optional: Update config for Colab GPU training
# (uses `config` and `config_path` loaded by the "Verify config file" cell)
# Increase batch size for GPU
config['data']['batch_size'] = 32  # Increase from 16
config['data']['num_workers'] = 2  # Colab has 2 CPU cores
config['training']['epochs'] = 50  # Full training

# Save updated config.
# sort_keys=False keeps the keys in the order they were loaded instead of
# re-sorting them alphabetically; safe_dump mirrors the safe_load used to read
# the file. Comments in the YAML file are not preserved by this round trip.
with open(config_path, 'w') as f:
    yaml.safe_dump(config, f, default_flow_style=False, sort_keys=False)

print("✅ Config updated for GPU training")

## 5. Check Dataset

In [None]:
# Load the label table and summarise the class distribution
import pandas as pd

labels_file = '/content/ai/data/raw/ChestXray_Binary_Labels.csv'
df = pd.read_csv(labels_file)

print(f"Total images: {len(df)}")
print("\nLabel distribution:")
binary_labels = df['BinaryLabel']
print(binary_labels.value_counts())
print("\nClass balance:")
# normalize=True yields fractions; multiply by 100 to show percentages
print(binary_labels.value_counts(normalize=True) * 100)

In [None]:
# Check that the training images are present in the project folder
import os

dataset_path = '/content/ai/data/raw/train_data/train'
# isdir rather than exists: os.listdir below requires a directory
if os.path.isdir(dataset_path):
    image_files = [f for f in os.listdir(dataset_path) if f.endswith('.png')]
    print(f"✅ Found {len(image_files)} images in {dataset_path}")
else:
    print(f"❌ Dataset path not found: {dataset_path}")
    # The images are read from the project folder, not directly from Drive
    print("Please ensure your images are included in the project extracted or cloned to /content/ai")

## 6. Start Training with Balanced Sampling

In [None]:
# Train the model by running the project's `train` command in a shell.
# Arguments passed: the config file at /content/ai/configs/config.yaml, the
# experiment name `colab_resnet50_balanced` (later cells glob for experiment
# folders starting with this name), the `cuda` device and a fixed seed of 42.
!python /content/ai/main.py train \
    --config /content/ai/configs/config.yaml \
    --experiment-name colab_resnet50_balanced \
    --device cuda \
    --seed 42

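## 7. Monitor Training (Optional)

**Note:** The training cell in step 6 blocks until training finishes, so when the notebook is run top to bottom, TensorBoard opened here shows the logs written by that completed run.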

In [None]:
# Load TensorBoard
%load_ext tensorboard

# Find the latest experiment directory
import os
import glob

log_dirs = glob.glob('/content/ai/experiments/*/logs')
if log_dirs:
    latest_log = max(log_dirs, key=os.path.getctime)
    print(f"Loading TensorBoard from: {latest_log}")
    %tensorboard --logdir {latest_log}
else:
    print("No log directories found yet. Training may not have started.")

## 8. Check Training Results

In [None]:
# Find the latest experiment and summarise its best checkpoint
import os
import glob


def format_metric(value, digits=4):
    """Format a metric with `digits` decimals, or return 'N/A' if it is not numeric.

    Defined in this cell so the cell can be run on its own. Anything that
    `float()` cannot convert (None, the string 'N/A', ...) is shown as 'N/A'
    instead of raising from the format spec.
    """
    try:
        return f"{float(value):.{digits}f}"
    except (TypeError, ValueError):
        return "N/A"


exp_dirs = glob.glob('/content/ai/experiments/colab_resnet50_balanced*')
if exp_dirs:
    # Newest experiment folder as ranked by os.path.getctime
    latest_exp = max(exp_dirs, key=os.path.getctime)
    print(f"Latest experiment: {latest_exp}")

    # Check for checkpoint
    checkpoint_path = os.path.join(latest_exp, 'checkpoints', 'best_model.pth')
    if os.path.exists(checkpoint_path):
        import torch
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        # map_location='cpu' lets the summary run without a GPU.
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        print("\n✅ Best model checkpoint found!")
        print(f"Epoch: {checkpoint.get('epoch', 'N/A')}")
        # A missing 'best_auc' key is reported as N/A rather than crashing
        print(f"Best AUC: {format_metric(checkpoint.get('best_auc'))}")
        print(f"Checkpoint size: {os.path.getsize(checkpoint_path) / (1024**2):.1f} MB")
    else:
        print("No checkpoint found yet")
else:
    print("No experiment directories found")

## 9. Evaluate Model

In [None]:
# Evaluate the trained model
import glob

exp_dirs = glob.glob('/content/ai/experiments/colab_resnet50_balanced*')
if exp_dirs:
    latest_exp = max(exp_dirs, key=os.path.getctime)
    checkpoint_path = os.path.join(latest_exp, 'checkpoints', 'best_model.pth')
    
    !python /content/ai/main.py evaluate \
        --config /content/ai/configs/config.yaml \
        --checkpoint {checkpoint_path} \
        --device cuda
else:
    print("No trained model found. Please complete training first.")

## 10. Download Trained Model

In [None]:
# Copy checkpoint to Google Drive for safekeeping
import shutil
import glob
import os

drive_root = '/content/drive/MyDrive'
exp_dirs = glob.glob('/content/ai/experiments/colab_resnet50_balanced*')
if not exp_dirs:
    print("No experiment found to save")
elif not os.path.isdir(drive_root):
    # Without this check, makedirs below would create the folder on the local
    # runtime disk and the copy would be reported as saved to Drive.
    print(f"❌ Google Drive is not mounted at {drive_root}")
    print("Run the 'Mount Google Drive' cell first")
else:
    # Newest experiment folder as ranked by os.path.getctime
    latest_exp = max(exp_dirs, key=os.path.getctime)
    checkpoint_path = os.path.join(latest_exp, 'checkpoints', 'best_model.pth')

    # Create destination in Google Drive
    drive_dest = '/content/drive/MyDrive/AI_Cancer_Detection/trained_models/'
    os.makedirs(drive_dest, exist_ok=True)

    # Copy checkpoint (skipped with a message if training produced none)
    if os.path.exists(checkpoint_path):
        dest_file = os.path.join(drive_dest, 'best_model_colab.pth')
        shutil.copy2(checkpoint_path, dest_file)

        print(f"✅ Checkpoint saved to Google Drive: {dest_file}")
        print(f"Size: {os.path.getsize(dest_file) / (1024**2):.1f} MB")
    else:
        print(f"No checkpoint found at {checkpoint_path}; skipping checkpoint copy")

    # Also copy the entire experiment folder; dirs_exist_ok lets a re-run
    # copy into a destination created by a previous run
    exp_name = os.path.basename(latest_exp)
    exp_dest = os.path.join(drive_dest, exp_name)
    shutil.copytree(latest_exp, exp_dest, dirs_exist_ok=True)
    print(f"✅ Full experiment saved to: {exp_dest}")

In [None]:
# Download checkpoint directly to your computer
from google.colab import files
import glob
import os  # imported here so the cell does not depend on an earlier cell's import

exp_dirs = glob.glob('/content/ai/experiments/colab_resnet50_balanced*')
if exp_dirs:
    # Newest experiment folder as ranked by os.path.getctime
    latest_exp = max(exp_dirs, key=os.path.getctime)
    checkpoint_path = os.path.join(latest_exp, 'checkpoints', 'best_model.pth')

    # Only start the download when the checkpoint file is actually there
    if os.path.exists(checkpoint_path):
        print(f"Downloading: {checkpoint_path}")
        files.download(checkpoint_path)
    else:
        print(f"No checkpoint found at {checkpoint_path}")
else:
    print("No checkpoint found to download")

## 11. Visualize Results

In [None]:
# Show the evaluation metrics and the saved confusion-matrix / ROC images
import matplotlib.pyplot as plt
import json
import glob
import os
# Imported up front: both the confusion-matrix and the ROC branch need these
from IPython.display import Image, display

exp_dirs = glob.glob('/content/ai/experiments/colab_resnet50_balanced*')
if exp_dirs:
    # Newest experiment folder as ranked by os.path.getctime
    latest_exp = max(exp_dirs, key=os.path.getctime)
    results_dir = os.path.join(latest_exp, 'results')

    # Look for evaluation results
    eval_files = glob.glob(os.path.join(results_dir, '**/evaluation_results.json'), recursive=True)
    if eval_files:
        with open(eval_files[0], 'r') as f:
            results = json.load(f)

        print("\n📊 Model Performance:")
        metric_labels = (
            ("AUC", 'auc'),
            ("Accuracy", 'accuracy'),
            ("Precision", 'precision'),
            ("Recall", 'recall'),
            ("F1 Score", 'f1'),
        )
        for label, key in metric_labels:
            value = results.get(key)
            # bool is excluded because it is a subclass of int
            is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
            # Missing or non-numeric metrics are shown as N/A instead of
            # raising from the numeric format spec
            shown = f"{value:.4f}" if is_number else "N/A"
            print(f"  {label}: {shown}")

    # Look for confusion matrix image
    cm_files = glob.glob(os.path.join(results_dir, '**/confusion_matrix.png'), recursive=True)
    if cm_files:
        print("\nConfusion Matrix:")
        display(Image(filename=cm_files[0]))

    # Look for ROC curve
    roc_files = glob.glob(os.path.join(results_dir, '**/roc_curve.png'), recursive=True)
    if roc_files:
        print("\nROC Curve:")
        display(Image(filename=roc_files[0]))
else:
    print("No results found")

## 12. Clean Up (Optional)

In [None]:
# Free up space by removing temporary files
# WARNING: Only run this after saving your model to Google Drive!
# Both commands are left commented out so that running this cell deletes nothing.

# !rm -rf /content/ai/experiments/*/logs  # Remove TensorBoard logs
# !rm -rf /content/ai/data/raw/train_data  # Remove dataset (keep in Drive)

print("⚠️ Uncomment the lines above to clean up space")