# Medical Imaging for Rural Areas - Google Colab Setup

This notebook will help you run your medical imaging project on Google Colab with free GPU access.

## Instructions:
1. Upload your project folder to Google Drive
2. Open this notebook in Google Colab
3. Enable GPU runtime before running any cells: Runtime → Change runtime type → Hardware accelerator → GPU

## Step 1: Check GPU Availability and Mount Google Drive

In [None]:
# Report whether PyTorch can see a CUDA GPU, and if so which one
import torch

cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if not cuda_ready:
    # No GPU visible: point the user at the Colab runtime setting
    print("GPU not available. Please enable GPU runtime.")
    print("Go to: Runtime → Change runtime type → Hardware accelerator → GPU")
else:
    # Device 0 is the first GPU PyTorch enumerates
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; convert to GiB for display
    print(f"GPU memory: {gpu_props.total_memory / 1024**3:.1f} GB")

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Navigate to your project folder (update this path based on where you uploaded your project)
import os
os.chdir('/content/drive/MyDrive/Medical-Imaging-for-Rural-Areas-Early-Disease-Detection')

# Verify we're in the right directory
print("Current directory:", os.getcwd())
print("Files in directory:")
!ls -la

## Step 2: Install Required Packages

In [None]:
# Install required packages
!pip install -r requirements.txt

# Install additional packages that might be needed
!pip install kaggle
!pip install pyyaml
!pip install opencv-python

## Step 3: Download Dataset (Optional - if not already uploaded)

In [None]:
# Optional: download the dataset from Kaggle (skip this cell if the data is already in your project folder).
# The Kaggle CLI needs API credentials stored in a kaggle.json file:
# upload your kaggle.json file to Colab (first two lines below) or create it manually.

# Uncomment and run these lines if you need to download the dataset:
# from google.colab import files
# files.upload()  # Upload your kaggle.json file

# Move the credentials to the location used below and restrict access to the owner only:
# !mkdir -p ~/.kaggle
# !mv kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json

# Download and extract the dataset archive into the current directory:
# !kaggle datasets download -d jtiptj/chest-xray-pneumoniacovid19tuberculosis
# !unzip chest-xray-pneumoniacovid19tuberculosis.zip
# NOTE(review): the loading step reads DATA_DIR = "./chest_xray_merged"; the folder layout
# produced by the unzip above is not visible here - confirm it matches or adjust DATA_DIR.

## Step 4: Set Up Environment and Import Modules

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
import os
import sys

# Add the project's src directory to the import path.
# An absolute path keeps the imports below resolvable even if the working
# directory changes later, and the membership check stops re-runs of this
# cell from appending duplicate entries.
SRC_DIR = os.path.abspath('./src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Import your custom modules
from data.data_loader import load_dataset
from models.resnet50_model import ResNet50Model
from models.densenet121_model import DenseNet121Model
from models.efficientnet_model import EfficientNetB0Model
from training.train import ModelTrainer
from training.evaluate import ModelEvaluator

## Step 5: Load and Explore Dataset

In [None]:
# Configuration for Colab
DATA_DIR = "./chest_xray_merged"  # Dataset location, relative to the current working directory
BATCH_SIZE = 32  # Can use larger batch size in Colab with GPU
IMAGE_SIZE = (224, 224)
NUM_EPOCHS = 25
LEARNING_RATE = 0.001
NUM_CLASSES = 4
SEED = 42  # Fixed seed so repeated runs start from the same random state

# Seed the random number generators before any data loading or model creation.
# NOTE: bit-exact repeatability on GPU is still not guaranteed, since some
# CUDA operations are non-deterministic.
torch.manual_seed(SEED)
np.random.seed(SEED)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Load dataset
print("Loading dataset...")
train_loader, val_loader, test_loader, class_names = load_dataset(
    DATA_DIR,
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE,
    num_workers=2  # Can use more workers in Colab
)

print(f"Classes: {class_names}")
print(f"Training samples: {len(train_loader.dataset)}")
print(f"Validation samples: {len(val_loader.dataset)}")
print(f"Test samples: {len(test_loader.dataset)}")

# Surface a class-count mismatch now rather than partway through training.
if len(class_names) != NUM_CLASSES:
    print(
        f"WARNING: dataset has {len(class_names)} classes but NUM_CLASSES = {NUM_CLASSES}; "
        "update NUM_CLASSES before training."
    )

## Step 6: Visualize Sample Images

In [None]:
# Visualize sample images from each class
def visualize_samples(data_loader, class_names, num_samples=8, ncols=4):
    """Show up to ``num_samples`` images from the first batch of ``data_loader``.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches. Images are
            indexed as channel-first tensors (each one is permuted to
            height x width x channel for display).
            NOTE(review): assumes 3-channel images normalised with the
            ImageNet mean/std used below - confirm against the data loader.
        class_names: Sequence mapping an integer label to its display name.
        num_samples: Maximum number of images to draw (default 8).
        ncols: Number of grid columns (default 4). The number of rows is
            derived from the image count, so any ``num_samples`` fits.

    Raises:
        ValueError: If ``ncols`` is smaller than 1.
    """
    if ncols < 1:
        raise ValueError("ncols must be at least 1")

    # Get a batch of training data
    images, labels = next(iter(data_loader))

    # Never try to draw more images than the batch actually holds
    num_shown = max(0, min(num_samples, len(images)))
    # Ceiling division; keep at least one row so the figure can be created
    nrows = max(1, -(-num_shown // ncols))

    # squeeze=False always returns a 2-D axes array, even for a single row.
    # The figure size scales with the grid (15 x 8 for the default 2 x 4).
    fig, axes = plt.subplots(
        nrows, ncols, figsize=(3.75 * ncols, 4 * nrows), squeeze=False
    )
    fig.suptitle('Sample Images from Dataset', fontsize=16)

    # Per-channel statistics used to undo the input normalisation
    mean = torch.tensor([0.485, 0.456, 0.406])
    std = torch.tensor([0.229, 0.224, 0.225])

    for i, ax in enumerate(axes.flat):
        if i < num_shown:
            # Denormalize image for display (channel-first -> channel-last)
            img = images[i].detach().cpu().permute(1, 2, 0)
            img = torch.clamp(img * std + mean, 0, 1)

            ax.imshow(img.numpy())
            ax.set_title(f'Class: {class_names[int(labels[i])]}')
        # Hide ticks and frame on every panel, including unused ones
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Visualize samples
visualize_samples(train_loader, class_names)

## Step 7: Train Models

In [None]:
# Models to train
models_to_train = {
    'ResNet50': ResNet50Model(num_classes=NUM_CLASSES),
    'DenseNet121': DenseNet121Model(num_classes=NUM_CLASSES),
    'EfficientNetB0': EfficientNetB0Model(num_classes=NUM_CLASSES)
}

# Per-model outcome, keyed by model name: trained model, its trainer,
# test accuracy and the (y_true, y_pred) pair from evaluation.
results = {}

# Checkpoint directory, relative to the working directory (the project folder
# on Google Drive when the setup cell was run). Created once, up front.
MODEL_DIR = "./models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Train each model
for model_name, model in models_to_train.items():
    print(f"\n{'='*50}")
    print(f"Training {model_name}")
    print(f"{'='*50}")

    # Create trainer
    trainer = ModelTrainer(model, device, NUM_CLASSES)

    # Train model
    trained_model = trainer.train_model(
        train_loader,
        val_loader,
        num_epochs=NUM_EPOCHS,
        learning_rate=LEARNING_RATE
    )

    # Save the weights immediately after training, so that a failure during
    # evaluation or plotting cannot discard the completed training run.
    model_path = f"{MODEL_DIR}/{model_name}_chest_xray_colab.pth"
    torch.save(trained_model.state_dict(), model_path)
    print(f"Model saved to {model_path}")

    # Evaluate model
    print(f"\nEvaluating {model_name}...")
    evaluator = ModelEvaluator(trained_model, device)
    y_true, y_pred, accuracy = evaluator.evaluate_model(test_loader, class_names)

    # Plot training history
    trainer.plot_training_history()

    results[model_name] = {
        'model': trained_model,
        'trainer': trainer,
        'accuracy': accuracy,
        'predictions': (y_true, y_pred)
    }

## Step 8: Compare Results and Visualize Performance

In [None]:
# Summarise the test accuracy of every trained model and chart the comparison
if results:
    divider = "=" * 50
    print("\n" + divider)
    print("FINAL RESULTS COMPARISON")
    print(divider)

    # Names and accuracies in the order the models were trained
    model_names = list(results)
    accuracies = [results[name]['accuracy'] for name in model_names]
    for name, acc in zip(model_names, accuracies):
        print(f"{name}: {acc:.4f}")

    # Pick the (name, result) pair with the highest accuracy
    best_model = max(results.items(), key=lambda entry: entry[1]['accuracy'])
    best_name, best_entry = best_model
    print(f"\nBest model: {best_name} with accuracy: {best_entry['accuracy']:.4f}")

    # Bar chart of accuracy per model
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(model_names, accuracies, color=['skyblue', 'lightgreen', 'lightcoral'])
    ax.set_title('Model Performance Comparison', fontsize=16, fontweight='bold')
    ax.set_xlabel('Models', fontsize=12)
    ax.set_ylabel('Accuracy', fontsize=12)
    ax.set_ylim(0, 1)

    # Write each accuracy just above the top of its bar
    for bar, acc in zip(bars, accuracies):
        label_x = bar.get_x() + bar.get_width() / 2.
        label_y = bar.get_height() + 0.01
        ax.text(label_x, label_y, f'{acc:.4f}',
                ha='center', va='bottom', fontweight='bold')

    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

print("\nTraining completed successfully in Google Colab!")

## Step 9: Download Trained Models (Optional)

In [None]:
# Optionally copy trained checkpoints from ./models to your local machine
from google.colab import files

# Collect the saved checkpoint files (*.pth) found in ./models
model_files = []
for entry in os.listdir('./models'):
    if entry.endswith('.pth'):
        model_files.append(entry)

print("Available model files:")
for checkpoint_name in model_files:
    print(f"- {checkpoint_name}")

# Uncomment to download specific model
# files.download('./models/ResNet50_chest_xray_colab.pth')
# files.download('./models/DenseNet121_chest_xray_colab.pth')
# files.download('./models/EfficientNetB0_chest_xray_colab.pth')