# 🫁 Pneumonia Detection Training - Google Drive Version

This notebook trains your pneumonia detection models using your dataset from Google Drive.

**Advantages of Google Drive approach:**
- No need to upload large files each time
- Faster access to your dataset
- Can reuse the same dataset across multiple sessions
- More reliable for large datasets

**Prerequisites:**
1. Upload your `chest_xray` folder to Google Drive
2. Note the path where you uploaded it

## 🚀 Step 1: Setup Environment

In [None]:
# Report the TensorFlow build and whether an accelerator is visible
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
print("GPU available:", tf.config.list_physical_devices('GPU'))
print("CUDA available:", tf.test.is_built_with_cuda())

# Turn on memory growth for every visible GPU; fall back to a CPU notice otherwise
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("‚ö†Ô∏è No GPU found, using CPU (training will be slower)")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Report the failure without stopping the notebook
        print(e)
    else:
        print("‚úÖ GPU memory growth enabled")

In [None]:
# Install required packages
!pip install seaborn opencv-python-headless

# Import libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input, Conv2D, MaxPooling2D, UpSampling2D, BatchNormalization, Activation, Concatenate
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix
import zipfile
from google.colab import files, drive
import time
import shutil

print("‚úÖ All packages imported successfully!")

## 📁 Step 2: Mount Google Drive and Setup Dataset

In [None]:
# Attach Google Drive to the Colab filesystem
print("üìÅ Mounting Google Drive...")
drive.mount('/content/drive')
print("‚úÖ Google Drive mounted successfully!")

# Print the top level of the Drive (sorted) so the dataset folder is easy to locate
print("\nüìÇ Contents of your Google Drive:")
drive_root = '/content/drive/MyDrive'
if not os.path.exists(drive_root):
    print("‚ùå Could not access Google Drive")
else:
    for item in sorted(os.listdir(drive_root)):
        item_path = os.path.join(drive_root, item)
        # Folder icon for directories, document icon for everything else
        if os.path.isdir(item_path):
            item_type = "üìÅ"
        else:
            item_type = "üìÑ"
        print(f"  {item_type} {item}")

In [None]:
# Configure your dataset path in Google Drive
# MODIFY THIS PATH to match where you uploaded your chest_xray folder

# Common paths (uncomment the one that matches your setup):
GDRIVE_DATASET_PATH = '/content/drive/MyDrive/chest_xray'  # If you uploaded chest_xray directly to Drive root
# GDRIVE_DATASET_PATH = '/content/drive/MyDrive/pneumonia_project/chest_xray'  # If inside a project folder
# GDRIVE_DATASET_PATH = '/content/drive/MyDrive/data/chest_xray'  # If inside a data folder

print(f"Looking for dataset at: {GDRIVE_DATASET_PATH}")

# Verify the dataset exists
if os.path.exists(GDRIVE_DATASET_PATH):
    print("‚úÖ Dataset found in Google Drive!")

    # Report which of the expected split directories are present
    subdirs = ['train', 'val', 'test']
    for subdir in subdirs:
        subdir_path = os.path.join(GDRIVE_DATASET_PATH, subdir)
        if os.path.exists(subdir_path):
            print(f"  ‚úÖ {subdir} directory found")
        else:
            print(f"  ‚ùå {subdir} directory missing")
else:
    print("‚ùå Dataset not found at the specified path!")
    print("\nüîß To fix this:")
    print("1. Make sure you've uploaded your chest_xray folder to Google Drive")
    print("2. Update the GDRIVE_DATASET_PATH variable above to match your folder location")
    print("3. Re-run this cell")

    # Show available directories to help user find the right path.
    # The Drive root is checked first: listing a missing directory would
    # replace the guidance above with a FileNotFoundError traceback.
    print("\nüìÇ Available directories in your Drive:")
    if os.path.isdir(drive_root):
        # Sorted so the listing is stable between runs
        for item in sorted(os.listdir(drive_root)):
            item_path = os.path.join(drive_root, item)
            if os.path.isdir(item_path):
                print(f"  üìÅ {item}")
                # Check if this directory contains chest_xray
                chest_xray_path = os.path.join(item_path, 'chest_xray')
                if os.path.exists(chest_xray_path):
                    # Print the path that was actually probed rather than a hard-coded prefix
                    print(f"    üí° Found chest_xray in: {chest_xray_path}")
    else:
        print("‚ùå Could not access Google Drive")

In [None]:
# Optionally mirror the dataset from Google Drive onto local Colab storage.
# Recommended for performance; Drive is used directly when copying is disabled
# or the Drive dataset path does not exist.

COPY_TO_LOCAL = True  # Set to False if you want to use Drive directly (slower but saves space)

if not (COPY_TO_LOCAL and os.path.exists(GDRIVE_DATASET_PATH)):
    print("üìÅ Using dataset directly from Google Drive")
    DATASET_PATH = GDRIVE_DATASET_PATH
else:
    print("üìã Copying dataset from Google Drive to local Colab storage...")
    print("This will take a few minutes but will make training faster.")

    LOCAL_DATASET_PATH = '/content/chest_xray'

    # Start from a clean destination: copytree refuses to write into an existing directory
    if os.path.exists(LOCAL_DATASET_PATH):
        shutil.rmtree(LOCAL_DATASET_PATH)
        print("üóëÔ∏è Removed existing local copy")

    # Time the Drive -> local copy
    start_time = time.time()
    shutil.copytree(GDRIVE_DATASET_PATH, LOCAL_DATASET_PATH)
    copy_time = time.time() - start_time

    print(f"‚úÖ Dataset copied to local storage in {copy_time:.1f} seconds")
    DATASET_PATH = LOCAL_DATASET_PATH

print(f"\nüéØ Using dataset path: {DATASET_PATH}")

In [None]:
# Verify dataset structure and count images
def _count_images(directory):
    """Return how many .jpg/.jpeg/.png files sit directly inside ``directory``.

    A missing path, or a path that is not a directory, counts as zero images.
    """
    if not os.path.isdir(directory):
        return 0
    return sum(
        1
        for name in os.listdir(directory)
        if name.lower().endswith(('.jpg', '.jpeg', '.png'))
    )


def analyze_dataset(dataset_path):
    """Print per-split image counts for a dataset and report whether any exist.

    For each of the ``train``, ``val`` and ``test`` splits, the ``NORMAL`` and
    ``PNEUMONIA`` sub-directories are scanned (non-recursively) for image
    files, and the counts and class distribution are printed.

    Args:
        dataset_path: Root directory expected to contain the split folders.

    Returns:
        bool: ``True`` when at least one image was found, ``False`` when the
        root is missing or no images were counted.
    """
    if not os.path.exists(dataset_path):
        print(f"‚ùå Dataset not found at: {dataset_path}")
        return False

    splits = ['train', 'val', 'test']
    total_images = 0

    print(f"\nüìä Dataset Analysis: {dataset_path}")
    print("="*60)

    for split in splits:
        split_path = os.path.join(dataset_path, split)
        if not os.path.exists(split_path):
            print(f"{split.upper():>10}: ‚ùå Directory not found")
            continue

        normal_count = _count_images(os.path.join(split_path, 'NORMAL'))
        pneumonia_count = _count_images(os.path.join(split_path, 'PNEUMONIA'))

        split_total = normal_count + pneumonia_count
        total_images += split_total

        print(f"{split.upper():>10}: Normal={normal_count:>4}, Pneumonia={pneumonia_count:>4}, Total={split_total:>4}")

        # Percentages are only meaningful (and safe to compute) for a non-empty split
        if split_total > 0:
            normal_pct = (normal_count / split_total) * 100
            pneumonia_pct = (pneumonia_count / split_total) * 100
            print(f"{'':>10}  Distribution: Normal {normal_pct:.1f}% | Pneumonia {pneumonia_pct:.1f}%")

    print("="*60)
    print(f"üìä TOTAL IMAGES: {total_images:,}")

    # Show sample file names (sorted so the output is stable between runs)
    sample_path = os.path.join(dataset_path, 'train', 'NORMAL')
    if os.path.isdir(sample_path):
        sample_files = sorted(os.listdir(sample_path))[:3]
        print(f"\nüìÑ Sample files: {', '.join(sample_files)}")

    return total_images > 0

# Run the analysis and abort the notebook early when no images were found
dataset_ready = analyze_dataset(DATASET_PATH)

if not dataset_ready:
    print("\n‚ùå Dataset setup failed. Please check your Google Drive upload.")
    raise Exception("Dataset not ready")

print("\n‚úÖ Dataset is ready for training!")

## ⚙️ Step 3: Configuration and Data Loading

In [None]:
# Colab-optimized configuration
class ColabConfig:
    """Paths, image geometry and hyperparameters read by the training cells."""

    # Image geometry (square RGB input)
    IMG_HEIGHT = IMG_WIDTH = 224
    IMG_CHANNELS = 3

    # Training hyperparameters
    BATCH_SIZE = 32
    EPOCHS_AUTOENCODER = 20
    EPOCHS_CLASSIFIER = 15
    LEARNING_RATE = 1e-3

    # Class labels, in the index order used for the one-hot targets
    CLASSES = ['NORMAL', 'PNEUMONIA']
    NUM_CLASSES = len(CLASSES)

    # Dataset root (taken from the module-level DATASET_PATH) and its three splits
    DATA_DIR = DATASET_PATH
    TRAIN_DIR = os.path.join(DATA_DIR, "train")
    VAL_DIR = os.path.join(DATA_DIR, "val")
    TEST_DIR = os.path.join(DATA_DIR, "test")

    # Where the trained models are written
    MODEL_DIR = "models"
    AUTOENCODER_PATH = os.path.join(MODEL_DIR, "autoencoder_colab.h5")
    CLASSIFIER_PATH = os.path.join(MODEL_DIR, "resnet_classifier_colab.h5")
    HYBRID_MODEL_PATH = os.path.join(MODEL_DIR, "hybrid_model_colab.h5")

# Instantiate the configuration and make sure the model output directory exists
config = ColabConfig()
os.makedirs(config.MODEL_DIR, exist_ok=True)

# Echo the settings that matter most for the rest of the run
print("\n".join([
    "‚úÖ Configuration set up!",
    f"Dataset directory: {config.DATA_DIR}",
    f"Models will be saved to: {config.MODEL_DIR}",
    f"Image size: {config.IMG_HEIGHT}x{config.IMG_WIDTH}",
    f"Batch size: {config.BATCH_SIZE}",
]))

In [None]:
# Data generators
def create_data_generators():
    """Build the labelled train/val/test directory iterators for classification.

    Training images are augmented; validation and test images are only
    rescaled. The test iterator is created with ``shuffle=False``.

    Returns:
        tuple: ``(train_generator, val_generator, test_generator)``.
    """
    print("Creating data generators...")

    # Arguments shared by all three directory iterators
    flow_kwargs = dict(
        target_size=(config.IMG_HEIGHT, config.IMG_WIDTH),
        batch_size=config.BATCH_SIZE,
        class_mode='categorical',
        classes=config.CLASSES,
    )

    # Augmenting pipeline for training data
    augmenting_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        zoom_range=0.2,
        fill_mode='nearest'
    )

    # Rescale-only pipeline for validation and test data
    plain_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = augmenting_datagen.flow_from_directory(config.TRAIN_DIR, **flow_kwargs)
    val_generator = plain_datagen.flow_from_directory(config.VAL_DIR, **flow_kwargs)
    test_generator = plain_datagen.flow_from_directory(
        config.TEST_DIR, shuffle=False, **flow_kwargs
    )

    return train_generator, val_generator, test_generator

def create_autoencoder_generators():
    """Build train/val directory iterators whose targets are the input images.

    Both iterators use ``class_mode='input'``; only the training pipeline
    applies augmentation.

    Returns:
        tuple: ``(train_generator, val_generator)``.
    """
    print("Creating autoencoder data generators...")

    # Arguments shared by both directory iterators
    flow_kwargs = dict(
        target_size=(config.IMG_HEIGHT, config.IMG_WIDTH),
        batch_size=config.BATCH_SIZE,
        class_mode='input',
    )

    # Augmenting pipeline for training data
    augmenting_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True
    )

    # Rescale-only pipeline for validation data
    plain_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = augmenting_datagen.flow_from_directory(config.TRAIN_DIR, **flow_kwargs)
    val_generator = plain_datagen.flow_from_directory(config.VAL_DIR, **flow_kwargs)

    return train_generator, val_generator

# Smoke-test the classification generators and report how many samples each one found
try:
    train_gen_test, val_gen_test, test_gen_test = create_data_generators()
    print("‚úÖ Data generators created successfully!")
    for split_label, split_gen in (
        ("Training", train_gen_test),
        ("Validation", val_gen_test),
        ("Test", test_gen_test),
    ):
        print(f"{split_label} samples: {split_gen.samples:,}")
except Exception as e:
    # Surface a readable message, then re-raise so the notebook still stops here
    print(f"‚ùå Error creating data generators: {str(e)}")
    raise

## 🔧 Step 4: Model Definitions

In [None]:
# Autoencoder model
def build_autoencoder():
    """Build a convolutional autoencoder and the encoder that shares its layers.

    The encoder stacks four Conv-BatchNorm-ReLU stages (64, 128, 256, 512
    filters), each followed by 2x2 max pooling. The decoder mirrors it with
    2x2 upsampling and ends in a 3-channel sigmoid convolution.

    Returns:
        tuple: ``(autoencoder, encoder)``; both models start from the same
        input tensor, so the encoder reuses the autoencoder's encoder layers.
    """
    def conv_bn_relu(tensor, filters):
        # One 3x3 'same' convolution followed by batch norm and ReLU
        tensor = Conv2D(filters, (3, 3), padding='same')(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    input_img = Input(shape=(config.IMG_HEIGHT, config.IMG_WIDTH, config.IMG_CHANNELS))

    # Encoder: widen the channels while pooling halves the spatial size
    x = input_img
    for filters in (64, 128, 256, 512):
        x = conv_bn_relu(x, filters)
        x = MaxPooling2D((2, 2), padding='same')(x)
    encoded = x

    # Decoder: narrow the channels while upsampling doubles the spatial size
    for filters in (512, 256, 128, 64):
        x = conv_bn_relu(x, filters)
        x = UpSampling2D((2, 2))(x)

    decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

    autoencoder = Model(input_img, decoded)
    encoder = Model(input_img, encoded)

    return autoencoder, encoder

# ResNet50 classifier
def build_resnet_classifier():
    """Build an ImageNet-initialised ResNet50 with a dense softmax head.

    Returns:
        tuple: ``(model, base_model)`` where ``base_model`` is the headless
        ResNet50, so callers can toggle its layers' ``trainable`` flags.
    """
    input_layer = Input(shape=(config.IMG_HEIGHT, config.IMG_WIDTH, config.IMG_CHANNELS))

    base_model = ResNet50(
        weights='imagenet',
        include_top=False,
        input_tensor=input_layer
    )

    # Head: global pooling, then Dense+Dropout stages, then the class softmax
    x = GlobalAveragePooling2D()(base_model.output)
    for units, drop_rate in ((512, 0.5), (256, 0.3)):
        x = Dense(units, activation='relu')(x)
        x = Dropout(drop_rate)(x)
    predictions = Dense(config.NUM_CLASSES, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    return model, base_model

# Hybrid model
def build_hybrid_model(encoder_model):
    """Build a classifier that concatenates encoder and ResNet50 features.

    Args:
        encoder_model: Model applied to the shared input; all of its layers
            are frozen in place (their ``trainable`` flag is set to ``False``).

    Returns:
        A model mapping an image to ``config.NUM_CLASSES`` softmax outputs.
    """
    input_layer = Input(shape=(config.IMG_HEIGHT, config.IMG_WIDTH, config.IMG_CHANNELS))

    # Encoder branch: frozen, then globally pooled
    for frozen_layer in encoder_model.layers:
        frozen_layer.trainable = False

    encoder_features_flat = GlobalAveragePooling2D()(encoder_model(input_layer))

    # ResNet50 branch on the same input tensor
    resnet_base = ResNet50(
        weights='imagenet',
        include_top=False,
        input_tensor=input_layer
    )

    # All but the last 10 ResNet layers are frozen
    for frozen_layer in resnet_base.layers[:-10]:
        frozen_layer.trainable = False

    resnet_features_flat = GlobalAveragePooling2D()(resnet_base.output)

    # Fuse both feature vectors
    x = Concatenate()([encoder_features_flat, resnet_features_flat])

    # Classification head: three Dense+Dropout stages, then the class softmax
    for units, drop_rate in ((1024, 0.5), (512, 0.3), (256, 0.2)):
        x = Dense(units, activation='relu')(x)
        x = Dropout(drop_rate)(x)
    predictions = Dense(config.NUM_CLASSES, activation='softmax')(x)

    return Model(inputs=input_layer, outputs=predictions)

print("‚úÖ All model architectures defined!")

## 🚀 Step 5: Train Autoencoder

In [None]:
print("üîß TRAINING AUTOENCODER")
print("="*50)

# Build and compile autoencoder
autoencoder, encoder = build_autoencoder()
autoencoder.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.LEARNING_RATE),
    loss='mse',
    metrics=['mae']
)

print("Autoencoder Summary:")
autoencoder.summary()

# Create data generators
train_gen_ae, val_gen_ae = create_autoencoder_generators()

# Callbacks
callbacks_ae = [
    ModelCheckpoint(
        config.AUTOENCODER_PATH,
        monitor='val_loss',
        save_best_only=True,
        verbose=1
    ),
    EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-7,
        verbose=1
    )
]

# Train autoencoder
start_time = time.time()

history_ae = autoencoder.fit(
    train_gen_ae,
    epochs=config.EPOCHS_AUTOENCODER,
    validation_data=val_gen_ae,
    callbacks=callbacks_ae,
    verbose=1
)

ae_time = time.time() - start_time
print(f"‚úÖ Autoencoder training completed in {ae_time/60:.1f} minutes")

# Save encoder separately
encoder_path = os.path.join(config.MODEL_DIR, "encoder_colab.h5")
encoder.save(encoder_path)
print(f"‚úÖ Encoder saved to {encoder_path}")

# Also save to Google Drive as backup
gdrive_models_dir = '/content/drive/MyDrive/pneumonia_models'
os.makedirs(gdrive_models_dir, exist_ok=True)
shutil.copy2(config.AUTOENCODER_PATH, gdrive_models_dir)
shutil.copy2(encoder_path, gdrive_models_dir)
print(f"‚úÖ Models backed up to Google Drive: {gdrive_models_dir}")

## 🧠 Step 6: Train ResNet50 Classifier

In [None]:
print("üß† TRAINING RESNET50 CLASSIFIER")
print("="*50)

# Build classifier
classifier, base_model = build_resnet_classifier()

# Create data generators
train_gen, val_gen, test_gen = create_data_generators()

# Phase 1: Frozen base layers
print("Phase 1: Training with frozen base layers...")
for layer in base_model.layers:
    layer.trainable = False

classifier.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'precision', 'recall']
)

callbacks_phase1 = [
    EarlyStopping(
        monitor='val_accuracy',
        patience=3,
        restore_best_weights=True,
        verbose=1
    )
]

history_phase1 = classifier.fit(
    train_gen,
    epochs=5,
    validation_data=val_gen,
    callbacks=callbacks_phase1,
    verbose=1
)

# Phase 2: Fine-tuning
print("Phase 2: Fine-tuning with unfrozen layers...")
for layer in base_model.layers:
    layer.trainable = True

classifier.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.LEARNING_RATE/10),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'precision', 'recall']
)

callbacks_phase2 = [
    ModelCheckpoint(
        config.CLASSIFIER_PATH,
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.3,
        patience=3,
        min_lr=1e-8,
        verbose=1
    )
]

start_time = time.time()

history_phase2 = classifier.fit(
    train_gen,
    epochs=config.EPOCHS_CLASSIFIER,
    validation_data=val_gen,
    callbacks=callbacks_phase2,
    verbose=1
)

classifier_time = time.time() - start_time
print(f"‚úÖ Classifier training completed in {classifier_time/60:.1f} minutes")

# Evaluate on test set
print("Evaluating classifier on test set...")
test_loss, test_acc, test_precision, test_recall = classifier.evaluate(test_gen, verbose=1)
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")

# Backup to Google Drive
shutil.copy2(config.CLASSIFIER_PATH, gdrive_models_dir)
print(f"‚úÖ Classifier backed up to Google Drive")

## 🔀 Step 7: Train Hybrid Model

In [None]:
print("üîÄ TRAINING HYBRID MODEL")
print("="*50)

# Load pre-trained encoder
encoder = tf.keras.models.load_model(encoder_path)

# Build hybrid model
hybrid_model = build_hybrid_model(encoder)

hybrid_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'precision', 'recall']
)

print("Hybrid Model Summary:")
hybrid_model.summary()

# Callbacks
callbacks_hybrid = [
    ModelCheckpoint(
        config.HYBRID_MODEL_PATH,
        monitor='val_accuracy',
        save_best_only=True,
        verbose=1
    ),
    EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=3,
        min_lr=1e-7,
        verbose=1
    )
]

start_time = time.time()

history_hybrid = hybrid_model.fit(
    train_gen,
    epochs=config.EPOCHS_CLASSIFIER,
    validation_data=val_gen,
    callbacks=callbacks_hybrid,
    verbose=1
)

hybrid_time = time.time() - start_time
print(f"‚úÖ Hybrid model training completed in {hybrid_time/60:.1f} minutes")

# Evaluate on test set
print("Evaluating hybrid model on test set...")
test_loss, test_acc, test_precision, test_recall = hybrid_model.evaluate(test_gen, verbose=1)
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")

# Backup to Google Drive
shutil.copy2(config.HYBRID_MODEL_PATH, gdrive_models_dir)
print(f"‚úÖ Hybrid model backed up to Google Drive")

## 📊 Step 8: Training Results and Visualization

In [None]:
# Plot training histories
def plot_training_history(history, title):
    """Draw training/validation curves for one run on a 2x2 grid of axes.

    The loss panel is always drawn. The accuracy, precision and recall panels
    are drawn only when the corresponding training key is present in
    ``history.history``; their axes are otherwise left empty.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences (including the ``val_``-prefixed keys).
        title: Prefix used in every panel title.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    records = history.history

    # (axis, history key, display label, drawn even if the key is absent?)
    panels = (
        (axes[0, 0], 'accuracy', 'Accuracy', False),
        (axes[0, 1], 'loss', 'Loss', True),
        (axes[1, 0], 'precision', 'Precision', False),
        (axes[1, 1], 'recall', 'Recall', False),
    )

    for ax, key, label, always_drawn in panels:
        if not always_drawn and key not in records:
            continue
        ax.plot(records[key], label=f'Training {label}')
        ax.plot(records[f'val_{key}'], label=f'Validation {label}')
        ax.set_title(f'{title} - {label}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(label)
        ax.legend()
        ax.grid(True)

    plt.tight_layout()
    plt.show()

# One figure per training run: autoencoder, fine-tuned classifier, hybrid model
plot_training_history(history=history_ae, title='Autoencoder')
plot_training_history(history=history_phase2, title='ResNet50 Classifier')
plot_training_history(history=history_hybrid, title='Hybrid Model')

## 📥 Step 9: Download Trained Models

In [None]:
# Create a ZIP file with all trained models
def create_models_zip(model_files=None, zip_filename='trained_pneumonia_models_gdrive.zip'):
    """Bundle model files into a ZIP archive and return the archive path.

    Each existing file is stored under its base name (no directory
    components). Missing files are reported and skipped rather than treated
    as errors.

    Args:
        model_files: Iterable of file paths to include. When ``None``, the
            autoencoder, encoder, classifier and hybrid model paths defined
            by the notebook (``config`` and ``encoder_path``) are used.
        zip_filename: Path of the archive to create; an existing file is
            overwritten.

    Returns:
        str: ``zip_filename``.
    """
    if model_files is None:
        # Default: the four artifacts produced by the training cells
        model_files = [
            config.AUTOENCODER_PATH,
            encoder_path,
            config.CLASSIFIER_PATH,
            config.HYBRID_MODEL_PATH
        ]

    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for model_file in model_files:
            if not os.path.exists(model_file):
                print(f"‚ö†Ô∏è Model file not found: {model_file}")
                continue

            zipf.write(model_file, os.path.basename(model_file))
            file_size = os.path.getsize(model_file) / (1024 * 1024)  # MB
            print(f"‚úÖ Added {os.path.basename(model_file)} ({file_size:.1f} MB) to ZIP")

    return zip_filename

# Package the models and report where the archive was written
models_zip = create_models_zip()
print(f"\nüì¶ Models packaged in: {models_zip}")

# Report the archive size in megabytes
if os.path.exists(models_zip):
    zip_size = os.path.getsize(models_zip) / (1024 ** 2)
    print(f"ZIP file size: {zip_size:.1f} MB")

# Hand the archive to the browser through the Colab download helper
files.download(models_zip)
print("‚úÖ Models downloaded to your computer!")

print(f"\nüíæ Models are also backed up in your Google Drive at:")
print(f"   {gdrive_models_dir}")

# Follow-up instructions for running inference locally
print("\n".join([
    "\nüìã Next steps:",
    "1. Extract the ZIP file to your local 'models' directory",
    "2. Use colab_local_inference.py for predictions on your laptop",
    "3. Example: python colab_local_inference.py --image path/to/xray.jpg",
]))

## 🔍 Step 10: Test Predictions

In [None]:
# Test the hybrid model with sample images
def test_prediction(model, test_generator, num_samples=5):
    """Show predictions for the first images of the first test batch.

    Each image is drawn with its predicted label, true label and the
    predicted class's score. The title is green when the prediction matches
    the true class and red otherwise.

    Args:
        model: Model whose ``predict`` output has one row of class scores per
            input image.
        test_generator: Iterator supporting ``reset()`` and ``next()`` that
            yields ``(images, labels)`` batches; labels are reduced with
            ``argmax`` (presumably one-hot — confirm against the generator).
        num_samples: Maximum number of images to display. Clamped to the size
            of the fetched batch; nothing is drawn when no image is available.
    """
    # Get a batch of test images
    test_generator.reset()
    x_batch, y_batch = next(test_generator)

    # Never index past the end of the batch (the batch may be smaller than requested)
    num_samples = min(num_samples, len(x_batch))
    if num_samples < 1:
        return

    # Make predictions
    predictions = model.predict(x_batch[:num_samples])

    # squeeze=False keeps `axes` two-dimensional even when one image is shown,
    # so the row can be iterated uniformly
    fig, axes = plt.subplots(1, num_samples, figsize=(20, 4), squeeze=False)

    for i, ax in enumerate(axes[0]):
        # Display image
        ax.imshow(x_batch[i])
        ax.axis('off')

        # Get prediction
        pred_class = np.argmax(predictions[i])
        pred_confidence = predictions[i][pred_class]
        true_class = np.argmax(y_batch[i])

        pred_label = config.CLASSES[pred_class]
        true_label = config.CLASSES[true_class]

        # Set title with prediction
        color = 'green' if pred_class == true_class else 'red'
        ax.set_title(f'Pred: {pred_label}\nTrue: {true_label}\nConf: {pred_confidence:.2%}',
                     color=color, fontsize=10)

    plt.tight_layout()
    plt.show()

# Visual spot-check of the hybrid model on the test generator
print("üîç Testing Hybrid Model Predictions:")
test_prediction(hybrid_model, test_gen)

# Wrap-up: per-stage and total training time, then where the models ended up
print("\nüéâ TRAINING COMPLETED SUCCESSFULLY!")
print("="*60)
print("üìä Training Summary:")
for stage_label, stage_seconds in (
    ("Autoencoder", ae_time),
    ("Classifier", classifier_time),
    ("Hybrid", hybrid_time),
):
    print(f"- {stage_label} training time: {stage_seconds/60:.1f} minutes")
print(f"- Total training time: {(ae_time + classifier_time + hybrid_time)/60:.1f} minutes")
print(f"\nüíæ Models saved locally and backed up to Google Drive")
print(f"üì• Models downloaded to your computer")
print("\n‚úÖ Ready for local inference on your laptop!")