# Stage 2: Hyperparameter Tuning with Optuna

## Innovation Feature 5: Automated Hyperparameter Optimization

This notebook optimizes **DenseNet121, Xception, and ResNet50** using Optuna.

**Requirements:**
- ⚡ **T4 GPU** enabled (Runtime → Change runtime type → T4 GPU)
- 📦 Upload `enhanced_dataset.zip` from Stage 1
- ⏱️ Expected runtime: **2-4 hours**

In [None]:
# Install required libraries
!pip install optuna tensorflow -q

In [None]:
# Imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications import DenseNet121, Xception, ResNet50
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import optuna
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import zipfile
from pathlib import Path

# Check GPU
# Query the visible GPUs once and reuse the result for both the report and
# the warning below.
gpu_devices = tf.config.list_physical_devices('GPU')
print('TensorFlow version:', tf.__version__)
print('GPU available:', gpu_devices)
if not gpu_devices:
    print('⚠️  WARNING: No GPU detected! Enable T4 GPU in Runtime → Change runtime type')

## 1. Extract Enhanced Dataset

In [None]:
# Extract enhanced dataset
# Later cells read images from `dataset_folder`; it is expected to be the
# top-level folder inside `dataset_zip`.
dataset_zip = 'enhanced_dataset.zip'
dataset_folder = 'enhanced_dataset'

if os.path.exists(dataset_zip):
    print('Extracting enhanced dataset...')
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall('.')
    # extractall() does not tell us what the archive contained, so only
    # report success when the expected folder actually appeared.
    if os.path.isdir(dataset_folder):
        print(f'✓ Extracted to {dataset_folder}/')
    else:
        print(f'ERROR: {dataset_zip} did not contain a top-level {dataset_folder}/ folder!')
elif os.path.isdir(dataset_folder):
    print('✓ Dataset folder already exists')
else:
    print('ERROR: Please upload enhanced_dataset.zip from Stage 1!')

In [None]:
# Check dataset structure
# Every visible sub-directory of the dataset folder is one class. Hidden
# folders (e.g. `.ipynb_checkpoints`) are skipped: they are not classes and
# would otherwise inflate the class count derived from this list.
classes = sorted(
    d for d in os.listdir(dataset_folder)
    if os.path.isdir(os.path.join(dataset_folder, d)) and not d.startswith('.')
)
print(f'Found {len(classes)} classes: {classes}')

# Count images per class
image_extensions = ('.jpg', '.png', '.jpeg')
for cls in classes:
    cls_path = os.path.join(dataset_folder, cls)
    # Compare case-insensitively so files such as `IMG_001.JPG` are counted too.
    count = sum(f.lower().endswith(image_extensions) for f in os.listdir(cls_path))
    print(f'  {cls}: {count} images')

## 2. Data Preparation

In [None]:
# Hyperparameters
# Shared settings: random seed, square input resolution in pixels, and the
# number of target classes (one per entry in `classes`).
SEED = 42
IMG_SIZE = 224
NUM_CLASSES = len(classes)

# Data augmentation for training
# Random horizontal flips plus small random rotations and zooms, assembled
# layer by layer.
data_augmentation = keras.Sequential()
data_augmentation.add(layers.RandomFlip('horizontal'))
data_augmentation.add(layers.RandomRotation(0.1))
data_augmentation.add(layers.RandomZoom(0.1))

# Create datasets with 70/15/15 split

# File extensions treated as images when indexing the dataset folder.
_IMAGE_EXTENSIONS = ('.bmp', '.gif', '.jpeg', '.jpg', '.png')


def _index_image_files(directory):
    """Return (paths, labels) arrays for every image file below `directory`.

    Each visible sub-directory is one class; a label is the position of the
    class in the sorted list of sub-directory names. Folders and files are
    visited in sorted order so the result is the same on every call.
    """
    class_names = sorted(
        name for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name)) and not name.startswith('.')
    )
    paths, labels = [], []
    for label, name in enumerate(class_names):
        for root, dirs, files in os.walk(os.path.join(directory, name)):
            dirs.sort()  # fixes the traversal order of nested folders
            for fname in sorted(files):
                if fname.lower().endswith(_IMAGE_EXTENSIONS):
                    paths.append(os.path.join(root, fname))
                    labels.append(label)
    return np.array(paths, dtype=str), np.array(labels, dtype=np.int32)


def _load_image(path, label):
    """Read one image file, resize it to IMG_SIZE x IMG_SIZE and scale it to [0, 1]."""
    image = tf.io.decode_image(tf.io.read_file(path), channels=3, expand_animations=False)
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    image.set_shape((IMG_SIZE, IMG_SIZE, 3))
    return image / 255.0, label


def create_datasets(batch_size=32):
    """
    Build the train / validation / test datasets (70/15/15 split)

    The split is made once on the list of image files with a permutation
    seeded by SEED, so the three subsets are disjoint, identical on every
    call and independent of batch_size. Splitting a shuffled tf.data pipeline
    with take()/skip() does not give that guarantee, because such a pipeline
    is reshuffled every time it is iterated.

    Args:
        batch_size: Number of images per batch

    Returns:
        Tuple (train_ds, val_ds, test_ds) of batched datasets yielding
        (images, labels) with images scaled to [0, 1]. Only train_ds is
        shuffled and augmented.

    Raises:
        ValueError: If there are too few images for non-empty train and
            validation splits
    """
    paths, labels = _index_image_files(dataset_folder)
    total = len(paths)
    train_size = int(0.7 * total)
    val_size = int(0.15 * total)
    if train_size == 0 or val_size == 0:
        raise ValueError(
            f'Not enough images for a 70/15/15 split: found {total} in {dataset_folder}/'
        )

    # One seeded permutation of the file indices defines all three subsets.
    order = np.random.RandomState(SEED).permutation(total)
    train_idx = order[:train_size]
    val_idx = order[train_size:train_size + val_size]
    test_idx = order[train_size + val_size:]

    AUTOTUNE = tf.data.AUTOTUNE

    def build(indices, training=False):
        ds = tf.data.Dataset.from_tensor_slices((paths[indices], labels[indices]))
        # Cache the decoded images only; everything random comes after the
        # cache so it is re-drawn on every epoch instead of being frozen.
        ds = ds.map(_load_image, num_parallel_calls=AUTOTUNE).cache()
        if training:
            ds = ds.shuffle(buffer_size=8 * batch_size, seed=SEED)
        ds = ds.batch(batch_size)
        if training:
            ds = ds.map(lambda x, y: (data_augmentation(x, training=True), y))
        return ds.prefetch(buffer_size=AUTOTUNE)

    return build(train_idx, training=True), build(val_idx), build(test_idx)

print('Dataset creation function ready')

## 3. Model Building Functions

In [None]:
def create_model(model_name, dropout_rate=0.3):
    """
    Create transfer learning model

    The model takes RGB images scaled to [0, 1] (the range create_datasets
    produces) and converts them internally to the input format expected by
    the selected ImageNet backbone, so the frozen pre-trained weights see the
    kind of input they were trained on.

    Args:
        model_name: 'densenet', 'xception', or 'resnet'
        dropout_rate: Dropout probability

    Returns:
        Uncompiled keras.Model with a softmax output over NUM_CLASSES classes

    Raises:
        ValueError: If model_name is not one of the supported names
    """
    # Pick the backbone together with its matching preprocessing function
    if model_name == 'densenet':
        backbone = DenseNet121
        preprocess_input = keras.applications.densenet.preprocess_input
    elif model_name == 'xception':
        backbone = Xception
        preprocess_input = keras.applications.xception.preprocess_input
    elif model_name == 'resnet':
        backbone = ResNet50
        preprocess_input = keras.applications.resnet50.preprocess_input
    else:
        raise ValueError(f'Unknown model: {model_name}')

    # Load base model (pre-trained on ImageNet) and freeze its layers
    base_model = backbone(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model.trainable = False

    # Build full model
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    # preprocess_input works on 0-255 pixel values, so undo the 1/255 scaling
    # of the input pipeline before applying it.
    x = layers.Rescaling(255.0)(inputs)
    x = preprocess_input(x)
    x = base_model(x, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(NUM_CLASSES, activation='softmax')(x)

    model = keras.Model(inputs, outputs)
    return model

print('Model building function ready')

## 4. Optuna Objective Function

In [None]:
def objective(trial, model_name):
    """
    Optuna objective: train one candidate configuration and score it

    Args:
        trial: Optuna trial used to sample the hyperparameters
        model_name: Backbone name passed on to create_model

    Returns:
        Highest validation accuracy reached during the trial's training run
    """
    # Start from a clean Keras state to avoid memory issues
    keras.backend.clear_session()

    # Sample the search space
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.6)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    optimizer_name = trial.suggest_categorical('optimizer', ['adam', 'nadam', 'rmsprop'])

    # Data (batched with the sampled batch size) and model; the test split
    # is not used during tuning
    train_ds, val_ds, _ = create_datasets(batch_size)
    model = create_model(model_name, dropout_rate)

    # Anything other than 'adam' / 'nadam' maps to RMSprop
    optimizer_classes = {
        'adam': keras.optimizers.Adam,
        'nadam': keras.optimizers.Nadam,
    }
    optimizer_cls = optimizer_classes.get(optimizer_name, keras.optimizers.RMSprop)

    model.compile(
        optimizer=optimizer_cls(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Short training run (at most 10 epochs) with early stopping on val_loss
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=10,
        callbacks=[
            EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
        ],
        verbose=0,
    )

    return max(history.history['val_accuracy'])

print('Optuna objective function ready')

## 5. Optimize Each Model

### 5.1 DenseNet121 Optimization

In [None]:
# Section banner
print('=' * 50, 'OPTIMIZING DENSENET121', '=' * 50, sep='\n')

# Create the study (objective value is maximized) and run 20 trials
study_densenet = optuna.create_study(study_name='densenet121_tuning', direction='maximize')
study_densenet.optimize(lambda trial: objective(trial, 'densenet'), n_trials=20)

# Report the best trial
print('\n' + '=' * 50, 'DENSENET121 - BEST HYPERPARAMETERS', '=' * 50, sep='\n')
print(f'Best validation accuracy: {study_densenet.best_value:.4f}')
print('Best hyperparameters:')
for param, chosen in study_densenet.best_params.items():
    print(f'  {param}: {chosen}')

### 5.2 Xception Optimization

In [None]:
# Section banner
print('=' * 50, 'OPTIMIZING XCEPTION', '=' * 50, sep='\n')

# Create the study (objective value is maximized) and run 20 trials
study_xception = optuna.create_study(study_name='xception_tuning', direction='maximize')
study_xception.optimize(lambda trial: objective(trial, 'xception'), n_trials=20)

# Report the best trial
print('\n' + '=' * 50, 'XCEPTION - BEST HYPERPARAMETERS', '=' * 50, sep='\n')
print(f'Best validation accuracy: {study_xception.best_value:.4f}')
print('Best hyperparameters:')
for param, chosen in study_xception.best_params.items():
    print(f'  {param}: {chosen}')

### 5.3 ResNet50 Optimization

In [None]:
# Section banner
print('=' * 50, 'OPTIMIZING RESNET50', '=' * 50, sep='\n')

# Create the study (objective value is maximized) and run 20 trials
study_resnet = optuna.create_study(study_name='resnet50_tuning', direction='maximize')
study_resnet.optimize(lambda trial: objective(trial, 'resnet'), n_trials=20)

# Report the best trial
print('\n' + '=' * 50, 'RESNET50 - BEST HYPERPARAMETERS', '=' * 50, sep='\n')
print(f'Best validation accuracy: {study_resnet.best_value:.4f}')
print('Best hyperparameters:')
for param, chosen in study_resnet.best_params.items():
    print(f'  {param}: {chosen}')

## 6. Train Final Models with Best Hyperparameters

In [None]:
def train_final_model(model_name, best_params, epochs=30):
    """
    Train model with optimized hyperparameters

    Args:
        model_name: Backbone name passed on to create_model; also used as
            the prefix of the saved file name
        best_params: Dict with 'learning_rate', 'dropout_rate', 'batch_size'
            and 'optimizer' entries (e.g. an Optuna study's best_params)
        epochs: Maximum number of training epochs; early stopping on
            val_loss may end training sooner

    Returns:
        Tuple (model, history, test_acc): the trained model, the History
        object returned by fit, and the accuracy on the test split
    """
    keras.backend.clear_session()

    # Create datasets
    train_ds, val_ds, test_ds = create_datasets(best_params['batch_size'])

    # Create model
    model = create_model(model_name, best_params['dropout_rate'])

    # Optimizer: any name other than 'adam' / 'nadam' selects RMSprop
    optimizer_classes = {
        'adam': keras.optimizers.Adam,
        'nadam': keras.optimizers.Nadam,
    }
    optimizer_cls = optimizer_classes.get(best_params['optimizer'], keras.optimizers.RMSprop)
    optimizer = optimizer_cls(learning_rate=best_params['learning_rate'])

    # Compile
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # Callbacks: stop when val_loss stalls for 5 epochs (keeping the best
    # weights) and halve the learning rate after 3 stalled epochs
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-7)
    ]

    # Train
    print(f'\nTraining {model_name.upper()} for {epochs} epochs...')
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=callbacks,
        verbose=1
    )

    # Evaluate on test set
    test_loss, test_acc = model.evaluate(test_ds, verbose=0)
    print(f'{model_name.upper()} - Test Accuracy: {test_acc:.4f}')

    # Save model
    model_path = f'{model_name}_optimized.h5'
    model.save(model_path)
    print(f'✓ Saved to {model_path}')

    return model, history, test_acc

In [None]:
# Train all 3 models with best hyperparameters
# Pair every backbone name with the best parameters found by its study.
models_to_train = [
    (name, study.best_params)
    for name, study in (
        ('densenet', study_densenet),
        ('xception', study_xception),
        ('resnet', study_resnet),
    )
]

# Per-model record: trained model, training history, test accuracy, parameters
results = {}
for model_name, best_params in models_to_train:
    model, history, test_acc = train_final_model(model_name, best_params, epochs=30)
    results[model_name] = dict(
        model=model,
        history=history,
        test_acc=test_acc,
        best_params=best_params,
    )

print('\n' + '=' * 50, 'ALL MODELS TRAINED!', '=' * 50, sep='\n')

## 7. Results Comparison

In [None]:
# Create comparison table
# One row per trained model: test accuracy plus the hyperparameters it was
# trained with.
comparison_data = [
    {
        'Model': model_name.upper(),
        'Test Accuracy': f"{result['test_acc']:.4f}",
        'Learning Rate': f"{result['best_params']['learning_rate']:.2e}",
        'Dropout': result['best_params']['dropout_rate'],
        'Batch Size': result['best_params']['batch_size'],
        'Optimizer': result['best_params']['optimizer'],
    }
    for model_name, result in results.items()
]

df_results = pd.DataFrame(comparison_data)
print('\n' + '='*80)
print('FINAL MODEL COMPARISON')
print('='*80)
print(df_results.to_string(index=False))
print('='*80)

# Save to CSV
df_results.to_csv('model_comparison.csv', index=False)
print('\n✓ Saved comparison to model_comparison.csv')

In [None]:
# Plot training histories
# One panel per entry in `results`. The grid is sized from `results` so the
# cell also works when fewer or more than three models were trained;
# squeeze=False keeps `axes` two-dimensional even for a single panel.
num_panels = max(len(results), 1)
fig, axes = plt.subplots(1, num_panels, figsize=(6 * num_panels, 5), squeeze=False)

for ax, (model_name, result) in zip(axes.flat, results.items()):
    history = result['history']

    ax.plot(history.history['accuracy'], label='Train Accuracy')
    ax.plot(history.history['val_accuracy'], label='Val Accuracy')
    ax.set_title(f'{model_name.upper()}\nTest Acc: {result["test_acc"]:.4f}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_curves.png', dpi=300, bbox_inches='tight')
plt.show()
print('✓ Saved plot to training_curves.png')

## 8. Download Optimized Models

Download these model files for Stage 3 (Ensemble):

In [None]:
# List the saved model files with their sizes; a file that is missing is
# reported explicitly instead of being skipped without notice.
print('Models saved:')
for model_name in ['densenet', 'xception', 'resnet']:
    model_file = f'{model_name}_optimized.h5'
    if os.path.exists(model_file):
        size_mb = os.path.getsize(model_file) / (1024*1024)
        print(f'  ✓ {model_file} ({size_mb:.1f} MB)')
    else:
        print(f'  ✗ {model_file} (not found)')

print('\nDownload these .h5 files from the Files panel for Stage 3!')

## Summary

✅ **Completed:**
- Optuna hyperparameter optimization (20 trials per model)
- Trained 3 optimized models (DenseNet121, Xception, ResNet50)
- Saved the trained models (.h5 files)
- Generated performance comparison

**Next Step:** Use these 3 models in **Stage 3: Ensemble Stacking**