# Hyperparameter Optimization for CNN on Tiny ImageNet

This notebook implements an automated hyperparameter optimization pipeline for a CNN architecture on Tiny ImageNet, using Optuna.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from pathlib import Path
import logging
import yaml

# Notebook-wide settings: dataset location, artifact directory, number of
# target classes, and the device tensors/models are moved to (CUDA when
# torch reports it as available, otherwise CPU).
CONFIG = dict(
    data_dir=Path('../data/tiny-imagenet-200'),
    output_dir=Path('./outputs'),
    num_classes=200,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)

# INFO-level logging, then make sure the artifact directory is present
# (an already-existing directory is not an error).
logging.basicConfig(level=logging.INFO)
CONFIG['output_dir'].mkdir(exist_ok=True)
print(f"Using device: {CONFIG['device']}")

## 1. Data Pipeline

In [None]:
class DataManager:
    """Build the training and validation ``DataLoader`` objects.

    Training images are augmented (random horizontal flip and a random
    rotation of up to 10 degrees). Validation images only go through the
    deterministic tensor conversion and normalization, so the validation
    metric does not fluctuate with random augmentation.

    Args:
        data_dir: Dataset root containing ``train`` and ``val`` folders.
        num_workers: Worker processes used by each ``DataLoader``.
    """

    # Per-channel mean / standard deviation handed to ``transforms.Normalize``.
    _NORM_MEAN = [0.485, 0.456, 0.406]
    _NORM_STD = [0.229, 0.224, 0.225]

    def __init__(self, data_dir, num_workers=4):
        self.data_dir = Path(data_dir)
        self.num_workers = num_workers
        # ``transform`` keeps its original meaning (training pipeline);
        # ``eval_transform`` is the augmentation-free variant.
        self.transform = self._get_transforms()
        self.eval_transform = self._get_transforms(train=False)

    def _get_transforms(self, train=True):
        """Return the preprocessing pipeline.

        Args:
            train: When True (default) prepend the random augmentations;
                when False return only ``ToTensor`` + ``Normalize``.
        """
        steps = []
        if train:
            steps.extend([
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(10),
            ])
        steps.extend([
            transforms.ToTensor(),
            transforms.Normalize(self._NORM_MEAN, self._NORM_STD),
        ])
        return transforms.Compose(steps)

    def get_loaders(self, batch_size):
        """Return ``(train_loader, val_loader)`` for the given batch size.

        The training loader shuffles; the validation loader does not.
        """
        # NOTE(review): ImageFolder derives labels from sub-directory names,
        # so ``val/`` must use the same per-class folder layout as ``train/``
        # -- confirm the dataset on disk has been arranged that way.
        train_dataset = ImageFolder(self.data_dir / 'train', self.transform)
        val_dataset = ImageFolder(self.data_dir / 'val', self.eval_transform)

        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=True,
        )
        val_loader = DataLoader(
            val_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=True,
        )

        return train_loader, val_loader

## 2. Model Architecture

In [None]:
class CNN(nn.Module):
    """Configurable stack of conv blocks followed by a two-layer classifier.

    Each conv block is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2)``, so every block halves the spatial resolution.

    Required ``config`` keys:
        num_conv_layers: Number of conv blocks to build.
        channels: Output width per block; must hold at least
            ``num_conv_layers`` entries (extra entries are ignored).
        hidden_size: Width of the hidden fully connected layer.
        dropout: Dropout probability before the output layer.

    Optional ``config`` keys:
        input_size: Height/width of the square input images (default 64).
        num_classes: Output size (default ``CONFIG['num_classes']``).

    Raises:
        ValueError: If ``channels`` has fewer than ``num_conv_layers``
            entries, or the pooling stack would shrink the feature map to
            nothing.
    """

    _IN_CHANNELS = 3
    _DEFAULT_INPUT_SIZE = 64

    def __init__(self, config):
        super().__init__()
        self.features = self._make_features(config)
        self.classifier = self._make_classifier(config)

    @staticmethod
    def _active_channels(config):
        """Return the output widths of the conv blocks that are actually built."""
        depth = config['num_conv_layers']
        widths = list(config['channels'][:depth])
        if len(widths) < depth:
            raise ValueError(
                f"config['channels'] has {len(widths)} entries but "
                f"num_conv_layers is {depth}"
            )
        return widths

    def _make_features(self, config):
        layers = []
        in_channels = self._IN_CHANNELS

        for out_channels in self._active_channels(config):
            layers.extend([
                nn.Conv2d(in_channels, out_channels, 3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ])
            in_channels = out_channels

        return nn.Sequential(*layers)

    def _make_classifier(self, config):
        widths = self._active_channels(config)
        # The flattened size must follow the LAST conv block that was built,
        # not the last entry of ``channels`` (which may belong to an unused
        # layer when ``channels`` is longer than ``num_conv_layers``).
        last_channels = widths[-1] if widths else self._IN_CHANNELS

        input_size = config.get('input_size', self._DEFAULT_INPUT_SIZE)
        feature_size = input_size // (2 ** len(widths))
        if feature_size < 1:
            raise ValueError(
                f"{len(widths)} pooling stages reduce a {input_size}px input "
                "to an empty feature map"
            )
        flatten_size = last_channels * feature_size * feature_size

        # Only fall back to the notebook-wide setting when the caller did
        # not specify the number of classes.
        if 'num_classes' in config:
            num_classes = config['num_classes']
        else:
            num_classes = CONFIG['num_classes']

        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(flatten_size, config['hidden_size']),
            nn.ReLU(inplace=True),
            nn.Dropout(config['dropout']),
            nn.Linear(config['hidden_size'], num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)

## 3. Training Pipeline

In [None]:
class Trainer:
    """Run training and validation epochs for one model.

    The model is moved to ``CONFIG['device']`` on construction, and the
    highest validation accuracy seen so far is kept in ``best_accuracy``.
    """

    def __init__(self, model, optimizer, criterion):
        self.model = model.to(CONFIG['device'])
        self.best_accuracy = 0
        self.criterion = criterion
        self.optimizer = optimizer

    @staticmethod
    def _count_correct(logits, labels):
        """Number of rows whose highest-scoring class equals the label."""
        predicted = logits.max(1)[1]
        return predicted.eq(labels).sum().item()

    def train_epoch(self, train_loader):
        """Do one optimization pass over ``train_loader``.

        Returns:
            ``(mean loss per batch, accuracy in percent)``.
        """
        self.model.train()
        loss_sum, hits, seen = 0, 0, 0

        for batch, labels in train_loader:
            batch = batch.to(CONFIG['device'])
            labels = labels.to(CONFIG['device'])

            # Standard step: clear gradients, forward, backward, update.
            self.optimizer.zero_grad()
            logits = self.model(batch)
            batch_loss = self.criterion(logits, labels)
            batch_loss.backward()
            self.optimizer.step()

            loss_sum += batch_loss.item()
            seen += labels.size(0)
            hits += self._count_correct(logits, labels)

        mean_loss = loss_sum / len(train_loader)
        return mean_loss, 100. * hits / seen

    def validate(self, val_loader):
        """Evaluate on ``val_loader`` without tracking gradients.

        Updates ``best_accuracy`` when the new accuracy is strictly higher.

        Returns:
            ``(mean loss per batch, accuracy in percent)``.
        """
        self.model.eval()
        loss_sum, hits, seen = 0, 0, 0

        with torch.no_grad():
            for batch, labels in val_loader:
                batch = batch.to(CONFIG['device'])
                labels = labels.to(CONFIG['device'])
                logits = self.model(batch)

                loss_sum += self.criterion(logits, labels).item()
                seen += labels.size(0)
                hits += self._count_correct(logits, labels)

        accuracy = 100. * hits / seen
        self.best_accuracy = max(self.best_accuracy, accuracy)

        mean_loss = loss_sum / len(val_loader)
        return mean_loss, accuracy

## 4. Hyperparameter Optimization

In [None]:
class HyperparameterOptimizer:
    """Optuna search over the CNN architecture and training settings.

    Args:
        epochs_per_trial: Number of train/validate epochs each trial runs
            before its best validation accuracy is reported (default 10).
    """

    def __init__(self, epochs_per_trial=10):
        self.study = optuna.create_study(direction="maximize")
        self.data_manager = DataManager(CONFIG['data_dir'])
        self.epochs_per_trial = epochs_per_trial

    def suggest_params(self, trial):
        """Sample one model/training configuration from ``trial``.

        Channel widths are sampled only for the conv layers that will be
        built, so ``channels`` has exactly ``num_conv_layers`` entries and
        the study does not search over parameters that have no effect.
        """
        num_conv_layers = trial.suggest_int('num_conv_layers', 3, 5)
        # ``step`` is passed by keyword, as the Optuna API documents it.
        channels = [
            trial.suggest_int(f'channels_{i}', 32, 256, step=32)
            for i in range(num_conv_layers)
        ]
        return {
            'num_conv_layers': num_conv_layers,
            'channels': channels,
            'hidden_size': trial.suggest_int('hidden_size', 512, 2048, step=128),
            'dropout': trial.suggest_float('dropout', 0.1, 0.5),
            'learning_rate': trial.suggest_float('lr', 1e-5, 1e-2, log=True),
            'batch_size': trial.suggest_categorical('batch_size', [32, 64, 128]),
        }

    def objective(self, trial):
        """Train one sampled configuration and return its best validation accuracy.

        Raises:
            optuna.TrialPruned: When the study's pruner asks to stop the
                trial after an intermediate report.
        """
        config = self.suggest_params(trial)
        train_loader, val_loader = self.data_manager.get_loaders(config['batch_size'])

        model = CNN(config)
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
        criterion = nn.CrossEntropyLoss()
        trainer = Trainer(model, optimizer, criterion)

        for epoch in range(self.epochs_per_trial):
            trainer.train_epoch(train_loader)
            _, val_acc = trainer.validate(val_loader)

            # Report the intermediate value so the pruner can stop
            # unpromising trials early.
            trial.report(val_acc, epoch)
            if trial.should_prune():
                raise optuna.TrialPruned()

        return trainer.best_accuracy

    def optimize(self, n_trials=100):
        """Run ``n_trials`` trials and return ``(best_params, best_value)``.

        ``best_params`` is the flat dict recorded by the study, keyed by the
        names given to ``trial.suggest_*`` (e.g. ``'lr'``, ``'channels_0'``).
        """
        self.study.optimize(self.objective, n_trials=n_trials)
        return self.study.best_params, self.study.best_value

## 5. Model Training with Best Parameters

In [None]:
def _params_to_config(params):
    """Normalize hyperparameters into the nested config the model code reads.

    Accepts either the nested form (``'channels'`` list, ``'learning_rate'``)
    or the flat form recorded by an Optuna study (``'channels_0'``,
    ``'channels_1'``, ..., ``'lr'``). The input dict is not modified.
    """
    config = dict(params)
    depth = config['num_conv_layers']

    if 'learning_rate' not in config:
        config['learning_rate'] = config.pop('lr')

    if 'channels' in config:
        channels = list(config['channels'])
    else:
        channels = [config[f'channels_{i}'] for i in range(depth)]
    # Keep only the widths of layers that are actually built, so the last
    # entry always describes the last conv layer.
    config['channels'] = channels[:depth]

    # Drop the flat per-layer keys now that they are folded into 'channels'.
    for key in [k for k in config if k.startswith('channels_')]:
        del config[key]

    return config


def train_final_model(config, epochs=30):
    """Train a model with the given hyperparameters and checkpoint the best epoch.

    Args:
        config: Hyperparameters, either in the nested form or as the flat
            ``best_params`` dict of an Optuna study.
        epochs: Number of training epochs.

    Returns:
        The highest validation accuracy (percent) reached; the matching
        weights are saved to ``CONFIG['output_dir'] / 'best_model.pth'``.
    """
    config = _params_to_config(config)

    data_manager = DataManager(CONFIG['data_dir'])
    train_loader, val_loader = data_manager.get_loaders(config['batch_size'])

    model = CNN(config)
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    criterion = nn.CrossEntropyLoss()
    trainer = Trainer(model, optimizer, criterion)

    # Make sure the checkpoint directory exists before the first save.
    CONFIG['output_dir'].mkdir(parents=True, exist_ok=True)
    checkpoint_path = CONFIG['output_dir'] / 'best_model.pth'

    best_acc = 0
    for epoch in range(epochs):
        train_loss, train_acc = trainer.train_epoch(train_loader)
        val_loss, val_acc = trainer.validate(val_loader)

        # Only overwrite the checkpoint when validation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save({
                'model_state_dict': model.state_dict(),
                'config': config,
                'accuracy': val_acc
            }, checkpoint_path)

        logging.info(f'Epoch {epoch+1}/{epochs} - '
                    f'Train Acc: {train_acc:.2f}%, '
                    f'Val Acc: {val_acc:.2f}%')

    return best_acc

## 6. Run Complete Pipeline

In [None]:
def main():
    """Run the hyperparameter search, then train a final model.

    Returns:
        Dict with ``'best_params'`` (as returned by the search) and
        ``'best_accuracy'`` (value returned by ``train_final_model``).
    """
    # Stage 1: hyperparameter search.
    search = HyperparameterOptimizer()
    best_params, best_value = search.optimize(n_trials=100)

    logging.info(f"Best validation accuracy: {best_value:.2f}%")
    logging.info("Best hyperparameters:")
    for name in best_params:
        logging.info(f"{name}: {best_params[name]}")

    # Stage 2: full training run with the winning hyperparameters.
    # NOTE(review): ``best_params`` is keyed by the names passed to
    # ``trial.suggest_*`` (e.g. 'lr', 'channels_0'); confirm that
    # ``train_final_model`` accepts that flat form.
    final_accuracy = train_final_model(best_params)
    logging.info(f"Final model accuracy: {final_accuracy:.2f}%")

    summary = dict(best_params=best_params, best_accuracy=final_accuracy)
    return summary


if __name__ == "__main__":
    results = main()