In [12]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from pathlib import Path
import optuna
from optuna.trial import Trial
import torch.nn as nn


In [13]:
# Root folder of the hymenoptera image dataset (expects 'train' and 'val' sub-folders)
data_dir = Path.home() / ".cache" / "mads_datasets" / "hymenoptera_data" / "hymenoptera_data"

# Training preprocessing: resize, take the central 224x224 crop, convert to a
# tensor and normalise each channel with the ImageNet mean / std.
train_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Validation preprocessing: currently the same steps as for training, kept as a
# separate pipeline object.
val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# One ImageFolder dataset per split, each paired with its own transform pipeline
train_dataset, val_dataset = (
    datasets.ImageFolder(root=data_dir / split_name, transform=split_transforms)
    for split_name, split_transforms in (
        ('train', train_transforms),
        ('val', val_transforms),
    )
)

# One loader per split; only the training loader shuffles.
# pin_memory=True is meant to speed up host-to-GPU transfers.
train_loader, val_loader = (
    DataLoader(
        split_dataset,
        batch_size=4,
        shuffle=shuffle_split,
        num_workers=4,
        pin_memory=True,
    )
    for split_dataset, shuffle_split in (
        (train_dataset, True),
        (val_dataset, False),
    )
)
num_classes = len(train_dataset.classes)

# Summarise what was loaded
print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")
print(f"Number of classes: {num_classes}")
print(f"Classes: {train_dataset.classes}")

# Pull a single batch as a smoke test of the training loader
images, labels = next(iter(train_loader))
print(f"Batch shape: {images.shape}")  # expected: [batch_size, 3, 224, 224]
print(f"Labels shape: {labels.shape}")  # expected: [batch_size]

Training samples: 244
Validation samples: 153
Number of classes: 2
Classes: ['ants', 'bees']
Batch shape: torch.Size([4, 3, 224, 224])
Labels shape: torch.Size([4])


In [3]:
# Count the images in the sampled batch (the length of its leading dimension)
print(f"Number of images in batch: {len(images)}")  # expected to equal batch_size

Number of images in batch: 4


In [30]:

import torch
import torch.nn as nn

class DynamicSimpleCNN(nn.Module):
    """CNN with a configurable stack of conv blocks followed by an MLP classifier.

    Every conv block combines a 3x3 convolution (main path) with a 1x1
    convolution of the block input (skip path), sums the two, applies ReLU and
    then halves the spatial size with 2x2 max pooling. After the last block the
    feature map is averaged to 1x1 and passed to a classifier with three hidden
    fully connected layers.
    """

    def __init__(self, num_classes, conv_out_channels, fc_hidden1, fc_hidden2, fc_hidden3, dropout_rate):
        """
        Initializes the dynamic CNN.

        Args:
            num_classes (int): Number of output classes.
            conv_out_channels (iterable of int): Output channel count of each
                convolutional block; its length determines the number of
                conv blocks. Must contain at least one entry.
            fc_hidden1 (int): Neurons in the 1st hidden FC layer.
            fc_hidden2 (int): Neurons in the 2nd hidden FC layer.
            fc_hidden3 (int): Neurons in the 3rd hidden FC layer.
            dropout_rate (float): Dropout probability (applied once, right
                before the output layer).

        Raises:
            ValueError: If conv_out_channels is empty.
        """
        super().__init__()

        # Materialise once so one-shot iterables (e.g. generators) can be both
        # looped over and indexed, and so an empty spec fails with a clear error
        # instead of an IndexError further down.
        conv_out_channels = list(conv_out_channels)
        if not conv_out_channels:
            raise ValueError(
                "conv_out_channels must contain at least one output channel count"
            )

        self.conv_blocks = nn.ModuleList()
        self.skip_convs = nn.ModuleList()
        self.pools = nn.ModuleList()
        self.relu = nn.ReLU()

        in_c = 3  # Initial input channels (RGB)

        # --- Dynamically create conv blocks ---
        for out_c in conv_out_channels:
            # Main conv path (padding=1 keeps the spatial size for a 3x3 kernel)
            self.conv_blocks.append(
                nn.Conv2d(in_c, out_c, kernel_size=3, padding=1)
            )

            # Skip connection path (1x1 conv so channel counts match for the sum)
            self.skip_convs.append(
                nn.Conv2d(in_c, out_c, kernel_size=1, stride=1)
            )

            # Pooling layer after each block
            self.pools.append(
                nn.MaxPool2d(kernel_size=2, stride=2)
            )

            # The next block consumes this block's output channels
            in_c = out_c

        # Classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # The classifier input width is the channel count of the last conv block
        final_conv_out = conv_out_channels[-1]

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(final_conv_out, fc_hidden1),
            nn.ReLU(),
            nn.Linear(fc_hidden1, fc_hidden2),
            nn.ReLU(),
            nn.Linear(fc_hidden2, fc_hidden3),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(fc_hidden3, num_classes)
        )

    def forward(self, x):
        """Run the conv blocks and the classifier.

        Args:
            x (torch.Tensor): Image batch with 3 channels (the first conv
                block is built for 3 input channels).

        Returns:
            torch.Tensor: Raw class scores with num_classes values per sample.
        """
        # --- Pass through dynamic conv blocks ---
        for conv, skip_conv, pool in zip(self.conv_blocks, self.skip_convs, self.pools):
            # Main path and skip path both start from the block input
            out = self.relu(conv(x))
            skip = skip_conv(x)

            # Add, activate, then downsample
            x = pool(self.relu(out + skip))

        # --- Classifier ---
        x = self.avgpool(x)
        x = self.classifier(x)
        return x

In [34]:
import torch
import torch.nn as nn
import torchvision.models as models

# For comparison: DynamicSimpleCNN (defined in an earlier cell) builds its own conv blocks,
# whereas the class below reuses a pre-trained backbone.

class DynamicClassifierWithTransferLearning(nn.Module):
    """Pre-trained ResNet-50 whose final `fc` layer is swapped for a custom MLP head."""

    def __init__(self, num_classes, fc_hidden1, fc_hidden2, fc_hidden3, 
                 dropout_rate, freeze_base_model=True):
        """
        Build the backbone and attach the new classifier head.

        Args:
            num_classes (int): Number of output classes.
            fc_hidden1 (int): Width of the 1st hidden FC layer.
            fc_hidden2 (int): Width of the 2nd hidden FC layer.
            fc_hidden3 (int): Width of the 3rd hidden FC layer.
            dropout_rate (float): Dropout probability used before the output layer.
            freeze_base_model (bool): When True, gradients are disabled for all
                pre-trained backbone parameters; the new head stays trainable.
        """
        super().__init__()

        # Pre-trained backbone, loaded through the weights-enum API
        backbone = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

        # Freeze before the head is attached, so only backbone weights are affected
        if freeze_base_model:
            backbone.requires_grad_(False)

        self.base_model = backbone

        # Width of the features entering the original `fc` layer (2048 for ResNet-50)
        head_in_features = backbone.fc.in_features

        # No nn.Flatten() is needed in the head: the ResNet forward pass already
        # flattens the pooled features right before calling `fc`.
        head = nn.Sequential(
            nn.Linear(head_in_features, fc_hidden1),
            nn.ReLU(),
            nn.Linear(fc_hidden1, fc_hidden2),
            nn.ReLU(),
            nn.Linear(fc_hidden2, fc_hidden3),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(fc_hidden3, num_classes),
        )

        # The same module object is exposed as `custom_classifier` and installed
        # as the backbone's final layer.
        self.custom_classifier = head
        backbone.fc = head

    def forward(self, x):
        """
        Run the full network.

        Pooling and flattening happen inside the backbone, so the input is
        simply handed to it and its output (the head's class scores) returned.
        """
        scores = self.base_model(x)
        return scores

In [35]:
# ============= TRAINING FUNCTIONS =============
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train `model` for one full pass over `dataloader`.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable yielding `(images, labels)` tensor pairs.
        criterion: Loss callable invoked as `criterion(outputs, labels)`.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple[float, float]: `(epoch_loss, epoch_acc)`, the sample-weighted
        average of the per-batch losses and the fraction of samples whose
        arg-max prediction equals the label.

    Raises:
        ValueError: If `dataloader` yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight by batch size so a smaller final batch does not skew the
        # average (assumes the criterion returns a per-batch mean).
        running_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError below
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

def validate(model, dataloader, criterion, device):
    """Evaluate `model` on `dataloader` without updating any weights.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding `(images, labels)` tensor pairs.
        criterion: Loss callable invoked as `criterion(outputs, labels)`.
        device: Device the batches are moved to before the forward pass.

    Returns:
        tuple[float, float]: `(epoch_loss, epoch_acc)`, the sample-weighted
        average of the per-batch losses and the fraction of samples whose
        arg-max prediction equals the label.

    Raises:
        ValueError: If `dataloader` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight by batch size so a smaller final batch does not skew the
            # average (assumes the criterion returns a per-batch mean).
            running_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError below
        raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

In [36]:

# ============= OBJECTIVE FUNCTION FOR OPTUNA =============
def objective(trial: Trial):
    """Optuna objective: train a transfer-learning classifier and score it.

    Samples the learning rate, batch size, head widths, dropout rate and
    optimizer from `trial`, trains for a fixed number of epochs on the
    module-level `train_dataset`, and reports the validation accuracy after
    every epoch so the study's pruner can stop the trial early.

    Args:
        trial (Trial): Optuna trial used to sample hyperparameters and to
            report intermediate results.

    Returns:
        float: Validation accuracy measured after the final epoch.

    Raises:
        optuna.TrialPruned: When the pruner decides the trial should stop.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # --- Search space (the order of the suggest calls is kept stable) ---
    lr = trial.suggest_float('lr', 1e-3, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [4])
    head_config = {
        'fc_hidden1': trial.suggest_categorical('fc_hidden1', [512]),
        'fc_hidden2': trial.suggest_categorical('fc_hidden2', [256]),
        'fc_hidden3': trial.suggest_categorical('fc_hidden3', [128]),
        'dropout_rate': trial.suggest_float('dropout_rate', 0.15, 0.25),
    }
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam'])

    # --- Data: fresh loaders using the sampled batch size ---
    loader_options = {'batch_size': batch_size, 'num_workers': 4}
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

    # --- Model and loss ---
    model = DynamicClassifierWithTransferLearning(num_classes=num_classes, **head_config)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()

    # --- Optimizer: look up the sampled name; any other name falls back to RMSprop ---
    optimizer_table = {
        'Adam': (optim.Adam, {}),
        'SGD': (optim.SGD, {'momentum': 0.9}),
    }
    optimizer_cls, optimizer_extras = optimizer_table.get(optimizer_name, (optim.RMSprop, {}))
    optimizer = optimizer_cls(model.parameters(), lr=lr, **optimizer_extras)

    # --- Short training run (few epochs keep each trial cheap) ---
    num_epochs = 5
    for epoch in range(num_epochs):
        train_epoch(model, train_loader, criterion, optimizer, device)
        _, val_acc = validate(model, val_loader, criterion, device)

        # Hand the intermediate score to the pruner and stop if told to
        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return val_acc

# ============= RUN HYPERPARAMETER SEARCH =============
if __name__ == '__main__':
    # Runs the Optuna study, then writes:
    #   - optuna_trials_results.csv  (one row per trial, best first)
    #   - best_hyperparameters.json  (parameters of the best trial)
    #   - optimization_history.html / param_importances.html (best effort)
    import json

    import pandas as pd

    # Create study: maximise validation accuracy; the median pruner only kicks
    # in after 5 finished trials and after the first 3 steps of a trial.
    study = optuna.create_study(
        direction='maximize',
        pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=3)
    )

    # Run optimization
    print("Starting hyperparameter optimization...")
    study.optimize(objective, n_trials=50, timeout=None)

    # Convert trials to DataFrame (one record per trial). The loop variable has
    # its own name so it does not shadow `trial`, which holds the best trial below.
    trials_data = []
    for finished_trial in study.trials:
        trial_dict = {
            'trial_number': finished_trial.number,
            # Trials without a value are recorded as 0.0
            'val_accuracy': finished_trial.value if finished_trial.value is not None else 0.0,
            'state': finished_trial.state.name,
            'duration_seconds': (
                finished_trial.duration.total_seconds() if finished_trial.duration else 0.0
            ),
        }
        # Add all hyperparameters
        trial_dict.update(finished_trial.params)
        trials_data.append(trial_dict)

    df_results = pd.DataFrame(trials_data)

    # Sort by validation accuracy (best first)
    df_results = df_results.sort_values('val_accuracy', ascending=False).reset_index(drop=True)

    # Save to CSV
    df_results.to_csv('optuna_trials_results.csv', index=False)
    print(f"\nAll {len(df_results)} trials saved to 'optuna_trials_results.csv'")

    # Print results
    print("\n" + "="*50)
    print("Best trial:")
    trial = study.best_trial
    print(f"  Trial Number: {trial.number}")
    print(f"  Value (Val Accuracy): {trial.value:.4f}")
    print("  Params:")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    # Save best hyperparameters
    with open('best_hyperparameters.json', 'w') as f:
        json.dump(trial.params, f, indent=4)
    print("\nBest hyperparameters saved to 'best_hyperparameters.json'")

    # Optionally visualize. This stays best-effort, but the handlers are
    # narrowed: a bare `except:` would also swallow KeyboardInterrupt and would
    # blame plotly for every failure.
    try:
        import optuna.visualization as vis
        fig = vis.plot_optimization_history(study)
        fig.write_html('optimization_history.html')

        fig = vis.plot_param_importances(study)
        fig.write_html('param_importances.html')

        print("\nVisualizations saved:")
        print("  - optimization_history.html")
        print("  - param_importances.html")
    except ImportError as exc:
        # A plotting dependency is missing; show which one
        print("Install plotly for visualization: pip install plotly")
        print(f"  (import error: {exc})")
    except Exception as exc:
        # Any other plotting failure is reported instead of being mislabelled
        print(f"Skipping visualizations: {type(exc).__name__}: {exc}")

[I 2025-10-26 19:42:46,741] A new study created in memory with name: no-name-b8e25446-f9b5-483e-94db-4aa1e4bf96ba
Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to C:\Users\r.weenink/.cache\torch\hub\checkpoints\resnet50-11ad3fa6.pth


Starting hyperparameter optimization...


100%|██████████| 97.8M/97.8M [00:01<00:00, 88.9MB/s]
[I 2025-10-26 19:43:35,310] Trial 0 finished with value: 0.8758169934640523 and parameters: {'lr': 0.0014453363307990706, 'batch_size': 4, 'fc_hidden1': 512, 'fc_hidden2': 256, 'fc_hidden3': 128, 'dropout_rate': 0.2396759262456736, 'optimizer': 'Adam'}. Best is trial 0 with value: 0.8758169934640523.
[I 2025-10-26 19:44:29,778] Trial 1 finished with value: 0.9607843137254902 and parameters: {'lr': 0.0073922186469834285, 'batch_size': 4, 'fc_hidden1': 512, 'fc_hidden2': 256, 'fc_hidden3': 128, 'dropout_rate': 0.15497484895637012, 'optimizer': 'Adam'}. Best is trial 1 with value: 0.9607843137254902.
[I 2025-10-26 19:45:24,366] Trial 2 finished with value: 0.9673202614379085 and parameters: {'lr': 0.0016090829719801736, 'batch_size': 4, 'fc_hidden1': 512, 'fc_hidden2': 256, 'fc_hidden3': 128, 'dropout_rate': 0.19819771880335438, 'optimizer': 'Adam'}. Best is trial 2 with value: 0.9673202614379085.
[I 2025-10-26 19:46:19,953] Trial 3 fin


All 50 trials saved to 'optuna_trials_results.csv'

Best trial:
  Trial Number: 2
  Value (Val Accuracy): 0.9673
  Params:
    lr: 0.0016090829719801736
    batch_size: 4
    fc_hidden1: 512
    fc_hidden2: 256
    fc_hidden3: 128
    dropout_rate: 0.19819771880335438
    optimizer: Adam

Best hyperparameters saved to 'best_hyperparameters.json'

Visualizations saved:
  - optimization_history.html
  - param_importances.html


In [37]:
# df_results was sorted by val_accuracy (descending) in the search cell,
# so the first ten rows are the ten best-scoring trials.
df_results.head(10)

Unnamed: 0,trial_number,val_accuracy,state,duration_seconds,lr,batch_size,fc_hidden1,fc_hidden2,fc_hidden3,dropout_rate,optimizer
0,9,0.96732,COMPLETE,54.024678,0.001662,4,512,256,128,0.240649,Adam
1,2,0.96732,COMPLETE,54.587536,0.001609,4,512,256,128,0.198198,Adam
2,14,0.96732,COMPLETE,55.39784,0.001033,4,512,256,128,0.217261,Adam
3,31,0.96732,COMPLETE,57.764627,0.001375,4,512,256,128,0.22653,Adam
4,28,0.96732,COMPLETE,57.850347,0.002857,4,512,256,128,0.191047,Adam
5,23,0.96732,COMPLETE,55.234899,0.001229,4,512,256,128,0.228575,Adam
6,24,0.960784,COMPLETE,55.060538,0.002158,4,512,256,128,0.196424,Adam
7,1,0.960784,COMPLETE,54.466504,0.007392,4,512,256,128,0.154975,Adam
8,11,0.960784,COMPLETE,55.189832,0.002367,4,512,256,128,0.204714,Adam
9,43,0.960784,COMPLETE,55.549556,0.00466,4,512,256,128,0.200847,Adam
