# Fetch Data If It Doesn't Exist

Data from: https://www.kaggle.com/datasets/samuelcortinhas/muffin-vs-chihuahua-image-classification

In [1]:
import kagglehub
from pathlib import Path

# Download the latest dataset version; the call returns the local directory
path = kagglehub.dataset_download(
    "samuelcortinhas/muffin-vs-chihuahua-image-classification"
)
print("Path to dataset files:", path)

# Root of the download plus the two split folders used for training.
# NOTE: the dataset's "test" folder is what this notebook uses as validation data.
DATA_DIR = Path(path)  # adjust if needed
TRAIN_DIR, VAL_DIR = DATA_DIR / "train", DATA_DIR / "test"



  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: C:\Users\JTSoundy1\.cache\kagglehub\datasets\samuelcortinhas\muffin-vs-chihuahua-image-classification\versions\2


# Train

In [None]:
import os
import optuna
from typing import Tuple

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms

# Query CUDA once and reuse the answer for device selection and logging
_cuda_ok = torch.cuda.is_available()
DEVICE = torch.device("cuda") if _cuda_ok else torch.device("cpu")

if _cuda_ok:
    _gpu_label = torch.cuda.get_device_name(0)
else:
    _gpu_label = "No GPU"

print("Using device:", DEVICE)
print("CUDA visible devices:", os.environ.get("CUDA_VISIBLE_DEVICES", "None"))
print("GPU name:", _gpu_label)

# Let cuDNN benchmark and pick convolution algorithms for the input shapes it sees
torch.backends.cudnn.benchmark = True

NUM_CLASSES = 2  # muffin vs. chihuahua
NUM_EPOCHS = 3   # epochs per trial; kept small for the demo
BATCH_SIZE = 32  # default batch size; the Optuna objective samples its own value




def get_data_loaders(batch_size: int) -> Tuple[DataLoader, DataLoader]:
    """
    Create PyTorch dataloaders for train and validation.

    Images are read with ``datasets.ImageFolder`` from the module-level
    ``TRAIN_DIR`` and ``VAL_DIR`` directories.

    Args:
        batch_size: Number of samples per batch for both loaders.

    Returns:
        ``(train_loader, val_loader)``. The training loader shuffles and
        applies a random horizontal flip; the validation loader does neither.
    """
    # Same per-channel statistics for both splits (the standard ImageNet
    # values, matching the ImageNet-pretrained backbone used by build_model).
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )

    # Basic augmentation for train
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # Deterministic preprocessing for validation. The CenterCrop is a no-op
    # after the exact 224x224 resize; it is kept so preprocessing is unchanged.
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=train_transform)
    val_dataset = datasets.ImageFolder(root=VAL_DIR, transform=val_transform)

    # Use half the cores. os.cpu_count() may return None, and on a single-core
    # machine the result is 0 workers (data is then loaded in the main process).
    num_workers = (os.cpu_count() or 1) // 2

    # DataLoader raises ValueError for persistent_workers=True with 0 workers,
    # so only request persistent workers when worker processes exist.
    persistent_workers = num_workers > 0

    # Pinned host memory only helps when batches are copied to a CUDA device.
    pin_memory = torch.cuda.is_available()

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=pin_memory,
        persistent_workers=persistent_workers,
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=pin_memory,
        persistent_workers=persistent_workers,
    )

    return train_loader, val_loader


def build_model(dropout: float, freeze_backbone: bool) -> nn.Module:
    """
    Build a two-stage classifier: an ImageNet-pretrained ResNet18 followed by
    a fresh dropout + linear head with NUM_CLASSES outputs.

    Args:
        dropout: Dropout probability applied before the final linear layer.
        freeze_backbone: When True, gradients are disabled for every
            pretrained parameter, leaving only the new head trainable.

    Returns:
        The model, moved to DEVICE.
    """
    net = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    if freeze_backbone:
        # Disable gradients before attaching the head so the head stays trainable
        net.requires_grad_(False)

    # Swap the original classification layer for the tunable head
    head = nn.Sequential(
        nn.Dropout(p=dropout),
        nn.Linear(net.fc.in_features, NUM_CLASSES),
    )
    net.fc = head

    return net.to(DEVICE)


def train_one_epoch(
    model: nn.Module,
    loader: DataLoader,
    criterion: nn.Module,
    optimizer: optim.Optimizer
) -> float:
    """
    Run one optimisation pass over ``loader``.

    The model is switched to training mode and updated once per batch.

    Returns:
        The sum of per-batch losses weighted by batch size, divided by the
        number of samples in ``loader.dataset``.
    """
    model.train()
    total_loss = 0.0

    for batch_inputs, batch_labels in loader:
        # Move the batch to the target device (async when the source is pinned)
        batch_inputs = batch_inputs.to(DEVICE, non_blocking=True)
        batch_labels = batch_labels.to(DEVICE, non_blocking=True)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch is averaged correctly
        total_loss += batch_loss.item() * batch_inputs.size(0)

    num_samples = len(loader.dataset)
    return total_loss / num_samples


def evaluate(model: nn.Module, loader: DataLoader, criterion: nn.Module) -> Tuple[float, float]:
    """
    Evaluate the model on a validation set.

    The model is switched to eval mode (and left in it) and gradients are
    disabled for the whole pass.

    Args:
        model: Network to evaluate; expected to already live on DEVICE.
        loader: DataLoader yielding ``(inputs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.

    Returns:
        ``(average loss, accuracy)``, both normalised by ``len(loader.dataset)``.
    """
    model.eval()
    running_loss = 0.0
    correct = 0

    with torch.no_grad():
        for inputs, labels in loader:
            # Same transfer style as train_one_epoch: non_blocking lets the
            # copy overlap with compute when the loader pins host memory.
            inputs = inputs.to(DEVICE, non_blocking=True)
            labels = labels.to(DEVICE, non_blocking=True)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight by batch size so a smaller final batch is averaged correctly
            running_loss += loss.item() * inputs.size(0)

            # Predicted class is the index of the largest logit
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()

    num_samples = len(loader.dataset)
    avg_loss = running_loss / num_samples
    accuracy = correct / num_samples

    return avg_loss, accuracy


def objective(trial: optuna.trial.Trial) -> float:
    """
    Optuna objective function.

    Samples hyperparameters from the trial, trains a fresh model for
    NUM_EPOCHS epochs, and reports validation accuracy after every epoch so
    the study's pruner can stop unpromising trials.

    Args:
        trial: The Optuna trial used to sample hyperparameters and to report
            intermediate values.

    Returns:
        The best validation accuracy observed across the epochs of this trial.

    Raises:
        optuna.exceptions.TrialPruned: If the pruner decides to stop the trial.
    """

    # 1. Sample hyperparameters
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD"])
    dropout = trial.suggest_float("dropout", 0.0, 0.7)
    freeze_backbone = trial.suggest_categorical("freeze_backbone", [True, False])
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])

    # 2. Data loaders
    train_loader, val_loader = get_data_loaders(batch_size)

    # 3. Model, criterion, optimizer
    model = build_model(dropout=dropout, freeze_backbone=freeze_backbone)
    criterion = nn.CrossEntropyLoss()

    # Only hand trainable parameters to the optimizer (the backbone may be frozen)
    trainable_params = [p for p in model.parameters() if p.requires_grad]

    if optimizer_name == "Adam":
        optimizer = optim.Adam(trainable_params, lr=lr)
    else:  # SGD
        # Momentum is a conditional parameter: sampled only for SGD trials
        momentum = trial.suggest_float("momentum", 0.5, 0.99)
        optimizer = optim.SGD(trainable_params, lr=lr, momentum=momentum)

    # 4. Training loop (short, on purpose)
    best_val_acc = 0.0
    for epoch in range(NUM_EPOCHS):
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc = evaluate(model, val_loader, criterion)
        best_val_acc = max(best_val_acc, val_acc)

        # Log before the prune check so the epoch that triggers pruning is
        # still visible in the output.
        print(
            f"Trial {trial.number} | "
            f"Epoch {epoch+1}/{NUM_EPOCHS} | "
            f"Train Loss: {train_loss:.4f} | "
            f"Val Loss: {val_loss:.4f} | "
            f"Val Acc: {val_acc:.4f}"
        )

        # Report to Optuna so it can prune bad trials early
        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return best_val_acc


# The objective returns an accuracy, so the study is configured to maximize it
study = optuna.create_study(study_name="muffin_vs_chihuahua", direction="maximize")

# Run a small number of trials for live demo.
# Increase this for better performance.
study.optimize(objective, n_trials=2)

print("Number of finished trials: ", len(study.trials))
print("Best trial:")
best_trial = study.best_trial

# Summarize the winning trial: its objective value, then each sampled parameter
print(f"  Value (Val Accuracy): {best_trial.value:.4f}")
print("  Params:")
for param_name, param_value in best_trial.params.items():
    print(f"    {param_name}: {param_value}")





[I 2025-11-20 14:54:01,506] A new study created in memory with name: muffin_vs_chihuahua


Using device: cuda
CUDA visible devices: None
GPU name: NVIDIA GeForce RTX 4080 Laptop GPU
Trial 0 | Epoch 1/3 | Train Loss: 0.3229 | Val Loss: 0.1457 | Val Acc: 0.9772
Trial 0 | Epoch 2/3 | Train Loss: 0.1427 | Val Loss: 0.0956 | Val Acc: 0.9797
Trial 0 | Epoch 3/3 | Train Loss: 0.1044 | Val Loss: 0.0744 | Val Acc: 0.9856


[I 2025-11-20 14:55:08,191] Trial 0 finished with value: 0.9856418918918919 and parameters: {'lr': 0.0005822840473861821, 'optimizer': 'SGD', 'dropout': 0.14900869821485335, 'freeze_backbone': True, 'batch_size': 32, 'momentum': 0.5742311171402458}. Best is trial 0 with value: 0.9856418918918919.


Trial 1 | Epoch 1/3 | Train Loss: 0.2357 | Val Loss: 0.1532 | Val Acc: 0.9417
Trial 1 | Epoch 2/3 | Train Loss: 0.1373 | Val Loss: 0.0925 | Val Acc: 0.9704
Trial 1 | Epoch 3/3 | Train Loss: 0.1013 | Val Loss: 0.2029 | Val Acc: 0.9299


[I 2025-11-20 14:56:19,664] Trial 1 finished with value: 0.9704391891891891 and parameters: {'lr': 0.0017506599143874144, 'optimizer': 'Adam', 'dropout': 0.5913934936453981, 'freeze_backbone': False, 'batch_size': 32}. Best is trial 0 with value: 0.9856418918918919.


Trial 2 | Epoch 1/3 | Train Loss: 0.4197 | Val Loss: 0.1244 | Val Acc: 0.9823
Trial 2 | Epoch 2/3 | Train Loss: 0.2121 | Val Loss: 0.0808 | Val Acc: 0.9840
Trial 2 | Epoch 3/3 | Train Loss: 0.1784 | Val Loss: 0.0654 | Val Acc: 0.9848


[I 2025-11-20 14:57:29,238] Trial 2 finished with value: 0.9847972972972973 and parameters: {'lr': 8.386029932692986e-05, 'optimizer': 'SGD', 'dropout': 0.6923864616687978, 'freeze_backbone': True, 'batch_size': 16, 'momentum': 0.8845979289076844}. Best is trial 0 with value: 0.9856418918918919.


Trial 3 | Epoch 1/3 | Train Loss: 0.0987 | Val Loss: 0.0173 | Val Acc: 0.9941
Trial 3 | Epoch 2/3 | Train Loss: 0.0211 | Val Loss: 0.0097 | Val Acc: 0.9966
Trial 3 | Epoch 3/3 | Train Loss: 0.0135 | Val Loss: 0.0130 | Val Acc: 0.9932


[I 2025-11-20 14:58:38,817] Trial 3 finished with value: 0.9966216216216216 and parameters: {'lr': 4.3232468750183284e-05, 'optimizer': 'Adam', 'dropout': 0.6595227543324216, 'freeze_backbone': False, 'batch_size': 32}. Best is trial 3 with value: 0.9966216216216216.


Trial 4 | Epoch 1/3 | Train Loss: 0.6379 | Val Loss: 0.4294 | Val Acc: 0.8184
Trial 4 | Epoch 2/3 | Train Loss: 0.4081 | Val Loss: 0.3152 | Val Acc: 0.8843
Trial 4 | Epoch 3/3 | Train Loss: 0.3560 | Val Loss: 0.4625 | Val Acc: 0.7956


[I 2025-11-20 14:59:53,563] Trial 4 finished with value: 0.8842905405405406 and parameters: {'lr': 0.005698433288967356, 'optimizer': 'Adam', 'dropout': 0.046468003681466814, 'freeze_backbone': False, 'batch_size': 16}. Best is trial 3 with value: 0.9966216216216216.
[I 2025-11-20 15:00:52,787] Trial 5 pruned. 


Trial 6 | Epoch 1/3 | Train Loss: 0.2135 | Val Loss: 0.0365 | Val Acc: 0.9873
Trial 6 | Epoch 2/3 | Train Loss: 0.0519 | Val Loss: 0.0244 | Val Acc: 0.9932
Trial 6 | Epoch 3/3 | Train Loss: 0.0448 | Val Loss: 0.0187 | Val Acc: 0.9941


[I 2025-11-20 15:02:07,822] Trial 6 finished with value: 0.9940878378378378 and parameters: {'lr': 0.0018701797068273907, 'optimizer': 'SGD', 'dropout': 0.6672307712145075, 'freeze_backbone': False, 'batch_size': 64, 'momentum': 0.7413821851388355}. Best is trial 3 with value: 0.9966216216216216.
[I 2025-11-20 15:03:07,277] Trial 7 pruned. 


Trial 8 | Epoch 1/3 | Train Loss: 0.1503 | Val Loss: 0.0298 | Val Acc: 0.9916
Trial 8 | Epoch 2/3 | Train Loss: 0.0408 | Val Loss: 0.0214 | Val Acc: 0.9949
Trial 8 | Epoch 3/3 | Train Loss: 0.0262 | Val Loss: 0.0159 | Val Acc: 0.9966


[I 2025-11-20 15:06:15,431] Trial 8 finished with value: 0.9966216216216216 and parameters: {'lr': 0.00018539214322044838, 'optimizer': 'SGD', 'dropout': 0.18401051206636354, 'freeze_backbone': False, 'batch_size': 32, 'momentum': 0.9629030521687365}. Best is trial 3 with value: 0.9966216216216216.


Trial 9 | Epoch 1/3 | Train Loss: 0.0867 | Val Loss: 0.0242 | Val Acc: 0.9916
Trial 9 | Epoch 2/3 | Train Loss: 0.0358 | Val Loss: 0.0121 | Val Acc: 0.9975
Trial 9 | Epoch 3/3 | Train Loss: 0.0379 | Val Loss: 0.0352 | Val Acc: 0.9899


[I 2025-11-20 15:07:56,125] Trial 9 finished with value: 0.9974662162162162 and parameters: {'lr': 0.00021730902782433924, 'optimizer': 'Adam', 'dropout': 0.6574580585651273, 'freeze_backbone': False, 'batch_size': 16}. Best is trial 9 with value: 0.9974662162162162.


Number of finished trials:  10
Best trial:
  Value (Val Accuracy): 0.9975
  Params:
    lr: 0.00021730902782433924
    optimizer: Adam
    dropout: 0.6574580585651273
    freeze_backbone: False
    batch_size: 16


# Optuna Results

In [6]:
from optuna.visualization import (
    plot_contour,
    plot_optimization_history,
    plot_param_importances,
    plot_slice,
)

# This cell needs the `study` object created by the training cell; run that
# cell first in the same kernel, otherwise `study` is undefined.

# Objective value per trial, with the running best
fig_hist = plot_optimization_history(study)
fig_hist.show()

# Estimated importance of each hyperparameter
fig_importance = plot_param_importances(study)
fig_importance.show()

# Per-parameter slices and pairwise contours of the objective value
plot_slice(study).show()
plot_contour(study).show()

NameError: name 'study' is not defined