In [2]:
# Install Optuna
!pip install optuna

import optuna                                                  # Optuna framework :contentReference[oaicite:10]{index=10}
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as T
import torchvision
import torch, torch.nn as nn, torch.nn.functional as F, torch.optim as optim
import random, numpy as np, os



In [3]:
# ── Reuse Your Existing Setup
# (Sections 1-4: reproducibility, device, data prep, model definition)
# NOTE: the triple-quoted block below is a bare string expression, not live
# code — nothing inside it is executed. Because it is the last expression in
# the cell, the notebook simply echoes the string back as the cell output.
# The statements that actually run are repeated in the "1. Reproducibility"
# and "2. Device" cells that follow.
"""seed = 42
torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
np.random.seed(seed); random.seed(seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")"""

'seed = 42\ntorch.manual_seed(seed); torch.cuda.manual_seed_all(seed)\nnp.random.seed(seed); random.seed(seed)\ndevice = torch.device("cuda" if torch.cuda.is_available() else "cpu")'

In [4]:

# ── 1. Reproducibility ───────────────────────────────────────────────────────────
# One shared seed, pushed into every random number generator the notebook uses:
# PyTorch (CPU), PyTorch (all CUDA devices), NumPy's global RNG and Python's `random`.
seed = 42
for _seed_fn in (
    torch.manual_seed,
    torch.cuda.manual_seed_all,
    np.random.seed,
    random.seed,
):
    _seed_fn(seed)

In [5]:
# ── 2. Device ───────────────────────────────────────────────────────────────────
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
_backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_backend)
print("Using device:", device)

Using device: cpu


In [6]:
# ── 3. Data Preparation ─────────────────────────────────────────────────────────
# Per-channel normalisation constants shared by both pipelines
# (presumably the CIFAR-100 training-set mean / std — TODO confirm the source).
_norm_mean = (0.5071, 0.4867, 0.4408)
_norm_std = (0.2675, 0.2565, 0.2761)

# Training pipeline: random 32x32 crop (after 4 px padding) and random
# horizontal flip as augmentation, then tensor conversion + normalisation.
transform_train = T.Compose([
    T.RandomCrop(32, padding=4),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(_norm_mean, _norm_std),
])

# Evaluation pipeline: no augmentation, only tensor conversion + normalisation.
transform_test = T.Compose([
    T.ToTensor(),
    T.Normalize(_norm_mean, _norm_std),
])

In [7]:
# Download & split
# Two views of the same CIFAR-100 training images are created: one with the
# augmenting training transform and one with the deterministic evaluation
# transform. The split indices are drawn once (seeded) and then applied to the
# matching view, so the held-out validation images are never randomly
# cropped/flipped and validation accuracy is measured the same way as test
# accuracy.
dataset_full = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=True, transform=transform_train)
# Same files on disk (already fetched by the call above), evaluation transform.
dataset_full_eval = torchvision.datasets.CIFAR100(
    root='./data', train=True, download=False, transform=transform_test)

val_size = 5000
train_size = len(dataset_full) - val_size

# `random_split` returns Subset objects; the validation one is used only for
# its indices so that the partition is identical to a plain random_split.
train_dataset, _val_split = random_split(
    dataset_full, [train_size, val_size],
    generator=torch.Generator().manual_seed(seed))
val_dataset = torch.utils.data.Subset(dataset_full_eval, _val_split.indices)

test_dataset = torchvision.datasets.CIFAR100(
    root='./data', train=False, download=True, transform=transform_test)

100%|██████████| 169M/169M [00:07<00:00, 24.1MB/s]


In [8]:
# DataLoaders (batch_size=64 per paper)
batch_size = 64

# Settings common to all three loaders; only the training loader shuffles.
_loader_kwargs = dict(batch_size=batch_size, num_workers=2)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)


In [9]:
# Model from [11]
class LELeNetCIFAR(nn.Module):
    """LeNet-style CNN: two 5x5 conv + 2x2 max-pool stages, then three linear layers.

    The first linear layer expects ``64 * 8 * 8`` features, i.e. 3-channel
    inputs of spatial size 32x32 (each pooling stage halves height and width).

    Args:
        num_classes: Number of output logits. Defaults to 100, the value that
            was previously hard-coded, so ``LELeNetCIFAR()`` is unchanged.
    """

    def __init__(self, num_classes: int = 100):
        super().__init__()
        # Layer creation order is kept as-is: it fixes the order in which the
        # randomly initialised weights are drawn from the global RNG.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=5, padding=2)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=5, padding=2)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(64 * 8 * 8, 384)
        self.fc2 = nn.Linear(384, 192)
        self.fc3 = nn.Linear(192, num_classes)

    def forward(self, x):
        """Map a ``(batch, 3, 32, 32)`` tensor to ``(batch, num_classes)`` raw logits."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)  # no softmax: logits are returned as-is

In [12]:
def train_one_epoch(model, optimizer, criterion, loader):
    """Run one optimisation pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        model: Network to train; switched to training mode here.
        optimizer: Optimiser stepped once per batch.
        criterion: Loss callable applied to ``(logits, labels)``. The per-batch
            value is multiplied by the batch size before summing, which assumes
            it is a per-sample mean (as with the default
            ``nn.CrossEntropyLoss``) — confirm if a different reduction is used.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the samples that
        were actually processed.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    total_loss = 0.0
    correct = 0
    seen = 0
    for imgs, lbls in loader:
        imgs, lbls = imgs.to(device), lbls.to(device)
        optimizer.zero_grad()
        out = model(imgs)
        loss = criterion(out, lbls)
        loss.backward()
        optimizer.step()

        n = imgs.size(0)
        total_loss += loss.item() * n
        correct += out.argmax(1).eq(lbls).sum().item()
        seen += n

    # Normalise by the number of samples really seen rather than
    # len(loader.dataset): the two differ when the loader skips samples
    # (drop_last=True, a custom sampler), and plain iterables have no .dataset.
    if seen == 0:
        raise ValueError("train_one_epoch: loader yielded no samples")
    return total_loss / seen, correct / seen

def eval_model(model, criterion, loader):
    """Evaluate ``model`` on ``loader`` without gradients; return ``(mean_loss, accuracy)``.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        criterion: Loss callable applied to ``(logits, labels)``. The per-batch
            value is multiplied by the batch size before summing, which assumes
            it is a per-sample mean (as with the default
            ``nn.CrossEntropyLoss``) — confirm if a different reduction is used.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the samples that
        were actually processed.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for imgs, lbls in loader:
            imgs, lbls = imgs.to(device), lbls.to(device)
            out = model(imgs)
            loss = criterion(out, lbls)

            n = imgs.size(0)
            total_loss += loss.item() * n
            correct += out.argmax(1).eq(lbls).sum().item()
            seen += n

    # Normalise by the number of samples really seen rather than
    # len(loader.dataset): the two differ when the loader skips samples
    # (drop_last=True, a custom sampler), and plain iterables have no .dataset.
    if seen == 0:
        raise ValueError("eval_model: loader yielded no samples")
    return total_loss / seen, correct / seen

In [13]:
# ── Optuna Objective Function
def objective(trial):
    """Optuna objective: train one sampled configuration, return its best validation accuracy.

    The trial samples the SGD learning rate, weight decay, batch size and the
    number of epochs. After every epoch the validation accuracy is reported
    to the trial so the study's pruner can stop it early.

    Args:
        trial: The ``optuna`` trial supplying hyperparameter suggestions.

    Returns:
        The highest validation accuracy observed over the epochs that ran.

    Raises:
        optuna.TrialPruned: When the trial reports that it should be pruned.
    """
    # Search space (suggestion order is kept unchanged).
    learning_rate = trial.suggest_float("lr", 1e-3, 1e-1, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)
    batch = trial.suggest_categorical("batch_size", [32, 64, 128])
    n_epochs = trial.suggest_int("epochs", 50, 150, step=25)

    # Trial-local loaders: these shadow the notebook-level loaders so the
    # sampled batch size is the one actually used.
    loader_kwargs = dict(batch_size=batch, num_workers=2)
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

    # Fresh model / optimiser / cosine LR schedule for every trial.
    model = LELeNetCIFAR().to(device)
    optimizer = optim.SGD(
        model.parameters(),
        lr=learning_rate,
        momentum=0.9,
        weight_decay=weight_decay,
    )
    scheduler = CosineAnnealingLR(optimizer, T_max=n_epochs)
    criterion = nn.CrossEntropyLoss()

    best_val_acc = 0
    for epoch in range(1, n_epochs + 1):
        train_one_epoch(model, optimizer, criterion, train_loader)
        _, val_acc = eval_model(model, criterion, val_loader)
        scheduler.step()

        # Report the intermediate value, then let the pruner decide.
        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

        best_val_acc = max(best_val_acc, val_acc)

    return best_val_acc

In [None]:
# ── Run the Optuna Study
# The sampler is seeded with the notebook-wide `seed` so that the sequence of
# suggested hyperparameters is the same on every Restart & Run All; without
# it, each run explores different configurations even though every other RNG
# in the notebook is seeded. (Trial *results* are only as repeatable as the
# training itself.) No pruner is passed, so Optuna's default pruner is used.
study = optuna.create_study(
    direction="maximize",                           # maximize validation accuracy
    sampler=optuna.samplers.TPESampler(seed=seed),
)
study.optimize(objective, n_trials=20)              # e.g., 20 trials
print("Best hyperparameters:", study.best_params)   # use these for final training

[I 2025-04-30 16:15:27,593] A new study created in memory with name: no-name-c9f5da54-f384-4be2-8399-781e7f1bfbcf
