In [24]:
!pip -q install optuna==3.6.1

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [27]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, ConcatDataset
from torchvision import datasets, transforms, models
from torchvision.models import resnet18, ResNet18_Weights
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from tqdm import tqdm
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
import math

In [28]:
# Root of the dataset and its two split folders ("Training" / "Testing").
data_dir = "/kaggle/input/brain-tumor-mri-dataset/"
train_dir, test_dir = (os.path.join(data_dir, split) for split in ("Training", "Testing"))

# Per-channel statistics used to normalize inputs for the ImageNet-pretrained backbone.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

# Deterministic preprocessing (no augmentation): resize -> tensor -> normalize.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ]
)

In [29]:
# One ImageFolder per split directory, both using the fixed preprocessing pipeline.
train_data, test_data = (
    datasets.ImageFolder(root, transform=transform) for root in (train_dir, test_dir)
)

# Baseline loaders over the full splits; only the training loader is shuffled.
train_loader = DataLoader(train_data, shuffle=True, batch_size=32)
test_loader = DataLoader(test_data, shuffle=False, batch_size=32)

In [31]:
# Hold out 15% of the training images for validation.
val_ratio = 0.15
train_len = int((1 - val_ratio) * len(train_data))
# The validation part takes the remainder so the two lengths always sum to len(train_data).
val_len = len(train_data) - train_len

# Dedicated, seeded generator so the split is the same on every run.
g = torch.Generator()
g.manual_seed(42)
train_subset, val_subset = random_split(
    train_data,
    [train_len, val_len],
    generator=g,
)

In [32]:

def build_transforms(trial):
    """Sample an augmentation setting and build the train/validation pipelines.

    Samples the categorical ``aug_level`` ("none", "flip", "flip_rot") from
    ``trial``. For "flip" and "flip_rot" a random horizontal flip (p=0.5) is
    added; for "flip_rot" an integer ``rot_deg`` in [0, 15] is also sampled and,
    when it is positive, a random rotation of up to that many degrees is added.

    Args:
        trial: Object exposing ``suggest_categorical`` and ``suggest_int``
            (an Optuna trial in this notebook).

    Returns:
        ``(train_tf, val_tf)``: both resize to 224x224, convert to tensor and
        normalize with the ImageNet statistics; only ``train_tf`` contains the
        sampled augmentations (inserted between the resize and the tensor
        conversion).
    """

    def _pipeline(extra_steps):
        # Shared skeleton; `extra_steps` slot in right after the resize.
        return transforms.Compose([
            transforms.Resize((224, 224)),
            *extra_steps,
            transforms.ToTensor(),
            transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
        ])

    level = trial.suggest_categorical("aug_level", ["none", "flip", "flip_rot"])

    augmentations = []
    if level in ("flip", "flip_rot"):
        augmentations.append(transforms.RandomHorizontalFlip(p=0.5))
    if level == "flip_rot":
        max_angle = trial.suggest_int("rot_deg", 0, 15)
        # A sampled angle of 0 means "no rotation", so the op is skipped entirely.
        if max_angle > 0:
            augmentations.append(transforms.RandomRotation(degrees=max_angle))

    return _pipeline(augmentations), _pipeline([])

In [33]:

def build_model(trial, num_classes):
    """Build a ResNet-18 classifier with a sampled freeze depth and dropout.

    Tries to load the default pretrained ResNet-18 weights. If that fails the
    model falls back to random initialization and a ``RuntimeWarning`` is
    emitted, so the change of experimental setup is visible rather than silent.

    Sampled hyper-parameters (in this order):
        * ``freeze_ratio`` in [0.0, 0.8]: the first
          ``floor(6 * freeze_ratio)`` entries of
          ``[conv1, bn1, layer1, layer2, layer3, layer4]`` get
          ``requires_grad = False``.
        * ``dropout`` in [0.0, 0.6]: dropout probability placed before the new
          final linear layer.

    Args:
        trial: Object exposing ``suggest_float`` (an Optuna trial in this notebook).
        num_classes: Number of output units of the replacement ``fc`` head.

    Returns:
        The configured ``resnet18`` module (not yet moved to any device).
    """
    import warnings

    try:
        m = resnet18(weights=ResNet18_Weights.DEFAULT)
    except Exception as exc:
        # Only ordinary errors are caught: a bare `except:` would also swallow
        # KeyboardInterrupt / SystemExit and make the cell impossible to stop.
        warnings.warn(
            f"Could not load pretrained ResNet-18 weights ({exc!r}); "
            "falling back to randomly initialized weights.",
            RuntimeWarning,
        )
        m = resnet18(weights=None)

    freeze_ratio = trial.suggest_float("freeze_ratio", 0.0, 0.8)
    layers = [m.conv1, m.bn1, m.layer1, m.layer2, m.layer3, m.layer4]
    n_frozen = math.floor(len(layers) * freeze_ratio)
    for block in layers[:n_frozen]:
        for p in block.parameters():
            p.requires_grad = False

    p_drop = trial.suggest_float("dropout", 0.0, 0.6)
    # Replace the original classification head with dropout + a fresh linear layer.
    m.fc = nn.Sequential(nn.Dropout(p_drop), nn.Linear(m.fc.in_features, num_classes))
    return m

In [34]:

def make_loaders(train_subset, val_subset, train_tf, val_tf, batch_size):
    """Create train/validation DataLoaders that each use their own transform.

    The two subsets may wrap the *same* underlying dataset object (this is the
    case when they come from one ``random_split`` call). Assigning
    ``subset.dataset.transform`` directly would then make the last assignment
    win for both subsets, so the training augmentations would never be applied.
    To avoid that, each subset is rebuilt on top of a shallow copy of its
    dataset that carries its own ``transform``; the original dataset object is
    left untouched.

    Assumes the wrapped dataset applies ``self.transform`` inside
    ``__getitem__`` (as torchvision's ``ImageFolder`` does).

    Args:
        train_subset: ``Subset``-like object (``.dataset`` and ``.indices``)
            with the training samples.
        val_subset: ``Subset``-like object with the validation samples.
        train_tf: Transform applied to training samples.
        val_tf: Transform applied to validation samples.
        batch_size: Batch size used by both loaders.

    Returns:
        ``(train_loader, val_loader)``; only the training loader shuffles.
    """
    import copy
    from torch.utils.data import Subset

    def _with_transform(subset, tf):
        # Shallow copy: shares the sample list with the original dataset but
        # gets an independent `transform` attribute.
        view = copy.copy(subset.dataset)
        view.transform = tf
        return Subset(view, subset.indices)

    train_view = _with_transform(train_subset, train_tf)
    val_view = _with_transform(val_subset, val_tf)
    return (
        DataLoader(train_view, batch_size=batch_size, shuffle=True,  num_workers=2, pin_memory=True),
        DataLoader(val_view,   batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
    )

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimization pass over ``loader`` and report epoch metrics.

    Puts ``model`` in training mode, then for every batch moves the data to
    ``device``, clears gradients, runs forward/backward and takes one
    optimizer step.

    Args:
        model: Module mapping an input batch to per-class scores.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer updating the model's parameters.
        criterion: Loss returning the batch-mean loss as a scalar tensor.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent)`` where the loss is averaged per sample
        (each batch loss is weighted by its batch size).
    """
    model.train()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0
    for inputs, targets in loader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        batch_n = targets.size(0)
        # Weight the batch-mean loss by the batch size so the final figure is a per-sample mean.
        running_loss += batch_loss.item() * batch_n
        n_correct += (logits.argmax(1) == targets).sum().item()
        n_seen += batch_n
    return running_loss / n_seen, 100. * n_correct / n_seen

@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Compute loss and accuracy of ``model`` over ``loader`` without training.

    Puts ``model`` in eval mode and runs with gradient tracking disabled.

    Args:
        model: Module mapping an input batch to per-class scores.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss returning the batch-mean loss as a scalar tensor.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy_percent)`` where the loss is averaged per sample
        (each batch loss is weighted by its batch size).
    """
    model.eval()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        logits = model(inputs)
        batch_n = targets.size(0)
        # Batch-mean loss times batch size gives the summed loss for this batch.
        running_loss += criterion(logits, targets).item() * batch_n
        n_correct += (logits.argmax(1) == targets).sum().item()
        n_seen += batch_n
    return running_loss / n_seen, 100. * n_correct / n_seen

In [35]:

# Prefer the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def objective(trial):
    """Optuna objective: train a sampled configuration and score it.

    Samples, in this order: optimizer type, learning rate, weight decay, batch
    size, whether to use a StepLR scheduler (plus its ``step_size`` / ``gamma``
    when enabled), the augmentation settings (via ``build_transforms``), the
    model settings (via ``build_model``) and, for SGD only, the momentum.

    The model is trained for 6 epochs on the module-level ``train_subset`` and
    evaluated on ``val_subset`` after each epoch. Every epoch's validation
    accuracy is reported to the trial so that the study's pruner can stop it.

    Args:
        trial: The Optuna trial supplying the hyper-parameters.

    Returns:
        The highest validation accuracy (in percent) seen across the epochs.

    Raises:
        optuna.TrialPruned: When ``trial.should_prune()`` is true after an epoch.
    """
    # ---- optimization hyper-parameters ----
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "SGD"])
    learning_rate = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64])

    # Scheduler settings are only sampled when the scheduler is switched on.
    scheduler_kwargs = None
    if trial.suggest_categorical("use_scheduler", [True, False]):
        scheduler_kwargs = {
            "step_size": trial.suggest_int("step_size", 1, 4),
            "gamma": trial.suggest_float("gamma", 0.2, 0.9),
        }

    # ---- data ----
    train_tf, val_tf = build_transforms(trial)
    train_loader_t, val_loader_t = make_loaders(
        train_subset, val_subset, train_tf, val_tf, batch_size
    )

    # ---- model and loss ----
    model_t = build_model(trial, num_classes=4).to(device)
    criterion_t = nn.CrossEntropyLoss()

    # Frozen parameters are not handed to the optimizer.
    trainable_params = [p for p in model_t.parameters() if p.requires_grad]
    if optimizer_name == "Adam":
        optimizer_t = optim.Adam(trainable_params, lr=learning_rate, weight_decay=weight_decay)
    else:
        momentum = trial.suggest_float("momentum", 0.7, 0.99)
        optimizer_t = optim.SGD(
            trainable_params,
            lr=learning_rate,
            momentum=momentum,
            weight_decay=weight_decay,
            nesterov=True,
        )

    scheduler_t = None
    if scheduler_kwargs is not None:
        scheduler_t = StepLR(optimizer_t, **scheduler_kwargs)

    # ---- train / validate loop with pruning ----
    best_val = 0.0
    for epoch in range(6):
        train_one_epoch(model_t, train_loader_t, optimizer_t, criterion_t, device)
        val_acc = evaluate(model_t, val_loader_t, criterion_t, device)[1]
        if scheduler_t is not None:
            scheduler_t.step()

        trial.report(val_acc, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

        if val_acc > best_val:
            best_val = val_acc

    return best_val

In [36]:
# Maximize validation accuracy with a seeded TPE sampler and a MedianPruner
# left at its default settings.
study = optuna.create_study(
    direction="maximize",
    sampler=TPESampler(seed=42),
    pruner=MedianPruner(),
)

# Run 20 trials of `objective`, showing a progress bar.
study.optimize(
    objective,
    n_trials=20,
    show_progress_bar=True,
)
print("Best Params:", study.best_params)

[I 2025-08-21 12:31:45,106] A new study created in memory with name: no-name-a33cf665-a97b-4c97-85c9-3814f157e5ad


  0%|          | 0/20 [00:00<?, ?it/s]

[I 2025-08-21 12:33:27,718] Trial 0 finished with value: 98.24970828471412 and parameters: {'optimizer': 'SGD', 'lr': 0.001570297088405539, 'weight_decay': 6.251373574521755e-05, 'batch_size': 16, 'use_scheduler': True, 'step_size': 3, 'gamma': 0.21440914600706173, 'aug_level': 'none', 'freeze_ratio': 0.1454599737656805, 'dropout': 0.11004270591206029, 'momentum': 0.7882302504582659}. Best is trial 0 with value: 98.24970828471412.
[I 2025-08-21 12:35:06,519] Trial 1 finished with value: 98.13302217036173 and parameters: {'optimizer': 'Adam', 'lr': 7.476312062252303e-05, 'weight_decay': 6.847920095574779e-05, 'batch_size': 64, 'use_scheduler': False, 'aug_level': 'flip_rot', 'rot_deg': 0, 'freeze_ratio': 0.48603588152115074, 'dropout': 0.10231447421237491}. Best is trial 0 with value: 98.24970828471412.
[I 2025-08-21 12:36:47,183] Trial 2 finished with value: 98.24970828471412 and parameters: {'optimizer': 'SGD', 'lr': 0.00788671412999049, 'weight_decay': 0.0002661901888489054, 'batch_s