# CIFAR10 with MLPs
Simple starter notebook to benchmark your own MLP with PyTorch on the CIFAR-10 dataset.

Notes:

- The main code is basically done, so focus on training the models and searching for the best hyperparameters and architectures.
- You are not required to use this exact code or even the PyTorch library.
- It is recommended to use execution environments with GPU access (such as Google Colab), since larger models will take more time to train.
- Remember to document the history of your experiments and which results motivated the changes in subsequent experiments.

In [1]:
# %pip installs into the environment of the running kernel; the version is pinned
# to the release this notebook was last executed with, for reproducibility.
%pip install -q optuna==4.5.0

Collecting Optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from Optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, Optuna
Successfully installed Optuna-4.5.0 colorlog-6.9.0


In [2]:
#@title Libs

import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics

from tqdm import tqdm

from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
import torch.optim as optim
import optuna

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [3]:
#@title Dataset Setup

# Preprocessing shared by both splits: tensor conversion followed by per-channel
# normalization that uses 0.5 for both the mean and the std of each RGB channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])

# Build the training split first, then the test split; both are downloaded to
# ./data if needed and share the same transform.
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR10(
        root='./data',
        train=is_train_split,
        download=True,
        transform=transform,
    )
    for is_train_split in (True, False)
)


100%|██████████| 170M/170M [00:01<00:00, 89.9MB/s]


In [4]:
#@title Defining the MLP model
# 3072 (input) → 64 → 128 → 64 → 10 (output)

class MLP(nn.Module):
  """Fully-connected classifier with three hidden layers (64 -> 128 -> 64 units).

  The same activation and the same dropout module are applied after each of the
  three hidden linear layers; the output layer returns its raw values with no
  activation applied.

  Args:
    input_size: number of features in each (already flattened) input sample.
    num_classes: number of units in the output layer.
    activation_function: callable applied to the output of every hidden layer.
    dropout_rate: probability handed to nn.Dropout. The default of 0.0 keeps
      earlier experiments that did not use dropout unchanged.
  """

  def __init__(self, input_size, num_classes, activation_function, dropout_rate: float = 0.0):
    super().__init__()
    self.activation_function = activation_function

    # Linear layers, declared in input -> output order.
    self.fc_input = nn.Linear(input_size, 64)
    self.fc_hidden1 = nn.Linear(64, 128)
    self.fc_hidden2 = nn.Linear(128, 64)
    self.fc_output = nn.Linear(64, num_classes)

    # One dropout module reused after every hidden activation.
    self.drop = nn.Dropout(p=dropout_rate)

  def forward(self, x):
    """Run x through the hidden stack and return the output layer's values."""
    hidden_layers = (self.fc_input, self.fc_hidden1, self.fc_hidden2)
    for layer in hidden_layers:
      # linear -> activation -> dropout
      x = self.drop(self.activation_function(layer(x)))
    return self.fc_output(x)

In [5]:
#@title Defining metrics helper

def get_scores(targets, predictions):
    """Compute a dictionary of classification metrics with scikit-learn.

    Args:
        targets: ground-truth labels.
        predictions: predicted labels, aligned with ``targets``.

    Returns:
        dict with the keys "accuracy", "balanced_accuracy", "precision",
        "recall" and "f1_score"; the last three use ``average="weighted"``.
    """
    scores = {
        "accuracy": metrics.accuracy_score(targets, predictions),
        "balanced_accuracy": metrics.balanced_accuracy_score(targets, predictions),
    }

    # The remaining metrics share the same call signature and averaging mode.
    weighted_scorers = (
        ("precision", metrics.precision_score),
        ("recall", metrics.recall_score),
        ("f1_score", metrics.f1_score),
    )
    for score_name, scorer in weighted_scorers:
        scores[score_name] = scorer(targets, predictions, average="weighted")

    return scores

In [6]:
#@title Hyperparameters
# Data dimensions: 32x32 RGB images flattened into one vector, 10 target classes.
num_classes = 10
input_size = 3 * 32 * 32

# Optimization settings.
num_epochs = 100
batch_size = 16
learning_rate = 1e-3

# Model non-linearity and training criterion.
activation_function = nn.ReLU()
loss_function = nn.CrossEntropyLoss()

# Regularization defaults (overwritten later by the Optuna search results).
dropout_rate = 0.3
weight_decay = 1e-4


In [7]:
#@title Loaders

# Training batches are shuffled; test batches keep the dataset order.
# Both loaders use the batch_size defined with the other hyperparameters.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [None]:

def objective(trial, max_epochs=100, val_fraction=0.1, split_seed=42):
    """Optuna objective: train one MLP configuration and return its validation accuracy.

    The learning rate, batch size, dropout rate and weight decay are sampled
    from the trial. Validation accuracy is measured on a hold-out split of
    ``train_dataset`` rather than on ``test_dataset``, so the test set is never
    used for hyperparameter selection and the final test evaluation is not
    biased by the search. The split is seeded, so every trial sees the same
    train/validation partition.

    Args:
        trial: the optuna trial used to sample hyperparameters and report progress.
        max_epochs: maximum number of training epochs for this trial.
        val_fraction: fraction of ``train_dataset`` held out for validation.
        split_seed: seed of the generator that draws the train/validation split.

    Returns:
        Validation accuracy (correct / total) measured after the last epoch.

    Raises:
        optuna.TrialPruned: when the trial reports that it should be pruned.
    """
    # Keep the suggestion order stable so a seeded sampler reproduces the same search.
    lr         = trial.suggest_float("lr", 5e-4, 3e-3, log=True)
    bs         = trial.suggest_categorical("batch_size", [16, 32, 64, 128])
    dr         = trial.suggest_float("dropout", 0.0, 0.6)
    wd         = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)

    # Hold out part of the training data for validation (same split in every trial).
    n_val = max(1, int(len(train_dataset) * val_fraction))
    n_train = len(train_dataset) - n_val
    train_subset, val_subset = torch.utils.data.random_split(
        train_dataset, [n_train, n_val],
        generator=torch.Generator().manual_seed(split_seed),
    )

    # Pinned host memory is only requested when the model actually runs on CUDA.
    use_pinned_memory = device.type == "cuda"
    train_loader = torch.utils.data.DataLoader(train_subset, batch_size=bs, shuffle=True,
                                               num_workers=2, pin_memory=use_pinned_memory)
    val_loader   = torch.utils.data.DataLoader(val_subset, batch_size=bs, shuffle=False,
                                               num_workers=2, pin_memory=use_pinned_memory)

    model = MLP(input_size=input_size, num_classes=num_classes,
                activation_function=activation_function, dropout_rate=dr).to(device)
    optimz = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    crit   = loss_function

    val_acc = 0.0  # defined up front so the return is valid even if max_epochs is 0
    for epoch in range(1, max_epochs + 1):
        # --- training pass ---
        model.train()
        for images, labels in train_loader:
            images = images.view(-1, input_size).to(device)  # flatten each image
            labels = labels.to(device)
            optimz.zero_grad()
            out = model(images)
            loss = crit(out, labels)
            loss.backward()
            optimz.step()

        # --- validation pass on the hold-out split ---
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.view(-1, input_size).to(device)
                labels = labels.to(device)
                pred = model(images).argmax(1)
                correct += (pred == labels).sum().item()
                total   += labels.size(0)
        val_acc = correct / total

        # Report the per-epoch accuracy so the study's pruner can stop weak trials early.
        trial.report(val_acc, step=epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return val_acc

# Seeded sampler plus a median pruner that waits 5 steps before pruning.
sampler = TPESampler(seed=42, multivariate=True, group=True)
pruner  = MedianPruner(n_warmup_steps=5)
study   = optuna.create_study(direction="maximize", sampler=sampler, pruner=pruner)

# Optional: warm-start the search with the best configuration known so far
# study.enqueue_trial({"lr": 0.001, "batch_size": 16, "dropout": 0.3, "weight_decay": 1e-4})

study.optimize(objective, n_trials=15, show_progress_bar=True)
print("Best val_acc:", study.best_value)
print("Best params:", study.best_params)

# Overwrite the default hyperparameters with the best ones for the final training run.
best = study.best_params
learning_rate = best["lr"]
batch_size    = best["batch_size"]
dropout_rate  = best["dropout"]
weight_decay  = best["weight_decay"]

# The global loaders were created before the search with the default batch size;
# rebuild them so the final training and evaluation actually use the tuned value.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

[I 2025-09-28 21:18:31,943] A new study created in memory with name: no-name-0137954e-fd2b-4a9a-bfca-c9ada6f9d120


  0%|          | 0/15 [00:00<?, ?it/s]



In [None]:
#@title Training loop

# Build the model on the selected device (works on CPU-only machines too)
mlp = MLP(input_size=input_size, num_classes=num_classes,
          activation_function=activation_function,
          dropout_rate=dropout_rate).to(device)

# Setting optimizer up (learning rate and weight decay come from the hyperparameters)
optimizer = torch.optim.Adam(mlp.parameters(), lr=learning_rate,
                             weight_decay=weight_decay)

# Early stopping setup.
# NOTE: the monitored quantity is the average *training* loss of each epoch,
# not a validation metric.
best_loss = float('inf')
patience = 5
patience_counter = 0

# Number of batches per epoch, taken from the loader itself so the progress
# message always matches the batches that are actually iterated.
steps_per_epoch = len(train_loader)

# Start training epochs loop
for epoch in tqdm(range(num_epochs)):
  mlp.train()  # make sure dropout is active while training
  epoch_loss = 0.0
  for i, (images, labels) in enumerate(train_loader):
    images = images.view(-1, input_size).to(device) # flattening images
    labels = labels.to(device)

    # Forward pass
    optimizer.zero_grad()
    outputs = mlp(images)

    # Backward pass
    loss = loss_function(outputs, labels)
    loss.backward()
    optimizer.step()

    # Convert to a Python float once; used for both accumulation and logging
    batch_loss = loss.item()
    epoch_loss += batch_loss

    if (i+1) % 1000 == 0:
      tqdm.write(f' Epoch {epoch + 1}/{num_epochs}, Step {i+1}/{steps_per_epoch}, Loss: {batch_loss:.4f}')

  epoch_loss /= steps_per_epoch
  tqdm.write(f'Epoch {epoch+1} average loss: {epoch_loss:.4f}')

  # Early stopping using loss value: stop after `patience` consecutive epochs
  # without a new best average loss
  if epoch_loss < best_loss:
    best_loss = epoch_loss
    patience_counter = 0
  else:
    patience_counter += 1
    if patience_counter >= patience:
      tqdm.write("Early stopping triggered.")
      break

In [None]:
#@title Evaluate model (accuracy, precision, recall)
# Switch to evaluation mode so dropout is disabled during inference.
mlp.eval()
predictions = []
labels = []

# No gradients are needed for evaluation; skipping autograd bookkeeping saves
# memory and time.
with torch.no_grad():
  for images, label in test_loader:
    images = images.view(-1, input_size).to(device)  # flatten each image

    output = mlp(images)
    predicted = output.argmax(dim=1)  # index of the highest score per sample

    predictions.extend(predicted.cpu().numpy())
    # Targets are only collected for the metrics, so they never need to leave the CPU.
    labels.extend(label.numpy())

scores = get_scores(labels, predictions)
print("Scores of your model\n", scores)

# You can change/optimize this as you want
- Different optimizers, activation functions, etc
- Automatic hyperparameters optimization (Optuna)
- Regularization techniques
- Validation set to track metrics during epochs
- Transform input data
- ...