# Hyperparameter tuning

In [1]:
!pip install optuna optuna-dashboard torch torchvision tqdm plotly

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting optuna-dashboard
  Downloading optuna_dashboard-0.20.0-py3-none-any.whl.metadata (6.7 kB)
Collecting torch
  Downloading torch-2.9.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.24.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (5.9 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Collecting bottle>=0.13.0 (from optuna-dashboard)
  Downloading bottle-0.13.4-py2.py3-none-any.whl.metadata (1.6 kB)
Collecting filelock (from torch)
  Downloading filelock-3.20.1-py3-none-any.whl.metadata (2.1 kB)
Collecting typing-extensions>=4.10.0 (from torch)
  Downloading typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
  Downloading nvid

## Dataset

In [2]:
# Use the functionality below to run your model (which you will adjust step by step later).
# This block of code provides the functionality needed to train a model. Results are printed after each epoch.

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import tqdm


def load_mnist_data(root_path='./data', batch_size=4):
    """
    Build the MNIST train and test DataLoaders.

    The dataset is downloaded into ``root_path`` if it is not already there.
    Point ``root_path`` at an existing copy to save some disk space and
    download time.

    Args:
        root_path: Directory passed to ``torchvision.datasets.MNIST`` as ``root``.
        batch_size: Number of samples per batch, used for both loaders.

    Returns:
        ``(trainloader, testloader)``: the training loader is shuffled, the
        test loader is not.
    """
    # Normalize applies (x - mean) / std per channel; MNIST has one channel.
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5,), (0.5,))]
    )

    # Honour root_path instead of a hard-coded './data'.
    trainset = torchvision.datasets.MNIST(root=root_path, train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

    testset = torchvision.datasets.MNIST(root=root_path, train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    return trainloader, testloader

## Training loops

In [3]:
def train_model(model, optimizer, loss_fn, dataloader):
    """
    Run one training pass over ``dataloader``.

    Args:
        model: Network; it is called on every batch flattened to ``(batch, -1)``.
        optimizer: Optimizer over the model parameters, stepped once per batch.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Sized iterable yielding ``(imgs, targets)`` batches.

    Returns:
        ``(avg_loss, avg_acc)``: ``avg_loss`` is a float, the mean of the
        per-batch losses; ``avg_acc`` is a 0-d float32 tensor holding the
        fraction of all samples that were classified correctly.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    model = model.to(device)
    model.train()

    running_loss = 0.0
    n_correct = 0
    n_samples = 0
    for imgs, targets in dataloader:
        imgs, targets = imgs.to(device=device), targets.to(device=device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(imgs.reshape(imgs.shape[0], -1))

        loss = loss_fn(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Count hits per sample rather than averaging per-batch accuracies:
        # a shorter last batch would otherwise weigh as much as a full one.
        hits = torch.max(outputs.detach(), dim=1).indices == targets
        n_correct += hits.sum().item()
        n_samples += hits.numel()

    avg_loss = running_loss / len(dataloader)
    avg_acc = torch.tensor(n_correct / n_samples, dtype=torch.float32)

    return avg_loss, avg_acc


def eval_model(model, loss_fn, dataloader):
    """
    Evaluate ``model`` on ``dataloader`` without computing gradients.

    Args:
        model: Network; it is called on every batch flattened to ``(batch, -1)``.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar tensor.
        dataloader: Sized iterable yielding ``(imgs, targets)`` batches.

    Returns:
        ``(avg_loss, avg_acc)``: ``avg_loss`` is a float, the mean of the
        per-batch losses; ``avg_acc`` is a 0-d float32 tensor holding the
        fraction of all samples that were classified correctly.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    model = model.to(device)
    model.eval()

    running_loss = 0.0
    n_correct = 0
    n_samples = 0
    with torch.no_grad():
        for imgs, targets in dataloader:
            imgs, targets = imgs.to(device=device), targets.to(device=device)

            # forward pass only; no backward pass or optimizer step here
            outputs = model(imgs.reshape(imgs.shape[0], -1))

            loss = loss_fn(outputs, targets)
            running_loss += loss.item()

            # Count hits per sample rather than averaging per-batch accuracies:
            # a shorter last batch would otherwise weigh as much as a full one.
            hits = torch.max(outputs, dim=1).indices == targets
            n_correct += hits.sum().item()
            n_samples += hits.numel()

    avg_loss = running_loss / len(dataloader)
    avg_acc = torch.tensor(n_correct / n_samples, dtype=torch.float32)

    return avg_loss, avg_acc


def operate(model, optimizer, loss_fn, train_dataloader, test_dataloader, epochs):
    """
    Train and evaluate ``model`` for ``epochs`` epochs, recording the metrics.

    Each epoch runs ``train_model`` on ``train_dataloader`` and then
    ``eval_model`` on ``test_dataloader``.

    Args:
        model: Network to train.
        optimizer: Optimizer over the model parameters.
        loss_fn: Loss callable forwarded to the train/eval helpers.
        train_dataloader: Batches used for training.
        test_dataloader: Batches used for evaluation.
        epochs: Number of train + eval rounds.

    Returns:
        Four 1-D tensors with one entry per epoch:
        ``(train_losses, train_accs, eval_losses, eval_accs)``.
    """
    t_losses, t_accs = [], []
    e_losses, e_accs = [], []
    for _ in range(epochs):
        t_avg_loss, t_avg_acc = train_model(
            model, optimizer, loss_fn, train_dataloader
        )
        t_losses.append(float(t_avg_loss))
        # Store the epoch accuracy itself (the list used to be appended to itself).
        t_accs.append(float(t_avg_acc))

        e_avg_loss, e_avg_acc = eval_model(
            model, loss_fn, test_dataloader
        )
        e_losses.append(float(e_avg_loss))
        e_accs.append(float(e_avg_acc))

    return torch.as_tensor(t_losses), torch.as_tensor(t_accs), torch.as_tensor(e_losses), torch.as_tensor(e_accs)

## Define your model

In [4]:
# use two parameters to create your model
# 1) the number of hidden layers
# 2) the neurons per hidden layer

# we tune those two parameters with Optuna

class MyNetwork(nn.Module):
    """
    Fully connected ReLU network with configurable depth and width.

    The stack is ``input_size -> neurons_per_layer``, followed by
    ``hidden_layers`` square ``neurons_per_layer -> neurons_per_layer``
    layers, followed by ``neurons_per_layer -> output_size``. ReLU is applied
    after every layer except the last, whose raw outputs are returned.
    """

    def __init__(self, input_size, hidden_layers, neurons_per_layer, output_size):
        super().__init__()

        # Feature widths from input to output; a non-positive `hidden_layers`
        # contributes no square layers.
        n_square = max(hidden_layers, 0)
        widths = [input_size] + [neurons_per_layer] * (n_square + 1) + [output_size]
        self.layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]
        )

    def forward(self, x):
        """Apply every layer but the last with ReLU, then the last layer without it."""
        *body, head = self.layers
        for linear in body:
            x = torch.relu(linear(x))
        return head(x)

In [5]:
from torch.nn import Linear

def build_model(trial, input_feat=784, output_feat=10):
    """
    Sample an architecture from ``trial`` and build the matching ``MyNetwork``.

    Args:
        trial: Object providing ``suggest_int``; used to draw ``n_layers``
            (1 to 5) and ``n_neurons_per_layer`` (10 to 20).
        input_feat: Number of input features of the network.
        output_feat: Number of outputs of the network.

    Returns:
        A freshly constructed ``MyNetwork``.
    """
    depth = trial.suggest_int("n_layers", 1, 5)
    width = trial.suggest_int("n_neurons_per_layer", 10, 20)
    return MyNetwork(
        input_size=input_feat,
        hidden_layers=depth,
        neurons_per_layer=width,
        output_size=output_feat,
    )

## Define your objective with optuna hyperparameter tuning

In [6]:
# find the best hyperparameters for
# 1) the number of hidden layers
# 2) the neurons per hidden layer
# 3) batch size
# 4) learning rate

# Settings shared by every trial: compute device, loss function, and epochs per trial.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss_fn = nn.CrossEntropyLoss()
EPOCHS = 10

In [7]:
import optuna
from torch.optim import Adam
import tqdm


def objective(trial):
    """
    Optuna objective: train a sampled network on MNIST and score it on the test set.

    Samples the architecture (through ``build_model``), the learning rate and
    the batch size from ``trial``, trains with Adam for ``EPOCHS`` epochs, and
    then evaluates once on the test loader.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        float: Fraction of test samples classified correctly.
    """
    model = build_model(trial).to(DEVICE)

    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [16, 32, 64, 128])

    optimizer = Adam(params=model.parameters(), lr=lr)

    train_loader, test_loader = load_mnist_data(batch_size=batch_size)

    model.train()
    for epoch in tqdm.tqdm(range(EPOCHS), desc='Iterating epoch'):
        for imgs, targets in tqdm.tqdm(train_loader, desc=f'Training iteration {epoch + 1}'):
            imgs, targets = imgs.to(device=DEVICE), targets.to(device=DEVICE)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(imgs.reshape(imgs.shape[0], -1))
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()

    model.eval()
    n_correct = 0
    n_samples = 0
    with torch.no_grad():
        # The label no longer reuses `epoch`, which is undefined when EPOCHS == 0.
        for imgs, targets in tqdm.tqdm(test_loader, desc='Evaluating on test set'):
            imgs, targets = imgs.to(device=DEVICE), targets.to(device=DEVICE)
            outputs = model(imgs.reshape(imgs.shape[0], -1))

            # Count hits per sample: the test set does not divide evenly into
            # every candidate batch size, so a mean of batch means is biased.
            hits = torch.max(outputs, dim=1).indices == targets
            n_correct += hits.sum().item()
            n_samples += hits.numel()

    # A plain float is what the study stores as the trial value.
    return n_correct / n_samples


## Run the optuna study

In [None]:
from optuna.trial import TrialState

# The study is persisted in a local SQLite file so later cells can load it by name.
study_name = "example-study"  # Unique identifier of the study.
storage_name = "sqlite:///{}.db".format(study_name)
# load_if_exists=True lets this cell be re-run: the stored study is resumed
# (and n_trials more trials are added) instead of failing because a study with
# this name already exists in the database.
study = optuna.create_study(
    study_name=study_name,
    storage=storage_name,
    direction='maximize',
    load_if_exists=True,
)

study.optimize(objective, n_trials=15)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[I 2025-12-19 19:38:44,061] A new study created in RDB with name: example-study
Iterating epoch:   0%|          | 0/10 [00:00<?, ?it/s]
Training iteration 1:   0%|          | 0/1875 [00:00<?, ?it/s][A
Training iteration 1:   0%|          | 1/1875 [00:00<21:12,  1.47it/s][A
Training iteration 1:   1%|          | 12/1875 [00:00<01:32, 20.11it/s][A
Training iteration 1:   1%|▏         | 28/1875 [00:00<00:39, 47.25it/s][A
Training iteration 1:   2%|▏         | 46/1875 [00:00<00:24, 75.93it/s][A
Training iteration 1:   3%|▎         | 64/1875 [00:01<00:18, 99.43it/s][A
Training iteration 1:   4%|▍         | 82/1875 [00:01<00:15, 117.46it/s][A
Training iteration 1:   5%|▌         | 100/1875 [00:01<00:13, 132.89it/s][A
Training iteration 1:   6%|▋         | 120/1875 [00:01<00:11, 148.45it/s][A
Training iteration 1:   7%|▋         | 137/1875 [00:01<00:11, 150.94it/s][A
Training iteration 1:   8%|▊         | 154/1875 [00:01<00:11, 144.64it/s][A
Training iteration 1:   9%|▉         | 1

## Inspect the Optuna study (inline plots; the same database can also be opened with `optuna-dashboard sqlite:///example-study.db`)

In [11]:
import optuna
import optuna.visualization as vis
import plotly.io as pio

# Reload the study from its SQLite storage by name.
study = optuna.load_study(
    storage="sqlite:///example-study.db",
    study_name="example-study",
)

# Short text summary: trial count, best objective value, best parameters.
print(f"Trials completados: {len(study.trials)}")
print(f"Mejor accuracy: {study.best_value:.4f}")
print(f"Mejores parámetros: {study.best_params}")

# Objective value per trial.
fig1 = vis.plot_optimization_history(study)
fig1.show()

# Estimated importance of each hyperparameter.
fig2 = vis.plot_param_importances(study)
fig2.show()

# Parallel-coordinate view of parameters against the objective value.
fig3 = vis.plot_parallel_coordinate(study)
fig3.show()

Trials completados: 15
Mejor accuracy: 0.9469
Mejores parámetros: {'n_layers': 4, 'n_neurons_per_layer': 20, 'lr': 0.0015340529516182921, 'batch_size': 64}
