In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import optuna
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
class Net(nn.Module):
    def __init__(self, input_dim: int, output_dim: int, num_layers_hidden1: int, hidden_dim1: int, num_layers_hidden2: int, hidden_dim2: int) -> None:
        super(Net, self).__init__()
        layers = []
        layers.append(nn.Linear(input_dim, hidden_dim1))
        layers.append(nn.ReLU())

        for _ in range(num_layers_hidden1 - 1):
            layers.append(nn.Linear(hidden_dim1, hidden_dim1))
            layers.append(nn.ReLU())
        
        layers.append(nn.Linear(hidden_dim1, hidden_dim2))  # Ensure transition
        layers.append(nn.ReLU())

        for _ in range(num_layers_hidden2 - 1):  # Add more layers dynamically
            layers.append(nn.Linear(hidden_dim2, hidden_dim2))
            layers.append(nn.ReLU())

        layers.append(nn.Linear(hidden_dim2, output_dim))  # Final output layer
        self.network = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = x.view(-1, 28 * 28)
        return self.network(x)

In [3]:

def objective(trial):
    # Optimize number of hidden layers (1 to 5)
    num_layers_hidden1 = trial.suggest_int("num_layers_hidden1", 1, 5)
    num_layers_hidden2 = trial.suggest_int("num_layers_hidden2", 1, 5)
    hidden_dim1 = trial.suggest_int("hidden_dim1", 32, 64)
    hidden_dim2 = trial.suggest_int("hidden_dim2", 128, 256)
    lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)

    # Load dataset
    transform = transforms.ToTensor()
    train_dataset = datasets.MNIST(root="./data", train=True, transform=transform, download=True)
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    # Create the model
    model = Net(input_dim=28*28, output_dim=10, num_layers_hidden1=num_layers_hidden1, hidden_dim1=hidden_dim1,
                num_layers_hidden2=num_layers_hidden2, hidden_dim2=hidden_dim2)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop (1 epoch)
    model.train()
    total_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        if batch_idx >= 100:  # Limit iterations for speed
            break

    return total_loss / len(train_loader)


In [None]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100)

[I 2025-03-27 00:43:14,922] A new study created in memory with name: no-name-c9c8de7e-596e-4f71-99e2-86cceb969f0c
  lr = trial.suggest_loguniform("lr", 1e-4, 1e-2)
[I 2025-03-27 00:43:17,279] Trial 0 finished with value: 0.2435633447378683 and parameters: {'num_layers_hidden1': 4, 'num_layers_hidden2': 4, 'hidden_dim1': 45, 'hidden_dim2': 192, 'lr': 0.00020211460735154193}. Best is trial 0 with value: 0.2435633447378683.
[I 2025-03-27 00:43:19,744] Trial 1 finished with value: 0.11008380391577413 and parameters: {'num_layers_hidden1': 4, 'num_layers_hidden2': 1, 'hidden_dim1': 44, 'hidden_dim2': 221, 'lr': 0.005098579499356211}. Best is trial 1 with value: 0.11008380391577413.
[I 2025-03-27 00:43:22,321] Trial 2 finished with value: 0.13979750147252196 and parameters: {'num_layers_hidden1': 2, 'num_layers_hidden2': 4, 'hidden_dim1': 60, 'hidden_dim2': 229, 'lr': 0.007830167847039394}. Best is trial 1 with value: 0.11008380391577413.
[I 2025-03-27 00:43:24,796] Trial 3 finished with val

In [None]:
# Print the best hyperparameters
print("Best number of hidden layers in group 1:", study.best_params["num_layers_hidden1"])
print("Best number of hidden layers in group 2:", study.best_params["num_layers_hidden2"])
print("Best hidden dimensions: hidden_dim1 =", study.best_params["hidden_dim1"], 
"hidden_dim2 =", study.best_params["hidden_dim2"])
print("Best learning rate:", study.best_params["lr"])

In [None]:
study.best_value