In [1]:
import argparse
import time
import math
from functools import partial

import numpy as np
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

In [2]:
def count_parameters(model: nn.Module) -> int:
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def measure_inference_time(model: nn.Module, device, loader, n_batches=10):
    model.eval()
    torch.cuda.synchronize() if device.type == 'cuda' else None
    start = time.time()
    it = iter(loader)
    for _ in range(min(n_batches, len(loader))):
        try:
            x, _ = next(it)
        except StopIteration:
            break
        x = x.to(device)
        with torch.no_grad():
            _ = model(x)
    torch.cuda.synchronize() if device.type == 'cuda' else None
    end = time.time()
    return (end - start) / max(1, min(n_batches, len(loader)))

class SimpleMLP(nn.Module):
    def __init__(self, input_dim, hidden_sizes, num_classes, dropout):
        super().__init__()
        layers = []
        last = input_dim
        for h in hidden_sizes:
            layers.append(nn.Linear(last, h))
            layers.append(nn.ReLU())
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            last = h
        layers.append(nn.Linear(last, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        if x.dim() > 2:
            x = x.view(x.size(0), -1)
        return self.net(x)

import torch
import torch.nn as nn

class SimpleCNN(nn.Module):
    def __init__(self, input_shape=(1, 28, 28), num_classes=10, base_channels=32, fc_units=100):
        super(SimpleCNN, self).__init__()
        
        self.conv = nn.Sequential(
            nn.Conv2d(1, base_channels, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        
        # calcula o tamanho da saída após convolução
        with torch.no_grad():
            dummy = torch.zeros(1, *input_shape)  # ex: (1, 1, 28, 28) para MNIST
            conv_out = self.conv(dummy)
            conv_out_dim = conv_out.view(1, -1).size(1)
        
        self.fc = nn.Sequential(
            nn.Linear(conv_out_dim, fc_units),
            nn.ReLU(),
            nn.Linear(fc_units, num_classes)
        )

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)  # achata
        return self.fc(x)


def train_one_epoch(model, device, loader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    for x, y in loader:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * x.size(0)
    return running_loss / len(loader.dataset)

def evaluate(model, device, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            out = model(x)
            pred = out.argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.size(0)
    return correct / total

def objective_multi(trial, args, train_loader, val_loader, device):
    model_type = trial.suggest_categorical('model_type', ['mlp', 'cnn'])

    if model_type == 'mlp':
        n_layers = trial.suggest_int('n_layers', 1, 4)
        hidden_sizes = [trial.suggest_int(f'n_units_l{i}', 32, 512) for i in range(n_layers)]
        dropout = trial.suggest_float('dropout', 0.0, 0.5)
    else:
        base_channels = trial.suggest_categorical('base_channels', [16, 32, 64])
        num_conv_layers = trial.suggest_int('num_conv_layers', 1, 3)
        fc_units = trial.suggest_int('fc_units', 64, 512)
        dropout = trial.suggest_float('dropout', 0.0, 0.5)

    lr = trial.suggest_loguniform('lr', 1e-4, 1e-1)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    optimizer_name = trial.suggest_categorical('optimizer', ['adam', 'sgd'])
    epochs = args.epochs_per_trial

    if model_type == 'mlp':
        example_x, _ = next(iter(train_loader))
        input_dim = int(np.prod(example_x.shape[1:]))
        num_classes = len(train_loader.dataset.classes) if hasattr(train_loader.dataset, 'classes') else 10
        model = SimpleMLP(input_dim, hidden_sizes, num_classes, dropout)
    else:
        in_ch = train_loader.dataset[0][0].shape[0]
        num_classes = len(train_loader.dataset.classes) if hasattr(train_loader.dataset, 'classes') else 10
        input_shape = train_loader.dataset[0][0].shape  # exemplo: torch.Size([1, 28, 28])
        num_classes = len(train_loader.dataset.classes) if hasattr(train_loader.dataset, 'classes') else 10
        
        model = SimpleCNN(input_shape, num_classes, base_channels, fc_units)

    device = device
    model.to(device)

    params = count_parameters(model)

    if optimizer_name == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    criterion = nn.CrossEntropyLoss()

    start_time = time.time()
    for ep in range(epochs):
        train_one_epoch(model, device, train_loader, optimizer, criterion)
    train_time = time.time() - start_time

    acc = evaluate(model, device, val_loader)

    inf_time = measure_inference_time(model, device, val_loader, n_batches=5)

    return -acc, train_time, float(params)


def objective_weighted(trial, args, train_loader, val_loader, device):
    res = objective_multi(trial, args, train_loader, val_loader, device)
    neg_acc, train_time, params = res
    acc = -neg_acc
    acc_norm = acc  # já entre 0 e 1
    time_norm = train_time / (args.max_time_hint if args.max_time_hint > 0 else 100.0)
    params_norm = params / (args.max_params_hint if args.max_params_hint > 0 else 1e6)
    score = -(args.w_acc * acc_norm) + args.w_time * time_norm + args.w_params * params_norm
    return float(score)

def get_dataloaders(dataset_name, batch_size):
    if dataset_name == 'mnist':
        transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
        train = datasets.MNIST('./data', train=True, download=True, transform=transform)
        test = datasets.MNIST('./data', train=False, download=True, transform=transform)
        n = len(train)
        n_val = int(0.1 * n)
        n_train = n - n_val
        train_set, val_set = torch.utils.data.random_split(train, [n_train, n_val])
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test, batch_size=batch_size, shuffle=False)
        return train_loader, val_loader, test_loader

    elif dataset_name == 'cifar10':
        transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        train = datasets.CIFAR10('./data', train=True, download=True, transform=transform)
        test = datasets.CIFAR10('./data', train=False, download=True, transform=transform)
        n = len(train)
        n_val = int(0.1 * n)
        n_train = n - n_val
        train_set, val_set = torch.utils.data.random_split(train, [n_train, n_val])
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test, batch_size=batch_size, shuffle=False)
        return train_loader, val_loader, test_loader

    else:
        raise ValueError('Dataset não suportado no exemplo. Forneça seu DataLoader customizado.')


def run_search(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Usando device: {device}")

    train_loader, val_loader, test_loader = get_dataloaders(args.dataset, batch_size=64)

    if args.mode == 'pareto':
        study = optuna.create_study(directions=["minimize", "minimize", "minimize"])  # [-acc, time, params]
        objective = lambda trial: objective_multi(trial, args, train_loader, val_loader, device)
    else:
        study = optuna.create_study(direction='minimize')
        objective = lambda trial: objective_weighted(trial, args, train_loader, val_loader, device)

    print("Iniciando busca...")
    study.optimize(objective, n_trials=args.n_trials)

    if args.mode == 'pareto':
        print("Fronteira de Pareto encontrada (objetivos: -acc, time(s), params):")
        for t in study.best_trials:
            print(t.values, t.params)
    else:
        print("Melhor trial (score ponderado):")
        print(study.best_trial.value, study.best_trial.params)

In [3]:
parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['pareto', 'weighted'], default='pareto')
parser.add_argument('--dataset', choices=['mnist', 'cifar10'], default='mnist')
parser.add_argument('--n-trials', type=int, dest='n_trials', default=30)
parser.add_argument('--epochs-per-trial', type=int, dest='epochs_per_trial', default=3)
parser.add_argument('--max-time-hint', type=float, dest='max_time_hint', default=200.0)
parser.add_argument('--max-params-hint', type=float, dest='max_params_hint', default=5e6)
parser.add_argument('--w-acc', type=float, dest='w_acc', default=0.6)
parser.add_argument('--w-time', type=float, dest='w_time', default=0.2)
parser.add_argument('--w-params', type=float, dest='w_params', default=0.2)

args, _ = parser.parse_known_args()
run_search(args)

[I 2025-08-30 00:49:57,613] A new study created in memory with name: no-name-d13c3867-1de9-43eb-84f8-82152d07ddfa


Usando device: cpu
Iniciando busca...


  lr = trial.suggest_loguniform('lr', 1e-4, 1e-1)
[I 2025-08-30 00:51:13,579] Trial 0 finished with values: [-0.9756666666666667, 74.36169624328613, 1759570.0] and parameters: {'model_type': 'cnn', 'base_channels': 32, 'num_conv_layers': 1, 'fc_units': 280, 'dropout': 0.20314273690252904, 'lr': 0.008361951953659602, 'batch_size': 32, 'optimizer': 'adam'}.
[I 2025-08-30 00:51:51,705] Trial 1 finished with values: [-0.9731666666666666, 36.919607400894165, 435318.0] and parameters: {'model_type': 'mlp', 'n_layers': 2, 'n_units_l0': 450, 'n_units_l1': 178, 'dropout': 0.23228936881575785, 'lr': 0.0005512412046334231, 'batch_size': 64, 'optimizer': 'adam'}.
[I 2025-08-30 00:53:17,228] Trial 2 finished with values: [-0.9828333333333333, 83.90655469894409, 1941777.0] and parameters: {'model_type': 'cnn', 'base_channels': 32, 'num_conv_layers': 2, 'fc_units': 309, 'dropout': 0.21845240028006596, 'lr': 0.0023806116970756367, 'batch_size': 64, 'optimizer': 'adam'}.
[I 2025-08-30 00:53:51,540] Tri

Fronteira de Pareto encontrada (objetivos: -acc, time(s), params):
[-0.973, 33.15312886238098, 380020.0] {'model_type': 'mlp', 'n_layers': 1, 'n_units_l0': 478, 'dropout': 0.00799212280636924, 'lr': 0.008718214465614169, 'batch_size': 32, 'optimizer': 'sgd'}
[-0.9751666666666666, 42.369699001312256, 211019.0] {'model_type': 'cnn', 'base_channels': 16, 'num_conv_layers': 2, 'fc_units': 67, 'dropout': 0.048199285791166835, 'lr': 0.0002869529650300838, 'batch_size': 32, 'optimizer': 'adam'}
[-0.9846666666666667, 44.468268394470215, 613835.0] {'model_type': 'cnn', 'base_channels': 16, 'num_conv_layers': 2, 'fc_units': 195, 'dropout': 0.14128024841085507, 'lr': 0.03108445617977969, 'batch_size': 128, 'optimizer': 'sgd'}
[-0.9676666666666667, 38.2529456615448, 264791.0] {'model_type': 'mlp', 'n_layers': 4, 'n_units_l0': 77, 'n_units_l1': 386, 'n_units_l2': 208, 'n_units_l3': 428, 'dropout': 0.06558288123757805, 'lr': 0.0002782484083297675, 'batch_size': 32, 'optimizer': 'adam'}
[-0.984833333