## Optuna: hyperparameter optimization

Code and tutorials adapted from the Optuna documentation and the official Optuna PyTorch example (`pytorch_simple.py`).

In [4]:
import os
import optuna
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms



### First example:

In [2]:
def objective(trial):
    """Toy objective: squared distance between a sampled ``x`` and 2.

    ``x`` is requested from ``trial`` as a float in the range [-10, 10];
    the returned value is smallest (zero) when ``x == 2``.
    """
    candidate = trial.suggest_float('x', -10, 10)
    offset = candidate - 2
    return offset ** 2

# Create a study with Optuna's defaults (the log below reports it is held in memory).
study = optuna.create_study()
# Evaluate the objective 100 times; each trial samples a new value of x.
study.optimize(objective, n_trials=100)

# Trailing expression of the cell: shows the parameters of the best trial found.
study.best_params  # E.g. {'x': 2.002108042}

[32m[I 2023-05-16 15:39:03,340][0m A new study created in memory with name: no-name-5fd9e933-2e4b-4690-9b0d-e0315be48b7b[0m
[32m[I 2023-05-16 15:39:03,347][0m Trial 0 finished with value: 18.893653106180896 and parameters: {'x': 6.3466830004246795}. Best is trial 0 with value: 18.893653106180896.[0m
[32m[I 2023-05-16 15:39:03,348][0m Trial 1 finished with value: 19.378618809957132 and parameters: {'x': 6.402115265410156}. Best is trial 0 with value: 18.893653106180896.[0m
[32m[I 2023-05-16 15:39:03,348][0m Trial 2 finished with value: 11.243607299231527 and parameters: {'x': 5.3531488632674105}. Best is trial 2 with value: 11.243607299231527.[0m
[32m[I 2023-05-16 15:39:03,349][0m Trial 3 finished with value: 21.71044625420755 and parameters: {'x': 6.659446990170352}. Best is trial 2 with value: 11.243607299231527.[0m
[32m[I 2023-05-16 15:39:03,350][0m Trial 4 finished with value: 3.0652381330285374 and parameters: {'x': 0.2492178510652625}. Best is trial 4 with value: 

{'x': 1.9910393545038703}

### Optuna for PyTorch:

In [5]:
"""
Optuna example that optimizes multi-layer perceptrons using PyTorch.

In this example, we optimize the validation accuracy of fashion product recognition using
PyTorch and FashionMNIST. We optimize the neural network architecture as well as the optimizer
configuration. As it is too time consuming to use the whole FashionMNIST dataset,
we here use a small subset of it.

"""

# --- Run configuration -------------------------------------------------------

# Device on which the model and every batch are placed.
DEVICE = torch.device("cpu")

# Root directory handed to the dataset loader: the current working directory.
DIR = os.getcwd()

# Mini-batch size shared by the training and validation loaders.
BATCHSIZE = 128

# Number of output classes of the classifier.
CLASSES = 10

# Number of train/validate rounds performed within a single trial.
EPOCHS = 10

# Per-epoch caps on the number of samples used, expressed in whole batches.
N_TRAIN_EXAMPLES = 30 * BATCHSIZE
N_VALID_EXAMPLES = 10 * BATCHSIZE


def define_model(trial):
    """Build a multi-layer perceptron whose architecture is sampled from ``trial``.

    Sampled hyperparameters:
      * ``n_layers``     -- number of hidden layers, integer in [1, 3]
      * ``n_units_l{i}`` -- width of hidden layer ``i``, integer in [4, 128]
      * ``dropout_l{i}`` -- dropout probability after layer ``i``, float in [0.2, 0.5]

    Returns an ``nn.Sequential`` that takes 28 * 28 input features and ends
    with a ``CLASSES``-wide linear layer followed by ``LogSoftmax`` over dim 1.
    """
    depth = trial.suggest_int("n_layers", 1, 3)

    modules = []
    width_in = 28 * 28
    for idx in range(depth):
        width_out = trial.suggest_int(f"n_units_l{idx}", 4, 128)
        linear = nn.Linear(width_in, width_out)
        drop_prob = trial.suggest_float(f"dropout_l{idx}", 0.2, 0.5)
        # Hidden stage: affine map -> ReLU -> dropout.
        modules += [linear, nn.ReLU(), nn.Dropout(drop_prob)]
        # The next layer consumes what this one produced.
        width_in = width_out

    # Classification head producing log-probabilities.
    modules += [nn.Linear(width_in, CLASSES), nn.LogSoftmax(dim=1)]

    return nn.Sequential(*modules)


def get_mnist():
    """Create the FashionMNIST training and validation data loaders.

    Both datasets are rooted at ``DIR`` and converted with ``ToTensor``.
    Only the training split is created with ``download=True``. Both loaders
    use batches of ``BATCHSIZE`` samples and shuffle their data.

    Returns:
        A ``(train_loader, valid_loader)`` tuple.
    """
    to_tensor = transforms.ToTensor()

    train_set = datasets.FashionMNIST(DIR, train=True, download=True, transform=to_tensor)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCHSIZE, shuffle=True)

    valid_set = datasets.FashionMNIST(DIR, train=False, transform=transforms.ToTensor())
    valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=BATCHSIZE, shuffle=True)

    return train_loader, valid_loader


def _train_one_epoch(model, optimizer, train_loader):
    """Run one optimisation pass over the capped training subset."""
    model.train()
    for step, (images, labels) in enumerate(train_loader):
        # Cap the number of samples per epoch so that epochs stay short.
        if step * BATCHSIZE >= N_TRAIN_EXAMPLES:
            break

        # Flatten each image into a vector before feeding the MLP.
        images = images.view(images.size(0), -1).to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        loss = F.nll_loss(model(images), labels)
        loss.backward()
        optimizer.step()


def _count_correct(model, valid_loader):
    """Return how many samples of the capped validation subset are classified correctly."""
    model.eval()
    hits = 0
    with torch.no_grad():
        for step, (images, labels) in enumerate(valid_loader):
            # Cap the number of validation samples.
            if step * BATCHSIZE >= N_VALID_EXAMPLES:
                break

            images = images.view(images.size(0), -1).to(DEVICE)
            labels = labels.to(DEVICE)

            # Predicted class = index of the largest log-probability.
            predicted = model(images).argmax(dim=1, keepdim=True)
            hits += predicted.eq(labels.view_as(predicted)).sum().item()
    return hits


def objective(trial):
    """Train a sampled MLP on FashionMNIST and return its validation accuracy.

    The architecture comes from ``define_model``; the optimizer class
    (``Adam``, ``RMSprop`` or ``SGD``) and the learning rate (log-scaled in
    [1e-5, 1e-1]) are sampled here. After every epoch the validation accuracy
    is reported to ``trial`` so that the trial can be pruned.

    Returns:
        The validation accuracy measured after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when ``trial.should_prune()`` is true
            after an epoch's accuracy has been reported.
    """
    model = define_model(trial).to(DEVICE)

    # Sample the optimizer configuration and instantiate it by name.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer_cls = getattr(optim, optimizer_name)
    optimizer = optimizer_cls(model.parameters(), lr=lr)

    train_loader, valid_loader = get_mnist()

    for epoch in range(EPOCHS):
        _train_one_epoch(model, optimizer, train_loader)
        correct = _count_correct(model, valid_loader)

        # Normalise by the number of validation samples actually allowed.
        evaluated = min(len(valid_loader.dataset), N_VALID_EXAMPLES)
        accuracy = correct / evaluated

        # Publish the intermediate score, then stop early if asked to prune.
        trial.report(accuracy, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy


if __name__ == "__main__":
    # Search for the configuration with the highest validation accuracy,
    # bounded by 100 trials and a 600-second timeout.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100, timeout=600)

    # Split the trials by final state for the summary below.
    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    print(f"  Number of finished trials:  {len(study.trials)}")
    print(f"  Number of pruned trials:  {len(pruned_trials)}")
    print(f"  Number of complete trials:  {len(complete_trials)}")

    # Report the best trial: its score, then each sampled hyperparameter.
    print("Best trial:")
    trial = study.best_trial

    print(f"  Value:  {trial.value}")

    print("  Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

[32m[I 2023-05-16 15:49:50,229][0m A new study created in memory with name: no-name-7f511aed-e995-412b-aec3-5e5df9381798[0m


Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:05<00:00, 5213263.98it/s]


Extracting c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw\train-images-idx3-ubyte.gz to c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 2635335.45it/s]


Extracting c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw\train-labels-idx1-ubyte.gz to c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 4092925.45it/s]


Extracting c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 5147145.89it/s]


Extracting c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to c:\Users\Bastien\Documents\GitHub\Package_test\FashionMNIST\raw



[32m[I 2023-05-16 15:50:05,267][0m Trial 0 finished with value: 0.703125 and parameters: {'n_layers': 2, 'n_units_l0': 83, 'dropout_l0': 0.21184075634012306, 'n_units_l1': 13, 'dropout_l1': 0.42734728789034504, 'optimizer': 'Adam', 'lr': 0.02477645026468584}. Best is trial 0 with value: 0.703125.[0m
[32m[I 2023-05-16 15:50:11,786][0m Trial 1 finished with value: 0.74453125 and parameters: {'n_layers': 1, 'n_units_l0': 52, 'dropout_l0': 0.36209938991456725, 'optimizer': 'RMSprop', 'lr': 0.0002070729127908854}. Best is trial 1 with value: 0.74453125.[0m
[32m[I 2023-05-16 15:50:22,071][0m Trial 2 finished with value: 0.290625 and parameters: {'n_layers': 2, 'n_units_l0': 20, 'dropout_l0': 0.42470852492108613, 'n_units_l1': 99, 'dropout_l1': 0.22937070167680376, 'optimizer': 'RMSprop', 'lr': 0.02578205074605253}. Best is trial 1 with value: 0.74453125.[0m
[32m[I 2023-05-16 15:50:29,024][0m Trial 3 finished with value: 0.07109375 and parameters: {'n_layers': 1, 'n_units_l0': 111,

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  60
  Number of complete trials:  40
Best trial:
  Value:  0.84140625
  Params: 
    n_layers: 1
    n_units_l0: 56
    dropout_l0: 0.3640354622160226
    optimizer: Adam
    lr: 0.007167420981423627
