<a href="https://colab.research.google.com/github/TaiseiYamana/optuna_study/blob/main/optuna_tutorial_3_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 分散並列最適化用２番目のノートブック

In [1]:
!pip3 install optuna



In [2]:
# Mount Google Drive into the Colab runtime at /content/drive; the Optuna storage
# directory used by this notebook lives under the mounted MyDrive folder.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os

# Directory (relative to the current working directory) that holds the Optuna storage file.
directory_path = './drive/MyDrive/optuna_colab_db/'
# exist_ok=True makes the call idempotent and removes the check-then-create race
# that can occur when several notebooks run this cell at the same time.
os.makedirs(directory_path, exist_ok=True)

In [4]:
import optuna
from optuna.trial import TrialState
from optuna.study import MaxTrialsCallback
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms
from optuna.pruners import NopPruner

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Mini-batch size used by both the training and the validation data loader.
BATCHSIZE = 128
# Size of the model's output layer (number of classes).
CLASSES = 10
# Root directory passed to the FashionMNIST dataset (the current working directory).
DIR = os.getcwd()
# Number of training epochs per trial.
EPOCHS = 10
# Per-epoch caps on the number of training / validation examples (50 and 10 batches).
N_TRAIN_EXAMPLES = BATCHSIZE * 50
N_VALID_EXAMPLES = BATCHSIZE * 10

def define_model(trial):
    """Build a fully connected classifier whose architecture is sampled from ``trial``.

    The trial chooses the number of hidden layers (1 to 3) and, for every hidden
    layer, its width (4 to 128 units) and its dropout probability (0.2 to 0.5).

    Args:
        trial: Object providing ``suggest_int`` and ``suggest_float``.

    Returns:
        An ``nn.Sequential`` that takes flattened 28 * 28 inputs and ends with a
        ``CLASSES``-wide linear layer followed by ``LogSoftmax``.
    """
    depth = trial.suggest_int("n_layers", 1, 3)

    modules = []
    width_in = 28 * 28
    for idx in range(depth):
        # Sample this hidden layer's hyperparameters (width first, then dropout).
        width_out = trial.suggest_int(f"n_units_l{idx}", 4, 128)
        drop_prob = trial.suggest_float(f"dropout_l{idx}", 0.2, 0.5)
        modules += [nn.Linear(width_in, width_out), nn.ReLU(), nn.Dropout(drop_prob)]
        width_in = width_out

    # Output head: one log-probability per class.
    modules += [nn.Linear(width_in, CLASSES), nn.LogSoftmax(dim=1)]
    return nn.Sequential(*modules)

def get_mnist():
    """Create the FashionMNIST training and validation data loaders.

    The training split is downloaded into ``DIR`` when it is missing; the
    validation split is read from the same directory. Both loaders shuffle
    their data and yield batches of ``BATCHSIZE`` samples.

    Returns:
        A tuple ``(train_loader, valid_loader)``.
    """
    to_tensor = transforms.ToTensor()
    loader_options = {"batch_size": BATCHSIZE, "shuffle": True}

    train_set = datasets.FashionMNIST(DIR, train=True, download=True, transform=to_tensor)
    train_loader = torch.utils.data.DataLoader(train_set, **loader_options)

    valid_set = datasets.FashionMNIST(DIR, train=False, transform=to_tensor)
    valid_loader = torch.utils.data.DataLoader(valid_set, **loader_options)

    return train_loader, valid_loader

def objective(trial):
    """Train a model sampled from ``trial`` and return its validation accuracy.

    The trial selects the network architecture (via ``define_model``), the
    optimizer class and the learning rate. The model is trained for ``EPOCHS``
    epochs on at most ``N_TRAIN_EXAMPLES`` examples per epoch and evaluated after
    every epoch on at most ``N_VALID_EXAMPLES`` validation examples.

    Args:
        trial: Object providing the ``suggest_*`` methods used for sampling.

    Returns:
        The accuracy measured after the final epoch.
    """
    net = define_model(trial).to(DEVICE)

    # Sample the optimizer class and its learning rate.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer_cls = getattr(optim, optimizer_name)
    optimizer = optimizer_cls(net.parameters(), lr=lr)

    train_loader, valid_loader = get_mnist()
    # Denominator of the accuracy: the number of validation examples actually scored.
    n_scored = min(len(valid_loader.dataset), N_VALID_EXAMPLES)

    for epoch in range(EPOCHS):
        # ---- training pass ----
        net.train()
        for step, (images, labels) in enumerate(train_loader):
            # Stop early so each epoch sees at most N_TRAIN_EXAMPLES examples.
            if step * BATCHSIZE >= N_TRAIN_EXAMPLES:
                break

            images = images.view(images.size(0), -1).to(DEVICE)
            labels = labels.to(DEVICE)

            optimizer.zero_grad()
            loss = F.nll_loss(net(images), labels)
            loss.backward()
            optimizer.step()

        # ---- validation pass ----
        net.eval()
        n_correct = 0
        with torch.no_grad():
            for step, (images, labels) in enumerate(valid_loader):
                # Stop early so at most N_VALID_EXAMPLES examples are scored.
                if step * BATCHSIZE >= N_VALID_EXAMPLES:
                    break

                images = images.view(images.size(0), -1).to(DEVICE)
                labels = labels.to(DEVICE)

                # The predicted class is the index of the largest log-probability.
                predicted = net(images).argmax(dim=1)
                n_correct += (predicted == labels).sum().item()

        accuracy = n_correct / n_scored

        # Intermediate reporting and pruning are intentionally disabled here.
        # To enable them, call trial.report(accuracy, epoch) and raise
        # optuna.exceptions.TrialPruned() when trial.should_prune() is true.

    return accuracy

In [7]:
# Trial budget: passed to study.optimize() for this process and to
# MaxTrialsCallback for the study as a whole.
n_trials = 500
# NOTE: the "strage" spelling is kept on purpose so that the already existing
# database file on Drive is still found.
strage_name = "optuna_strage.sql"
study_name = 'example-study'
# SQLite database inside the Drive directory created earlier in the notebook.
storage = 'sqlite:///' + directory_path + strage_name
print(f"storage: {storage}")
study = optuna.create_study(
    study_name=study_name,
    storage=storage,
    # Join the study if it already exists instead of raising an error.
    load_if_exists=True,
    # Pass a pruner *instance*. Handing over the NopPruner class itself only went
    # unnoticed because the objective never calls trial.should_prune().
    pruner=NopPruner(),
    direction='maximize',
)

study.optimize(
    objective,
    n_trials=n_trials,
    # Stop the study once the storage holds n_trials completed trials in total.
    callbacks=[MaxTrialsCallback(n_trials)],
)


[I 2023-07-04 16:54:17,380] Using an existing study with name 'example-study' instead of creating a new one.


storage: sqlite:///./drive/MyDrive/optuna_colab_db/optuna_strage.sql


[I 2023-07-04 16:54:31,284] Trial 44 finished with value: 0.84296875 and parameters: {'dropout_l0': 0.21285494190883203, 'lr': 0.005210695429461801, 'n_layers': 1, 'n_units_l0': 48, 'optimizer': 'Adam'}. Best is trial 22 with value: 0.8484375.
[I 2023-07-04 16:54:44,880] Trial 45 finished with value: 0.83046875 and parameters: {'dropout_l0': 0.2068168799782223, 'lr': 0.0074855861644306885, 'n_layers': 1, 'n_units_l0': 46, 'optimizer': 'Adam'}. Best is trial 22 with value: 0.8484375.
[I 2023-07-04 16:54:59,238] Trial 46 finished with value: 0.825 and parameters: {'dropout_l0': 0.20120102835540674, 'lr': 0.00580748997082698, 'n_layers': 1, 'n_units_l0': 22, 'optimizer': 'Adam'}. Best is trial 22 with value: 0.8484375.
[I 2023-07-04 16:55:13,060] Trial 47 finished with value: 0.834375 and parameters: {'dropout_l0': 0.24959829715352855, 'lr': 0.0024383865002896668, 'n_layers': 1, 'n_units_l0': 37, 'optimizer': 'Adam'}. Best is trial 22 with value: 0.8484375.
[I 2023-07-04 16:55:27,010] Tri

In [8]:
# Report the best objective value found so far and the parameters that produced it.
print(
    f"Best objective value: {study.best_value}",
    f"Best parameter: {study.best_params}",
    sep="\n",
)

Best objective value: 0.8703125
Best parameter: {'dropout_l0': 0.2742272213192275, 'lr': 0.0030442888165208176, 'n_layers': 1, 'n_units_l0': 78, 'optimizer': 'Adam'}


## 分散並列最適化失敗

共通のストレージとして扱われておらず、同じ名前のストレージファイルが複製されていました。  
共有ストレージとして Google Drive は使えないかもしれません。  
<img src="https://github.com/TaiseiYamana/optuna_study/assets/54575368/9bcd8d0d-30b3-4713-97ae-1d0b5d908407">


KaggleにAWS RDSを使ったGoogle Colabの分散並列化のサンプルがあるので、
みなさん試してみてください。  
https://www.kaggle.com/code/anubhavchhabra/scaling-your-hyperparameter-search-using-optuna

<img src="https://iili.io/59qCLF.md.png" alt="My Image" width="500">