In [1]:
import pandas as pd
import math
from sklearn.preprocessing import MinMaxScaler
import re
import numpy as np
from tqdm import tqdm
# pytorch libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from ChessDataset import ChessDataset
from ChessNet import Net
from torch.optim import lr_scheduler
import optuna
import matplotlib.pyplot as plt
from util import get_device, load_data, get_datalaoder

from optuna.trial import TrialState

In [2]:
BATCH_SIZE = 64
EPOCHS = 20
N_TRAIN_EXAMPLES = 20000
N_VALID_EXAMPLES = 5000

DEVICE = get_device()
DEVICE

device(type='cuda')

In [3]:
chess_data = load_data()

In [4]:
def define_model(trial):
    # We optimize the number of layers, hidden units and dropout ratio in each layer.
    n_layers = trial.suggest_int("n_layers", 1, 4)
    layers = []

    in_features = 13 * 8 * 8
    for i in range(n_layers):
        out_features = trial.suggest_int("n_units_l{}".format(i), 32, 1024)
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.ReLU())
        p = trial.suggest_float("dropout_l{}".format(i), 0.1, 0.4)
        layers.append(nn.Dropout(p))

        in_features = out_features
    layers.append(nn.Linear(in_features, 1))

    return nn.Sequential(*layers)

In [5]:
def objective(trial):
    # Generate the model.
    model = define_model(trial).to(DEVICE)

    # Generate the optimizers.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD", "AdamW"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, steps_per_epoch=N_TRAIN_EXAMPLES, epochs=EPOCHS)
    criterion = nn.MSELoss()

    # Get the FashionMNIST dataset.
    train_loader, valid_loader = get_datalaoder(chess_data, BATCH_SIZE)
    val_loss = 999999999999
    # Training of the model.
    with tqdm(total=EPOCHS, desc="Training") as pbar:
        for epoch in range(EPOCHS):
            model.train()
            for batch_idx, (data, target) in enumerate(train_loader):
                # Limiting training data for faster epochs.
                if batch_idx * BATCH_SIZE >= N_TRAIN_EXAMPLES:
                    break

                data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                optimizer.zero_grad()
                output = model(data.float())
                loss = criterion(output, target)
                if batch_idx % 100 == 0:
                    pbar.set_postfix({'Sample': f'{batch_idx*BATCH_SIZE}/{N_TRAIN_EXAMPLES}', 'Train-Loss': f'{loss.item()/BATCH_SIZE:.5f}'})
                loss.backward()
                optimizer.step()
                scheduler.step()

            # Validation of the model.
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for batch_idx, (data, target) in enumerate(valid_loader):
                    # Limiting validation data.
                    if batch_idx * BATCH_SIZE >= N_VALID_EXAMPLES:
                        break
                    data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE)
                    output = model(data.float())
                    loss = criterion(output, target)
                    val_loss += loss.item()
                    if batch_idx % 100 == 0:
                        pbar.set_postfix({'Val_sample': f'{batch_idx*BATCH_SIZE}/{N_VALID_EXAMPLES}', 'Val-Loss': f'{loss.item():.5f}', 'Val-Loss_avg': f'{val_loss/(batch_idx+1):.5f}'})

            val_loss = val_loss / N_VALID_EXAMPLES

            trial.report(val_loss, epoch)

            # Handle pruning based on the intermediate value.
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()
            pbar.update(1)

    return val_loss

In [6]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-11-11 23:50:05,032][0m A new study created in memory with name: no-name-b2db823f-9dfd-450d-a34e-ce46074f5c9e[0m
Training: 100%|██████████| 20/20 [00:17<00:00,  1.14it/s, Val_sample=0/5000, Val-Loss=31.04224, Val-Loss_avg=31.04224]
[32m[I 2022-11-11 23:50:25,738][0m Trial 0 finished with value: 0.4564515966415405 and parameters: {'n_layers': 1, 'n_units_l0': 218, 'dropout_l0': 0.25787537793675763, 'optimizer': 'SGD', 'lr': 3.4760206044649325e-05}. Best is trial 0 with value: 0.4564515966415405.[0m
Training: 100%|██████████| 20/20 [00:19<00:00,  1.04it/s, Val_sample=0/5000, Val-Loss=3.78852, Val-Loss_avg=3.78852] 
[32m[I 2022-11-11 23:50:45,162][0m Trial 1 finished with value: 0.26067745633125305 and parameters: {'n_layers': 2, 'n_units_l0': 351, 'dropout_l0': 0.3904752189894659, 'n_units_l1': 628, 'dropout_l1': 0.37282285879269417, 'optimizer': 'SGD', 'lr': 0.04522108580639937}. Best is trial 1 with value: 0.26067745633125305.[0m
Training: 100%|██████████| 20/20 [00

Study statistics: 
  Number of finished trials:  10
  Number of pruned trials:  4
  Number of complete trials:  6
Best trial:
  Value:  0.2006756387233734
  Params: 
    n_layers: 3
    n_units_l0: 382
    dropout_l0: 0.19146414512011245
    n_units_l1: 439
    dropout_l1: 0.16513089823718002
    n_units_l2: 710
    dropout_l2: 0.15374156984513637
    optimizer: Adam
    lr: 0.007759080345587955


In [7]:
complete_trials

[FrozenTrial(number=0, values=[0.4564515966415405], datetime_start=datetime.datetime(2022, 11, 11, 23, 50, 5, 34192), datetime_complete=datetime.datetime(2022, 11, 11, 23, 50, 25, 737949), params={'n_layers': 1, 'n_units_l0': 218, 'dropout_l0': 0.25787537793675763, 'optimizer': 'SGD', 'lr': 3.4760206044649325e-05}, distributions={'n_layers': IntDistribution(high=4, log=False, low=1, step=1), 'n_units_l0': IntDistribution(high=1024, log=False, low=32, step=1), 'dropout_l0': FloatDistribution(high=0.4, log=False, low=0.1, step=None), 'optimizer': CategoricalDistribution(choices=('Adam', 'RMSprop', 'SGD', 'AdamW')), 'lr': FloatDistribution(high=0.1, log=True, low=1e-05, step=None)}, user_attrs={}, system_attrs={}, intermediate_values={0: 0.45697897119522096, 1: 0.45694997272491455, 2: 0.45692034797668457, 3: 0.45688678941726685, 4: 0.4568616651535034, 5: 0.45683273706436156, 6: 0.45681643686294554, 7: 0.456786745262146, 8: 0.4567595580101013, 9: 0.45673000860214236, 10: 0.4567056228637695