In [1]:
import os
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
import random
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import chess

In [2]:
# Device that the model and every batch are moved to via `.to(device)` in the cells below.
device = "cpu"

In [3]:
def prepare_data(df, batch_size, test_size=0.2, random_state=None):
    """Split a DataFrame into shuffled train/test DataLoaders of float32 tensors.

    The ``result`` column is used as the target; every other column of ``df``
    is used as a feature.

    Args:
        df: DataFrame containing the feature columns plus a ``result`` column.
        batch_size: Batch size used for both returned loaders.
        test_size: Forwarded to ``train_test_split``; fraction of rows held
            out for the test loader. Defaults to 0.2.
        random_state: Seed for the split. When ``None`` (the default) a seed
            is drawn with ``random.randint(0, 100)``, so the split differs
            between calls; pass an int to make the split reproducible.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Each loader yields
        ``(features, targets)`` batches of ``torch.float32`` tensors and
        reshuffles on every pass.
    """
    if random_state is None:
        # Preserve the historical behaviour: a fresh, unseeded split per call.
        random_state = random.randint(0, 100)

    targets_numpy = df.result.values
    features_numpy = df.loc[:, df.columns != "result"].values

    features_train, features_test, targets_train, targets_test = train_test_split(
        features_numpy,
        targets_numpy,
        test_size=test_size,
        random_state=random_state,
    )

    featuresTrain = torch.from_numpy(features_train).type(torch.float32)
    targetsTrain = torch.from_numpy(targets_train).type(torch.float32)

    featuresTest = torch.from_numpy(features_test).type(torch.float32)
    targetsTest = torch.from_numpy(targets_test).type(torch.float32)

    # Pytorch train and test sets
    train = torch.utils.data.TensorDataset(featuresTrain, targetsTrain)
    test = torch.utils.data.TensorDataset(featuresTest, targetsTest)

    # data loader
    train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test, batch_size=batch_size, shuffle=True)

    return train_loader, test_loader

In [4]:
# Index of the bitboard CSV to load, and the mini-batch size for the loaders.
train_index = 28
batch_size = 128
print(f"TRAIN_INDEX: {train_index}")
# Build the path from train_index so the printed index and the file that is
# actually loaded cannot drift apart.
train_df = pd.read_csv(f"./lichess/data_t1/bitboard_{train_index}.csv")
# Only the training loader is kept here; the held-out loader is discarded.
train_loader, _ = prepare_data(train_df, batch_size)

TRAIN_INDEX: 28


In [5]:
# Show the loaded training DataFrame as this cell's output.
train_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1079,1080,1081,1082,1083,1084,1085,1086,1087,result
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0
127996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0
127997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0
127998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0


In [6]:
def lr_finder(model, optimizer, train_loader, lr_range, steps):
    """Sweep the learning rate on a log scale and record the mean loss per value.

    For each of ``steps`` learning rates spaced logarithmically between
    ``10**lr_range[0]`` and ``10**lr_range[1]``, one full pass over
    ``train_loader`` is run, updating ``model`` with ``optimizer`` under an
    MSE loss. The model is not reset between learning rates, so every pass
    continues from the weights left by the previous one.

    Args:
        model: Module mapping a ``(batch, features)`` tensor to a
            ``(batch, 1)`` prediction.
        optimizer: Optimizer built over ``model``'s parameters; its learning
            rate is overwritten in place on every step of the sweep.
        train_loader: Iterable yielding ``(boards, labels)`` batches.
        lr_range: ``(start_exponent, end_exponent)`` pair of base-10 exponents.
        steps: Number of learning rates to try.

    Returns:
        A list with one entry per learning rate, in sweep order, holding the
        mean of the per-batch losses observed during that pass.
    """
    loss_list = []
    error = nn.MSELoss()

    # .tolist() yields plain Python floats, so the optimizer never stores a
    # 0-dim tensor as its learning rate.
    lr_values = torch.logspace(lr_range[0], lr_range[1], steps=steps).tolist()

    for lr in tqdm(lr_values):
        # Apply the candidate rate to every parameter group, not just the first.
        for group in optimizer.param_groups:
            group['lr'] = lr
        temp_loss_list = []

        for boards, labels in train_loader:
            # Flatten each sample to a vector without hard-coding the feature width.
            train = boards.view(boards.size(0), -1).to(device)
            labels = labels.view(-1, 1).to(device)
            optimizer.zero_grad()
            outputs = model(train)
            loss = error(outputs, labels)
            temp_loss_list.append(loss.item())
            loss.backward()
            optimizer.step()

        temp_loss = np.mean(temp_loss_list)
        loss_list.append(temp_loss)

    return loss_list

In [7]:
class NewANNModel8(nn.Module):
    """Fully connected regressor with eight hidden stages and a tanh head.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout. The stages
    are registered as ``fc1``/``bn1``/``dropout1`` through
    ``fc8``/``bn8``/``dropout8``, followed by the output layer ``fc9``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob):
        """Create the layers.

        Args:
            input_dim: Width of the input feature vector.
            hd_0 ... hd_7: Widths of the eight hidden layers, in order.
            output_dim: Width of the output layer.
            drop_prob: Dropout probability applied after every hidden stage.
        """
        super().__init__()

        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7)
        # Register stage by stage so the module order (and therefore the
        # state_dict layout and weight-initialisation order) is
        # fc1, bn1, dropout1, fc2, bn2, dropout2, ...
        for stage, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fc{stage}", nn.Linear(n_in, n_out))
            setattr(self, f"bn{stage}", nn.BatchNorm1d(n_out))
            setattr(self, f"dropout{stage}", nn.Dropout(p=drop_prob))

        self.fc9 = nn.Linear(hd_7, output_dim)

    def forward(self, x):
        """Run the eight hidden stages and squash the output through tanh."""
        for stage in range(1, 9):
            linear = getattr(self, f"fc{stage}")
            norm = getattr(self, f"bn{stage}")
            drop = getattr(self, f"dropout{stage}")
            x = drop(torch.relu(norm(linear(x))))

        return torch.tanh(self.fc9(x))

In [13]:
# Network shape and regularisation settings shared by every optimizer run.
input_dim = 1088
hd_0 = 512
hd_1 = 512
hd_2 = 512
hd_3 = 512
hd_4 = 512
hd_5 = 512
hd_6 = 512
hd_7 = 512
output_dim = 1
drop_prob = 0.2
weight_decay = 0.001
batch_size = 128

# Mean loss per swept learning rate, keyed by optimizer name.
loss_dict = {}

optimizer_dict = {
    "NAdam": torch.optim.NAdam,
    "RAdam": torch.optim.RAdam,
    "RMSProp": torch.optim.RMSprop,
    "RProp": torch.optim.Rprop,
    "SGD": torch.optim.SGD
}

# Per-optimizer constructor details, keyed by the names used in optimizer_dict.
# torch.optim.Rprop and torch.optim.LBFGS take no weight_decay argument, so
# passing one raises TypeError.
no_weight_decay = {"RProp", "LBFGS"}
extra_optimizer_kwargs = {"SGD-Momentum": {"momentum": 0.9}}

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    # A fresh model per optimizer so every sweep starts from new weights.
    model = NewANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob)
    model.to(device)

    optimizer_kwargs = {"lr": 1e-6, **extra_optimizer_kwargs.get(algo_name, {})}
    if algo_name not in no_weight_decay:
        optimizer_kwargs["weight_decay"] = weight_decay
    optimizer = optimizer_class(model.parameters(), **optimizer_kwargs)

    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 50)
    loss_dict[algo_name] = loss_list

NAdam


 12%|█████▎                                      | 6/50 [05:20<39:13, 53.49s/it]


KeyboardInterrupt: 

In [None]:
# One column of mean losses per optimizer, one row per swept learning rate.
df = pd.DataFrame(loss_dict)
# Rebuild the learning-rate axis with exactly as many points as there are
# recorded losses; a step count that differs from the sweep length makes the
# column assignment below raise ValueError.
lr_list = torch.logspace(-6, 0, steps=len(df)).tolist()
df['LR'] = lr_list
# Position (within the DataFrame columns) of the optimizer to plot.
idx = 0
keys = list(df.columns)
plt.figure(figsize=(10, 6))
plt.title(keys[idx])
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Loss')
plt.plot(df['LR'], df[keys[idx]], marker = "o")
plt.show()