1. Learning rate finder
2. Train each Optimizer
3. Drop the last optimizer for the next architecture
4. Increase the number of hidden layers

To implement
1. Dropout
2. Weight Decay
3. LR scheduler
4. Train with one csv and test on another csv

In [5]:
import os
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
import random
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import chess

In [6]:
# Collect the CSV file names found in the data directory.
# os.listdir returns entries in arbitrary order, so the list is sorted to make
# positional lookups such as csvs[train_index] pick the same file on every run.
# Matching on the ".csv" suffix avoids picking up unrelated entries whose name
# merely contains "csv".
csvs = sorted(
    name for name in os.listdir("./lichess/data_1") if name.endswith(".csv")
)
len(csvs)

56

In [7]:
def prepare_data(df, batch_size, test_size=0.2, random_state=None):
    """Split ``df`` into train and test ``DataLoader`` objects of float32 tensors.

    The ``result`` column is the target; every other column is a feature.

    Args:
        df: DataFrame with a ``result`` column and the feature columns.
        batch_size: Batch size used by both loaders.
        test_size: Fraction of rows held out for the test loader.
        random_state: Seed for the split. ``None`` keeps the historical
            behaviour of drawing a seed with ``random.randint(0, 100)``;
            pass an int to get a reproducible split.

    Returns:
        Tuple ``(train_loader, test_loader)``.
    """
    if random_state is None:
        random_state = random.randint(0, 100)

    targets_numpy = df.result.values
    features_numpy = df.loc[:, df.columns != "result"].values

    features_train, features_test, targets_train, targets_test = train_test_split(
        features_numpy,
        targets_numpy,
        test_size=test_size,
        random_state=random_state,
    )

    def to_dataset(features, targets):
        # Both features and targets are converted to float32 tensors.
        return torch.utils.data.TensorDataset(
            torch.from_numpy(features).type(torch.float32),
            torch.from_numpy(targets).type(torch.float32),
        )

    train_loader = DataLoader(
        to_dataset(features_train, targets_train),
        batch_size=batch_size,
        shuffle=True,
    )
    # The evaluation loader is not shuffled: the batch composition (including
    # the shorter final batch) then stays fixed, so per-epoch validation losses
    # computed as a mean over batches are directly comparable.
    test_loader = DataLoader(
        to_dataset(features_test, targets_test),
        batch_size=batch_size,
        shuffle=False,
    )

    return train_loader, test_loader

In [None]:
def lr_finder(model, optimizer, train_loader, lr_range, steps):
    """Sweep learning rates on a log scale and record the mean L1 loss of each.

    For every value of ``torch.logspace(lr_range[0], lr_range[1], steps)`` the
    model is trained for one full pass over ``train_loader``. The model is not
    reset between learning rates, so each pass continues from the weights left
    by the previous one.

    Args:
        model: Module called on the flattened batch.
        optimizer: Optimizer over ``model``'s parameters; its learning rate is
            overwritten for every step of the sweep.
        train_loader: Iterable yielding ``(boards, labels)`` batches.
        lr_range: Pair ``(start_exponent, end_exponent)`` of base-10 exponents.
        steps: Number of learning rates to try.

    Returns:
        List with one mean per-batch L1 loss per learning rate.
    """
    loss_list = []
    error = nn.L1Loss()

    for lr in tqdm(torch.logspace(lr_range[0], lr_range[1], steps=steps)):
        # logspace yields 0-dim tensors. Store a plain float instead, because a
        # tensor-valued lr is not accepted by every optimizer implementation,
        # and apply it to every parameter group rather than only the first.
        lr_value = lr.item()
        for group in optimizer.param_groups:
            group['lr'] = lr_value
        temp_loss_list = []

        for boards, labels in train_loader:
            # Flatten each sample to one row; identical to view(-1, 1088) for
            # 1088-feature inputs but not tied to that width.
            inputs = boards.view(boards.size(0), -1)
            labels = labels.view(-1, 1)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = error(outputs, labels)
            temp_loss_list.append(loss.detach().item())
            loss.backward()
            optimizer.step()

        temp_loss = np.mean(temp_loss_list)
        loss_list.append(temp_loss)

    return loss_list

In [None]:
# Learning rates covered by the sweeps: 100 log-spaced values between
# 10**lr_range[0] and 10**lr_range[1], stored as plain Python floats.
lr_range = [-6, 0]
lr_list = torch.logspace(*lr_range, steps=100).tolist()
lr_list

In [None]:
def train(train_loader, test_loader, model, optimizer, stop):
    """Train with L1 loss until the validation loss stops improving.

    Restored from the commented-out version: the training cells further down
    call ``train(train_loader, test_loader, model, optimizer, stop)``.

    Each epoch runs one optimisation pass over ``train_loader`` followed by an
    evaluation pass over ``test_loader``. Training ends once the validation
    loss has failed to improve on its best value for ``stop`` consecutive
    epochs.

    Args:
        train_loader: Iterable yielding ``(boards, labels)`` training batches.
        test_loader: Iterable yielding ``(boards, labels)`` validation batches.
        model: Module called on the flattened batch.
        optimizer: Optimizer over ``model``'s parameters.
        stop: Number of consecutive non-improving epochs that ends training.

    Returns:
        Tuple ``(train_loss_list, val_loss_list)`` with one mean per-batch
        loss per epoch.
    """
    error = nn.L1Loss()
    train_loss_list = []
    val_loss_list = []
    min_loss = float('inf')
    epoch_count = 1
    stop_count = 0

    while True:
        temp_loss_list = []

        model.train()
        for boards, labels in train_loader:
            # One row per sample; same as view(-1, 1088) for 1088-wide inputs.
            inputs = boards.view(boards.size(0), -1)
            targets = labels.view(-1, 1)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = error(outputs, targets)
            temp_loss_list.append(loss.detach().item())
            loss.backward()
            optimizer.step()

        train_loss = np.mean(temp_loss_list)
        train_loss_list.append(train_loss)
        temp_loss_list = []

        # Validation needs no gradients; skipping them saves memory and time.
        model.eval()
        with torch.no_grad():
            for boards, labels in test_loader:
                outputs = model(boards.view(boards.size(0), -1))
                temp_loss_list.append(error(outputs, labels.view(-1, 1)).item())

        val_loss = np.mean(temp_loss_list)
        val_loss_list.append(val_loss)

        # Early-stopping counter: reset on improvement, otherwise count up.
        if val_loss < min_loss:
            min_loss = val_loss
            stop_count = 0
        else:
            stop_count += 1

        print(f"EPOCH: {epoch_count}, TRAIN_LOSS: {round(float(train_loss), 4)}, VAL:_LOSS: {round(float(val_loss), 4)}, STOP_COUNT: {stop_count}")
        epoch_count += 1

        if stop_count >= stop:
            print(f"MIN_VAL_LOSS: {min(val_loss_list)}")
            break

    return train_loss_list, val_loss_list

# Hidden Layer 1

In [None]:
class ANNModel1(nn.Module):
    """Fully connected network with one ReLU hidden layer and a sigmoid output.

    Layout: Linear(input_dim, hidden_dim_0) -> ReLU ->
    Linear(hidden_dim_0, output_dim) -> Sigmoid.
    """

    def __init__(self, input_dim, hidden_dim_0, output_dim):
        super().__init__()
        # Sub-modules are registered in the order the forward pass uses them.
        self.fc1 = nn.Linear(input_dim, hidden_dim_0)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim_0, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for the batch ``x``."""
        hidden = self.relu1(self.fc1(x))
        return self.sig1(self.fc2(hidden))

In [None]:
# ANN1 lr finder
input_dim = 1088
hidden_dim_0 = 512
output_dim = 1
batch_size = 128

# Pick one CSV at random; randrange follows the actual number of files
# instead of a hard-coded upper bound.
train_index = random.randrange(len(csvs))
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

optimizer_dict = {
    "Adadelta": torch.optim.Adadelta,
    "Adagrad": torch.optim.Adagrad,
    "Adam": torch.optim.Adam,
    "Adamax": torch.optim.Adamax,
    "ASGD": torch.optim.ASGD,
    "NAdam": torch.optim.NAdam,
    "RAdam": torch.optim.RAdam,
    "RMSprop": torch.optim.RMSprop,
    "SGD": torch.optim.SGD,
    "SGD-Momentum": torch.optim.SGD,
}

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    # A fresh model per optimizer so the sweeps do not share weights.
    model = ANNModel1(input_dim, hidden_dim_0, output_dim)
    # The initial lr is only a placeholder (lr_finder overwrites it before the
    # first step); 1e-6 is the start of the sweep. The previous code read
    # `learning_rate`, which is not defined before this cell runs.
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum = 0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6)

    # lr_finder expects the training loader as its third argument.
    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 100)
    loss_dict[algo_name] = loss_list

In [None]:
# One column of sweep losses per optimizer plus the matching learning rates,
# written to CSV without the index.
df = pd.DataFrame(loss_dict).assign(LR=lr_list)
df.to_csv('./model_histories/ANN/lr_finder_hd_1.csv', index=False)

In [None]:
# Reload the sweep table saved in lr_finder_hd_1.csv and leave it as the
# cell's displayed value.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_1.csv")
df

In [None]:
# Plot one column of the sweep table (selected by position) against the
# learning rate on a logarithmic x-axis.
idx = 9
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xscale('log')
ax.set_xlabel('Learning Rate')
ax.set_ylabel('Loss')
ax.plot(df['LR'], df[selected], marker='o')
plt.show()

In [None]:
# Train a fresh ANNModel1 with every optimizer at its listed learning rate and
# keep the per-epoch loss histories keyed by optimizer name.
train_loss_dict = {}
val_loss_dict = {}

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "Adadelta": (torch.optim.Adadelta, 0.1),
    "Adagrad": (torch.optim.Adagrad, 0.01),
    "Adam": (torch.optim.Adam, 1e-4),
    "Adamax": (torch.optim.Adamax, 1e-4),
    "ASGD": (torch.optim.ASGD, 0.1),
    "NAdam": (torch.optim.NAdam, 1e-4),
    "RAdam": (torch.optim.RAdam, 1e-4),
    "RMSprop": (torch.optim.RMSprop, 1e-4),
    "SGD": (torch.optim.SGD, 0.1),
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel1(input_dim, hidden_dim_0, output_dim)
    # Only the "SGD-Momentum" entry gets a momentum term.
    extra_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **extra_kwargs)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, 10)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Score every optimizer by the mean of its last (up to) ten validation losses
# and list the (name, score) pairs from lowest to highest score.
# Despite its name, val_loss_ranking_dict ends up as a sorted list of tuples.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories differ in length, so each one is padded with None up to the
# longest run before being written to CSV as equal-length columns.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {}
for name, history in val_loss_dict.items():
    padding = [None] * (max_len - len(history))
    loss_filled_results[name] = history + padding
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_1.csv", index=False)

In [None]:
# Reload the validation-loss histories saved in
# structure_selector_val_loss_hd_1.csv and leave them as the displayed value.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_1.csv')
df

In [None]:
# Plot one column of df (selected by position) against its row index.
idx = 0
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.plot(df[selected], marker='o')
plt.show()

# Hidden Layer 2

In [None]:
class ANNModel2(nn.Module):
    """Fully connected network with two ReLU hidden layers and a sigmoid output.

    Layout: Linear -> ReLU -> Linear -> ReLU -> Linear -> Sigmoid.
    """

    def __init__(self, input_dim, hidden_dim_0, hidden_dim_1, output_dim):
        super().__init__()
        # Sub-modules are registered in the order the forward pass uses them.
        self.fc1 = nn.Linear(input_dim, hidden_dim_0)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim_0, hidden_dim_1)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim_1, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for the batch ``x``."""
        hidden = self.relu1(self.fc1(x))
        hidden = self.relu2(self.fc2(hidden))
        return self.sig1(self.fc3(hidden))

In [None]:
# ANN2 lr finder
# Drop ASGD
input_dim = 1088
hidden_dim_0 = 512
hidden_dim_1 = 512
output_dim = 1
batch_size = 128

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

optimizer_dict = {
    "Adadelta": torch.optim.Adadelta,
    "Adagrad": torch.optim.Adagrad,
    "Adam": torch.optim.Adam,
    "Adamax": torch.optim.Adamax,
    "NAdam": torch.optim.NAdam,
    "RAdam": torch.optim.RAdam,
    "RMSprop": torch.optim.RMSprop,
    "SGD": torch.optim.SGD,
    "SGD-Momentum": torch.optim.SGD,
}

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    # A fresh model per optimizer so the sweeps do not share weights.
    model = ANNModel2(input_dim, hidden_dim_0, hidden_dim_1, output_dim)
    # The initial lr is only a placeholder (lr_finder overwrites it before the
    # first step); 1e-6 is the start of the sweep and keeps this cell from
    # depending on a `learning_rate` left behind by another cell.
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum = 0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6)

    # lr_finder expects the training loader as its third argument.
    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 100)
    loss_dict[algo_name] = loss_list

In [None]:
# One column of sweep losses per optimizer plus the matching learning rates,
# written to CSV without the index.
df = pd.DataFrame(loss_dict).assign(LR=lr_list)
df.to_csv('./model_histories/ANN/lr_finder_hd_2.csv', index=False)

In [None]:
# Reload the sweep table saved in lr_finder_hd_2.csv and leave it as the
# cell's displayed value.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_2.csv")
df

In [None]:
# Plot one optimizer's sweep losses against the learning rate (log x-axis).
# The ANN2 sweep table holds nine optimizer columns followed by 'LR', so
# position 9 is the 'LR' column itself and would plot LR against LR.
# Position 8 is the last optimizer column, which is what the plot cells for
# the other architectures select.
idx = 8
keys = list(df.columns)
plt.figure(figsize=(10, 6))
plt.title(keys[idx])
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Loss')
plt.plot(df['LR'], df[keys[idx]], marker='o')
plt.show()

In [None]:
# Load the fixed training CSV, then train a fresh ANNModel2 with every
# optimizer at its listed learning rate, keeping the loss histories.
train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

input_dim = 1088
hidden_dim_0 = hidden_dim_1 = 512
output_dim = 1

train_loss_dict = {}
val_loss_dict = {}

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "Adadelta": (torch.optim.Adadelta, 0.1),
    "Adagrad": (torch.optim.Adagrad, 0.001),
    "Adam": (torch.optim.Adam, 1e-5),
    "Adamax": (torch.optim.Adamax, 1e-4),
    "NAdam": (torch.optim.NAdam, 1e-5),
    "RAdam": (torch.optim.RAdam, 1e-5),
    "RMSprop": (torch.optim.RMSprop, 1e-5),
    "SGD": (torch.optim.SGD, 0.1),
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel2(input_dim, hidden_dim_0, hidden_dim_1, output_dim)
    # Only the "SGD-Momentum" entry gets a momentum term.
    extra_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **extra_kwargs)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, 10)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Score every optimizer by the mean of its last (up to) ten validation losses
# and list the (name, score) pairs from lowest to highest score.
# Despite its name, val_loss_ranking_dict ends up as a sorted list of tuples.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories differ in length, so each one is padded with None up to the
# longest run before being written to CSV as equal-length columns.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {}
for name, history in val_loss_dict.items():
    padding = [None] * (max_len - len(history))
    loss_filled_results[name] = history + padding
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_2.csv", index=False)

In [None]:
# Reload the validation-loss histories saved in
# structure_selector_val_loss_hd_2.csv and leave them as the displayed value.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_2.csv')
df

In [None]:
# Plot one column of df (selected by position) against its row index.
idx = 0
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.plot(df[selected], marker='o')
plt.show()

# Hidden Layer 3

In [None]:
class ANNModel3(nn.Module):
    """Fully connected network with three ReLU hidden layers and a sigmoid output."""

    def __init__(self, input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, output_dim):
        super().__init__()
        # Sub-modules are registered in the order the forward pass uses them.
        self.fc1 = nn.Linear(input_dim, hidden_dim_0)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim_0, hidden_dim_1)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim_1, hidden_dim_2)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hidden_dim_2, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for the batch ``x``."""
        hidden_stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
        )
        for linear, activation in hidden_stages:
            x = activation(linear(x))
        return self.sig1(self.fc4(x))

In [None]:
# ANN3 lr finder
# Compared with the ANN1 sweep, ASGD and Adadelta are no longer included.
input_dim = 1088
hidden_dim_0 = 512
hidden_dim_1 = 512
hidden_dim_2 = 512
output_dim = 1
batch_size = 128

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

optimizer_dict = {
    "Adagrad": torch.optim.Adagrad,
    "Adam": torch.optim.Adam,
    "Adamax": torch.optim.Adamax,
    "NAdam": torch.optim.NAdam,
    "RAdam": torch.optim.RAdam,
    "RMSprop": torch.optim.RMSprop,
    "SGD": torch.optim.SGD,
    "SGD-Momentum": torch.optim.SGD,
}

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    # A fresh model per optimizer so the sweeps do not share weights.
    model = ANNModel3(input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, output_dim)
    # The initial lr is only a placeholder (lr_finder overwrites it before the
    # first step); 1e-6 is the start of the sweep and keeps this cell from
    # depending on a `learning_rate` left behind by another cell.
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum = 0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6)

    # lr_finder expects the training loader as its third argument.
    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 100)
    loss_dict[algo_name] = loss_list

In [None]:
# One column of sweep losses per optimizer plus the matching learning rates,
# written to CSV without the index.
df = pd.DataFrame(loss_dict).assign(LR=lr_list)
df.to_csv('./model_histories/ANN/lr_finder_hd_3.csv', index=False)

In [None]:
# Reload the sweep table saved in lr_finder_hd_3.csv and leave it as the
# cell's displayed value.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_3.csv")
df

In [None]:
# Plot one column of the sweep table (selected by position) against the
# learning rate on a logarithmic x-axis.
idx = 7
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xscale('log')
ax.set_xlabel('Learning Rate')
ax.set_ylabel('Loss')
ax.plot(df['LR'], df[selected], marker='o')
plt.show()

In [None]:
# ANN3 train
# Load the fixed training CSV, then train a fresh ANNModel3 with every
# optimizer at its listed learning rate, keeping the loss histories.
train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

input_dim = 1088
hidden_dim_0 = hidden_dim_1 = hidden_dim_2 = 512
output_dim = 1

train_loss_dict = {}
val_loss_dict = {}

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "Adagrad": (torch.optim.Adagrad, 0.001),
    "Adam": (torch.optim.Adam, 1e-5),
    "Adamax": (torch.optim.Adamax, 5e-5),
    "NAdam": (torch.optim.NAdam, 1e-5),
    "RAdam": (torch.optim.RAdam, 1e-5),
    "RMSprop": (torch.optim.RMSprop, 1e-5),
    "SGD": (torch.optim.SGD, 0.1),
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel3(input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, output_dim)
    # Only the "SGD-Momentum" entry gets a momentum term.
    extra_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **extra_kwargs)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, 10)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Score every optimizer by the mean of its last (up to) ten validation losses
# and list the (name, score) pairs from lowest to highest score.
# Despite its name, val_loss_ranking_dict ends up as a sorted list of tuples.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories differ in length, so each one is padded with None up to the
# longest run before being written to CSV as equal-length columns.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {}
for name, history in val_loss_dict.items():
    padding = [None] * (max_len - len(history))
    loss_filled_results[name] = history + padding
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_3.csv", index=False)

In [None]:
# Reload the validation-loss histories saved in
# structure_selector_val_loss_hd_3.csv and leave them as the displayed value.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_3.csv')
df

In [None]:
# Plot one column of df (selected by position) against its row index.
idx = 0
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.plot(df[selected], marker='o')
plt.show()

# Hidden Layer 4

In [None]:
class ANNModel4(nn.Module):
    """Fully connected network with four ReLU hidden layers and a sigmoid output."""

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, output_dim):
        super().__init__()
        # Sub-modules are registered in the order the forward pass uses them.
        self.fc1 = nn.Linear(input_dim, hd_0)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hd_0, hd_1)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hd_1, hd_2)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hd_2, hd_3)
        self.relu4 = nn.ReLU()
        self.fc5 = nn.Linear(hd_3, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for the batch ``x``."""
        hidden_stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
            (self.fc4, self.relu4),
        )
        for linear, activation in hidden_stages:
            x = activation(linear(x))
        return self.sig1(self.fc5(x))

In [None]:
# ANN4 lr finder
# Compared with the ANN1 sweep, ASGD, Adadelta and Adam are no longer included.
input_dim = 1088
hidden_dim_0 = 512
hidden_dim_1 = 512
hidden_dim_2 = 512
hidden_dim_3 = 512
output_dim = 1
batch_size = 128

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

optimizer_dict = {
    "Adagrad": torch.optim.Adagrad,
    "Adamax": torch.optim.Adamax,
    "NAdam": torch.optim.NAdam,
    "RAdam": torch.optim.RAdam,
    "RMSprop": torch.optim.RMSprop,
    "SGD": torch.optim.SGD,
    "SGD-Momentum": torch.optim.SGD,
}

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    # A fresh model per optimizer so the sweeps do not share weights.
    model = ANNModel4(input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, hidden_dim_3, output_dim)
    # The initial lr is only a placeholder (lr_finder overwrites it before the
    # first step); 1e-6 is the start of the sweep and keeps this cell from
    # depending on a `learning_rate` left behind by another cell.
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum = 0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6)

    # lr_finder expects the training loader as its third argument.
    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 100)
    loss_dict[algo_name] = loss_list

In [None]:
# One column of sweep losses per optimizer plus the matching learning rates,
# written to CSV without the index.
df = pd.DataFrame(loss_dict).assign(LR=lr_list)
df.to_csv('./model_histories/ANN/lr_finder_hd_4.csv', index=False)

In [None]:
# Reload the sweep table saved in lr_finder_hd_4.csv and leave it as the
# cell's displayed value.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_4.csv")
df

In [None]:
# Plot one column of the sweep table (selected by position) against the
# learning rate on a logarithmic x-axis.
idx = 6
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xscale('log')
ax.set_xlabel('Learning Rate')
ax.set_ylabel('Loss')
ax.plot(df['LR'], df[selected], marker='o')
plt.show()

In [None]:
# ANN4 train
# Load the fixed training CSV, then train a fresh ANNModel4 with every
# optimizer at its listed learning rate, keeping the loss histories.
train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

input_dim = 1088
hidden_dim_0 = hidden_dim_1 = hidden_dim_2 = hidden_dim_3 = 512
output_dim = 1

train_loss_dict = {}
val_loss_dict = {}

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "Adagrad": (torch.optim.Adagrad, 0.001),
    "Adamax": (torch.optim.Adamax, 5e-5),
    "NAdam": (torch.optim.NAdam, 1e-5),
    "RAdam": (torch.optim.RAdam, 1e-5),
    "RMSprop": (torch.optim.RMSprop, 1e-5),
    "SGD": (torch.optim.SGD, 0.1),
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel4(input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, hidden_dim_3, output_dim)
    # Only the "SGD-Momentum" entry gets a momentum term.
    extra_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **extra_kwargs)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, 10)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Score every optimizer by the mean of its last (up to) ten validation losses
# and list the (name, score) pairs from lowest to highest score.
# Despite its name, val_loss_ranking_dict ends up as a sorted list of tuples.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories differ in length, so each one is padded with None up to the
# longest run before being written to CSV as equal-length columns.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {}
for name, history in val_loss_dict.items():
    padding = [None] * (max_len - len(history))
    loss_filled_results[name] = history + padding
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_4.csv", index=False)

In [None]:
# Reload the validation-loss histories saved in
# structure_selector_val_loss_hd_4.csv and leave them as the displayed value.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_4.csv')
df

In [None]:
# Plot one column of df (selected by position) against its row index.
idx = 6
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.plot(df[selected], marker='o')
plt.show()

# Hidden Layer 5

In [None]:
class ANNModel5(nn.Module):
    """Fully connected network with five ReLU hidden layers and a sigmoid output."""

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, output_dim):
        super().__init__()
        # Sub-modules are registered in the order the forward pass uses them.
        self.fc1 = nn.Linear(input_dim, hd_0)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hd_0, hd_1)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hd_1, hd_2)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hd_2, hd_3)
        self.relu4 = nn.ReLU()
        self.fc5 = nn.Linear(hd_3, hd_4)
        self.relu5 = nn.ReLU()
        self.fc6 = nn.Linear(hd_4, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output for the batch ``x``."""
        hidden_stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
            (self.fc4, self.relu4),
            (self.fc5, self.relu5),
        )
        for linear, activation in hidden_stages:
            x = activation(linear(x))
        return self.sig1(self.fc6(x))

In [None]:
# ANN5 lr finder
# Compared with the ANN1 sweep, ASGD, Adadelta, Adam and Adagrad are no
# longer included.
input_dim = 1088
hd_0 = 512
hd_1 = 512
hd_2 = 512
hd_3 = 512
hd_4 = 512
output_dim = 1
batch_size = 128

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

optimizer_dict = {
    "Adamax": torch.optim.Adamax,
    "NAdam": torch.optim.NAdam,
    "RAdam": torch.optim.RAdam,
    "RMSprop": torch.optim.RMSprop,
    "SGD": torch.optim.SGD,
    "SGD-Momentum": torch.optim.SGD,
}

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    # A fresh model per optimizer so the sweeps do not share weights.
    model = ANNModel5(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, output_dim)
    # The initial lr is only a placeholder (lr_finder overwrites it before the
    # first step); 1e-6 is the start of the sweep and keeps this cell from
    # depending on a `learning_rate` left behind by another cell.
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum = 0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6)

    # lr_finder expects the training loader as its third argument.
    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 100)
    loss_dict[algo_name] = loss_list

In [None]:
# One column of sweep losses per optimizer plus the matching learning rates,
# written to CSV without the index.
df = pd.DataFrame(loss_dict).assign(LR=lr_list)
df.to_csv('./model_histories/ANN/lr_finder_hd_5.csv', index=False)

In [None]:
# Reload the sweep table saved in lr_finder_hd_5.csv and leave it as the
# cell's displayed value.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_5.csv")
df

In [None]:
# Plot one optimizer's sweep losses against the learning rate (log x-axis).
# The ANN5 sweep table holds six optimizer columns followed by 'LR', so
# position 6 is the 'LR' column itself and would plot LR against LR.
# Position 5 is the last optimizer column, which is what the plot cells for
# the other architectures select.
idx = 5
keys = list(df.columns)
plt.figure(figsize=(10, 6))
plt.title(keys[idx])
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Loss')
plt.plot(df['LR'], df[keys[idx]], marker='o')
plt.show()

In [None]:
# ANN5 train
# Load the fixed training CSV, then train a fresh ANNModel5 with every
# optimizer at its listed learning rate, keeping the loss histories.
train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = 512

output_dim = 1

train_loss_dict = {}
val_loss_dict = {}

# name -> (optimizer class, learning rate)
# NOTE(review): Adagrad is trained here although the ANN5 lr-finder cell does
# not sweep it -- confirm whether it should still be in this list.
optimizer_dict = {
    "Adagrad": (torch.optim.Adagrad, 0.001),
    "Adamax": (torch.optim.Adamax, 5e-5),
    "NAdam": (torch.optim.NAdam, 1e-5),
    "RAdam": (torch.optim.RAdam, 1e-5),
    "RMSprop": (torch.optim.RMSprop, 1e-5),
    "SGD": (torch.optim.SGD, 0.1),
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel5(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, output_dim)
    # Only the "SGD-Momentum" entry gets a momentum term.
    extra_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **extra_kwargs)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, 10)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Score every optimizer by the mean of its last (up to) ten validation losses
# and list the (name, score) pairs from lowest to highest score.
# Despite its name, val_loss_ranking_dict ends up as a sorted list of tuples.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories differ in length, so each one is padded with None up to the
# longest run before being written to CSV as equal-length columns.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {}
for name, history in val_loss_dict.items():
    padding = [None] * (max_len - len(history))
    loss_filled_results[name] = history + padding
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_5.csv", index=False)

In [None]:
# Reload the validation-loss histories saved in
# structure_selector_val_loss_hd_5.csv and leave them as the displayed value.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_5.csv')
df

In [None]:
# Rank the columns of df by the mean of their last (up to) ten non-missing
# values, lowest first. loss_dict ends up as a sorted list of
# (column, mean) tuples.
loss_list = [np.mean(df[column].dropna().tail(10)) for column in df.columns]
loss_dict = sorted(zip(df.columns, loss_list), key=lambda pair: pair[1])
loss_dict

In [None]:
# Plot one column of df (selected by position) against its row index.
idx = 5
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.plot(df[selected], marker='o')
plt.show()

# Hidden Layer 6

In [None]:
class ANNModel6(nn.Module):
    """Fully connected network with six ReLU hidden layers and a sigmoid output."""

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim):
        super().__init__()

        # Six hidden linear layers followed by the output projection.
        self.fc1 = nn.Linear(input_dim, hd_0)
        self.fc2 = nn.Linear(hd_0, hd_1)
        self.fc3 = nn.Linear(hd_1, hd_2)
        self.fc4 = nn.Linear(hd_2, hd_3)
        self.fc5 = nn.Linear(hd_3, hd_4)
        self.fc6 = nn.Linear(hd_4, hd_5)
        self.fc7 = nn.Linear(hd_5, output_dim)

    def forward(self, x):
        """Return the sigmoid-activated output for the batch ``x``."""
        hidden_layers = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        for layer in hidden_layers:
            x = torch.relu(layer(x))
        return torch.sigmoid(self.fc7(x))

In [None]:
# ANN6 lr finder
# Only plain SGD and SGD with momentum are swept for this architecture.
input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = 512
output_dim = 1
batch_size = 128

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

optimizer_dict = {
    "SGD": torch.optim.SGD,
    "SGD-Momentum": torch.optim.SGD,
}

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    model = ANNModel6(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim)
    # Only the "SGD-Momentum" entry gets a momentum term.
    extra_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=1e-6, **extra_kwargs)

    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 100)
    loss_dict[algo_name] = loss_list

In [None]:
# One column of sweep losses per optimizer plus the matching learning rates,
# written to CSV without the index.
df = pd.DataFrame(loss_dict).assign(LR=lr_list)
df.to_csv('./model_histories/ANN/lr_finder_hd_6.csv', index=False)

In [None]:
# Reload the sweep table saved in lr_finder_hd_6.csv and leave it as the
# cell's displayed value.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_6.csv")
df

In [None]:
# Plot one column of the sweep table (selected by position) against the
# learning rate on a logarithmic x-axis.
idx = 0
keys = list(df.columns)
selected = keys[idx]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title(selected)
ax.set_xscale('log')
ax.set_xlabel('Learning Rate')
ax.set_ylabel('Loss')
ax.plot(df['LR'], df[selected], marker='o')
plt.show()

In [None]:
# ANN6 train
# Load the fixed training CSV, then train a fresh ANNModel6 with each SGD
# variant at its listed learning rate, keeping the loss histories.
train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = 512
# Passed to train() as its `stop` argument.
stop = 10

output_dim = 1

train_loss_dict = {}
val_loss_dict = {}

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD": (torch.optim.SGD, 0.3),
    "SGD-Momentum": (torch.optim.SGD, 0.03),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel6(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim)
    # Only the "SGD-Momentum" entry gets a momentum term.
    extra_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **extra_kwargs)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Score every optimizer by the mean of its last (up to) ten validation losses
# and list the (name, score) pairs from lowest to highest score.
# Despite its name, val_loss_ranking_dict ends up as a sorted list of tuples.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories differ in length, so each one is padded with None up to the
# longest run before being written to CSV as equal-length columns.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {}
for name, history in val_loss_dict.items():
    padding = [None] * (max_len - len(history))
    loss_filled_results[name] = history + padding
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_6.csv", index=False)

In [None]:
# Reload the validation-loss histories saved in
# structure_selector_val_loss_hd_6.csv and leave them as the displayed value.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_6.csv')
df

In [None]:
# Overlay the first two columns of df on one log-scaled loss axis.
keys = list(df.columns)
first_column, second_column = keys[0], keys[1]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_xlabel('Iteration')
ax.set_ylabel('Loss')
ax.set_yscale('log')
ax.plot(df[first_column], marker='o', label='SGD')
ax.plot(df[second_column], marker='o', label='SGD-Momentum')
ax.legend()
plt.show()

# Hidden Layer 7

In [None]:
class ANNModel7(nn.Module):
    """Fully connected network with seven ReLU hidden layers and a sigmoid output.

    The linear layers are registered as ``fc1`` ... ``fc8``: ``fc1`` maps
    ``input_dim`` to ``hd_0``, each following layer maps one hidden width to
    the next, and ``fc8`` maps ``hd_6`` to ``output_dim``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim):
        super().__init__()

        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim)
        # Consecutive width pairs are the (in, out) sizes of fc1..fc8, built in order.
        for number, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{number}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Run ``x`` through fc1..fc7 with ReLU, then fc8 with a sigmoid."""
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6, self.fc7):
            x = torch.relu(hidden(x))
        return torch.sigmoid(self.fc8(x))

In [None]:
# ANN7 lr finder
# Drop ASGD
# Runs lr_finder once per optimizer, each time on a freshly built ANNModel7,
# and collects the returned loss lists in loss_dict keyed by optimizer name.
input_dim, output_dim = 1088, 1
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = 512
batch_size = 128

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

# Both entries use torch.optim.SGD; the name decides whether momentum is added.
optimizer_dict = dict.fromkeys(("SGD", "SGD-Momentum"), torch.optim.SGD)

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    model = ANNModel7(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim)
    # lr=1e-6 is only a starting value: lr_finder overwrites
    # optimizer.param_groups[0]['lr'] at every step of its sweep.
    if algo_name != "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum=0.9)

    # (-6, 0) are the start/end exponents handed to torch.logspace; 100 is the step count.
    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 100)
    loss_dict[algo_name] = loss_list

In [None]:
# Save the LR-finder losses next to the learning rates that produced them.
# The rates are rebuilt here with the same range (-6, 0) and step count (100)
# that were passed to lr_finder for this model, instead of relying on an
# `lr_list` variable left over from an earlier cell.
# NOTE(review): assumes lr_finder returns one loss per swept learning rate - confirm.
df = pd.DataFrame(loss_dict)
lr_range = [-6, 0]
lr_list = torch.logspace(lr_range[0], lr_range[1], steps=100).tolist()
df['LR'] = lr_list
df.to_csv('./model_histories/ANN/lr_finder_hd_7.csv', index = False)

In [None]:
# Reload the LR-finder results written by the previous cell.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_7.csv")
# Bare expression: shows the DataFrame as the notebook cell output.
df

In [None]:
# Plot one optimizer's LR-finder curve: loss (column number idx of the CSV)
# against learning rate, with the learning-rate axis on a log scale.
idx = 1
keys = df.columns.tolist()
plt.figure(figsize=(10, 6))
plt.xlabel('Learning Rate')
plt.ylabel('Loss')
plt.title(keys[idx])
plt.xscale('log')
plt.plot(df['LR'], df[keys[idx]], marker='o')
plt.show()

In [None]:
# ANN7 train
# Trains a fresh ANNModel7 once per optimizer entry and stores the loss
# histories returned by train() under the optimizer's name.
train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

input_dim, output_dim = 1088, 1
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = 512
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 10

train_loss_dict, val_loss_dict = {}, {}

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD": (torch.optim.SGD, 0.5),
    "SGD-Momentum": (torch.optim.SGD, 0.05),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel7(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim)
    if algo_name != "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Rank the optimizers by the mean of their last (up to) ten recorded validation
# losses. Despite its name, val_loss_ranking_dict ends up as a list of
# (optimizer name, mean loss) pairs sorted from lowest to highest mean loss.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories may differ in length, so pad the shorter ones with None up to the
# longest one; that gives every DataFrame column the same number of rows.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {
    algo: history + [None] * (max_len - len(history))
    for algo, history in val_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_7.csv", index=False)

In [None]:
# Reload the validation-loss histories written by the previous cell.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_7.csv')
# Bare expression: shows the DataFrame as the notebook cell output.
df

In [None]:
# Plot every validation-loss history stored in df on a log-scaled loss axis.
# Each curve is labelled with its own column name, so the legend stays correct
# whatever the number or order of optimizers saved in the CSV.
keys = list(df.columns)
plt.figure(figsize=(10, 6))
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.yscale('log')
for key in keys:
    plt.plot(df[key], marker='o', label=key)
plt.legend()
plt.show()

# Hidden Layer 8

In [None]:
class ANNModel8(nn.Module):
    """Fully connected network with eight ReLU hidden layers and a sigmoid output.

    The linear layers are registered as ``fc1`` ... ``fc9``: ``fc1`` maps
    ``input_dim`` to ``hd_0``, each following layer maps one hidden width to
    the next, and ``fc9`` maps ``hd_7`` to ``output_dim``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim):
        super().__init__()

        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim)
        # Consecutive width pairs are the (in, out) sizes of fc1..fc9, built in order.
        for number, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{number}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Run ``x`` through fc1..fc8 with ReLU, then fc9 with a sigmoid."""
        hidden_layers = (
            self.fc1, self.fc2, self.fc3, self.fc4,
            self.fc5, self.fc6, self.fc7, self.fc8,
        )
        for hidden in hidden_layers:
            x = torch.relu(hidden(x))
        return torch.sigmoid(self.fc9(x))

In [None]:
# ANN8 lr finder
# Drop ASGD
# Runs lr_finder once per enabled optimizer, each time on a freshly built
# ANNModel8, and collects the returned loss lists in loss_dict.
input_dim, output_dim = 1088, 1
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = hd_7 = 512
batch_size = 128

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

optimizer_dict = {
    "SGD": torch.optim.SGD,
    # "SGD-Momentum": torch.optim.SGD,  (disabled for this sweep)
}

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    model = ANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim)
    # lr=1e-6 is only a starting value: lr_finder overwrites
    # optimizer.param_groups[0]['lr'] at every step of its sweep.
    if algo_name != "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum=0.9)

    # (-2, 2) are the start/end exponents handed to torch.logspace; 50 is the step count.
    loss_list = lr_finder(model, optimizer, train_loader, (-2, 2), 50)
    loss_dict[algo_name] = loss_list

In [None]:
# Save the LR-finder losses next to the learning rates that produced them.
# The rates are rebuilt here with the same range (-2, 2) and step count (50)
# that were passed to lr_finder for this model; an `lr_list` left over from an
# earlier sweep would have a different length and different values.
# NOTE(review): assumes lr_finder returns one loss per swept learning rate - confirm.
df = pd.DataFrame(loss_dict)
lr_range = [-2, 2]
lr_list = torch.logspace(lr_range[0], lr_range[1], steps=50).tolist()
df['LR'] = lr_list
df.to_csv('./model_histories/ANN/lr_finder_hd_8.csv', index = False)

In [None]:
# Reload the LR-finder results written by the previous cell.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_8.csv")
# Bare expression: shows the DataFrame as the notebook cell output.
df

In [None]:
# Plot the LR-finder curve: loss against learning rate, log-scaled x axis.
# The ANN8 sweep only ran "SGD", so the saved columns are ["SGD", "LR"]:
# column 0 is the loss curve, whereas column 1 would plot LR against itself.
idx = 0
keys = list(df.columns)
plt.figure(figsize=(10, 6))
plt.title(keys[idx])
plt.xscale('log')
plt.xlabel('Learning Rate')
plt.ylabel('Loss')
plt.plot(df['LR'], df[keys[idx]], marker='o')
plt.show()

In [None]:
# ANN8 train
# Trains a fresh ANNModel8 once per optimizer entry and stores the loss
# histories returned by train() under the optimizer's name.
train_index = 32
batch_size = 128
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

input_dim, output_dim = 1088, 1
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = hd_7 = 512
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 10

train_loss_dict, val_loss_dict = {}, {}

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD": (torch.optim.SGD, 2),
    "SGD-Momentum": (torch.optim.SGD, 0.1),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim)
    if algo_name != "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Rank the optimizers by the mean of their last (up to) ten recorded validation
# losses. Despite its name, val_loss_ranking_dict ends up as a list of
# (optimizer name, mean loss) pairs sorted from lowest to highest mean loss.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories may differ in length, so pad the shorter ones with None up to the
# longest one; that gives every DataFrame column the same number of rows.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {
    algo: history + [None] * (max_len - len(history))
    for algo, history in val_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_8.csv", index=False)

In [None]:
# Reload the validation-loss histories written by the previous cell.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_8.csv')
# Bare expression: shows the DataFrame as the notebook cell output.
df

In [None]:
# Plot every validation-loss history stored in df on a log-scaled loss axis.
# Each curve is labelled with its own column name, so the legend stays correct
# whatever the number or order of optimizers saved in the CSV.
keys = list(df.columns)
plt.figure(figsize=(10, 6))
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.yscale('log')
for key in keys:
    plt.plot(df[key], marker='o', label=key)
plt.legend()
plt.show()

# Adding dropout, batch normalization, and weight decay

Batch normalization is added alongside dropout because the model appears unable to converge when only dropout is added.

In [None]:
class NewANNModel8(nn.Module):
    """Eight-hidden-layer network with batch normalization and dropout.

    Hidden stage ``i`` (1..8) is ``fc{i}`` -> ``bn{i}`` -> ReLU -> ``dropout{i}``;
    the output stage is ``fc9`` followed by a sigmoid. Every dropout layer
    uses the same probability ``drop_prob``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob):
        super().__init__()

        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7)
        # Register fc/bn/dropout for each hidden stage, in that order.
        for number, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{number}", nn.Linear(n_in, n_out))
            setattr(self, f"bn{number}", nn.BatchNorm1d(n_out))
            setattr(self, f"dropout{number}", nn.Dropout(p=drop_prob))
        self.fc9 = nn.Linear(hd_7, output_dim)

    def forward(self, x):
        """Run ``x`` through the eight hidden stages, then fc9 with a sigmoid."""
        for number in range(1, 9):
            linear = getattr(self, f"fc{number}")
            norm = getattr(self, f"bn{number}")
            drop = getattr(self, f"dropout{number}")
            x = drop(torch.relu(norm(linear(x))))

        return torch.sigmoid(self.fc9(x))

In [None]:
# NewANN8 lr finder (ASGD already dropped)
# Runs lr_finder once per optimizer, each time on a freshly built NewANNModel8
# with weight decay enabled, and collects the loss lists in loss_dict.
input_dim, output_dim = 1088, 1
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = hd_7 = 512
drop_prob = 0.2
batch_size = 128
weight_decay = 0.001

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

loss_dict = {}

# Both entries use torch.optim.SGD; the name decides whether momentum is added.
optimizer_dict = dict.fromkeys(("SGD", "SGD-Momentum"), torch.optim.SGD)

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    model = NewANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob)
    # lr=1e-6 is only a starting value: lr_finder overwrites
    # optimizer.param_groups[0]['lr'] at every step of its sweep.
    if algo_name != "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6, weight_decay=weight_decay)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum=0.9, weight_decay=weight_decay)

    # (-4, 0) are the start/end exponents handed to torch.logspace; 50 is the step count.
    loss_list = lr_finder(model, optimizer, train_loader, (-4, 0), 50)
    loss_dict[algo_name] = loss_list

In [None]:
# Rebuild the swept learning rates (same range and step count as the
# lr_finder call for this model) and store them beside the recorded losses.
lr_range = [-4, 0]
lr_list = torch.logspace(lr_range[0], lr_range[1], steps=50).tolist()
df = pd.DataFrame(loss_dict)
df['LR'] = lr_list
df.to_csv('./model_histories/ANN/lr_finder_hd_8_new.csv', index=False)

In [None]:
# Reload the LR-finder results written by the previous cell.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_8_new.csv")
# Bare expression: shows the DataFrame as the notebook cell output.
df

In [None]:
# Plot one optimizer's LR-finder curve: loss (column number idx of the CSV)
# against learning rate, with the learning-rate axis on a log scale.
idx = 1
keys = df.columns.tolist()
plt.figure(figsize=(10, 6))
plt.xlabel('Learning Rate')
plt.ylabel('Loss')
plt.title(keys[idx])
plt.xscale('log')
plt.plot(df['LR'], df[keys[idx]], marker='o')
plt.show()

In [None]:
# NewANN8 train
# Trains a fresh NewANNModel8 (dropout + batch norm, with weight decay in the
# optimizer) once per optimizer entry and stores the loss histories returned
# by train() under the optimizer's name.
train_index = 32
batch_size = 128
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)

input_dim, output_dim = 1088, 1
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = hd_7 = 512
drop_prob = 0.2
weight_decay = 0.001
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 20

train_loss_dict, val_loss_dict = {}, {}

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD": (torch.optim.SGD, 0.02),
    "SGD-Momentum": (torch.optim.SGD, 0.002),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = NewANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob)
    if algo_name != "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Rank the optimizers by the mean of their last (up to) ten recorded validation
# losses. Despite its name, val_loss_ranking_dict ends up as a list of
# (optimizer name, mean loss) pairs sorted from lowest to highest mean loss.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories may differ in length, so pad the shorter ones with None up to the
# longest one; that gives every DataFrame column the same number of rows.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {
    algo: history + [None] * (max_len - len(history))
    for algo, history in val_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_8_new.csv", index=False)

In [None]:
# Reload the validation-loss histories written by the previous cell.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_8_new.csv')
# Bare expression: shows the DataFrame as the notebook cell output.
df

In [None]:
# Plot every validation-loss history stored in df on a log-scaled loss axis.
# Each curve is labelled with its own column name, so the legend stays correct
# whatever the number or order of optimizers saved in the CSV.
keys = list(df.columns)
plt.figure(figsize=(10, 6))
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.yscale('log')
for key in keys:
    plt.plot(df[key], marker='o', label=key)
plt.legend()
plt.show()

# Experiment with 64 hidden layers

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ANN64(nn.Module):
    """Deep fully connected network built from identical hidden blocks.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout, stored flat
    in ``self.layers`` (four modules per block). The output stage is
    ``self.output_layer`` followed by a sigmoid.

    Args:
        input_size: Number of input features.
        output_size: Number of output units.
        hidden_size: Width of every hidden block.
        dropout_prob: Dropout probability used in every hidden block.
        num_hidden_layers: Number of hidden blocks. Defaults to 64, the depth
            that was previously hard-coded, so existing calls are unaffected.

    Raises:
        ValueError: If ``num_hidden_layers`` is smaller than 1.
    """

    def __init__(self, input_size=1088, output_size=1, hidden_size=512, dropout_prob=0.2,
                 num_hidden_layers=64):
        super(ANN64, self).__init__()
        if num_hidden_layers < 1:
            raise ValueError("num_hidden_layers must be at least 1")

        self.layers = nn.ModuleList()
        in_features = input_size
        for _ in range(num_hidden_layers):
            # Only the first block reads the raw input width; the rest are
            # hidden_size -> hidden_size.
            self.layers.extend([
                nn.Linear(in_features, hidden_size),
                nn.BatchNorm1d(hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout_prob),
            ])
            in_features = hidden_size
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Apply every module in ``self.layers`` in order, then the sigmoid output stage."""
        for layer in self.layers:
            x = layer(x)
        x = torch.sigmoid(self.output_layer(x))
        return x

In [None]:
# ANN64 lr finder
# Runs lr_finder once per optimizer, each time on a freshly built ANN64 with
# its default arguments, and collects the returned loss lists in loss_dict.
batch_size = 128
weight_decay = 0.001

train_index = 32
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
# Only the training loader is needed for the sweep; the test loader is discarded.
train_loader, _ = prepare_data(train_df, batch_size)

loss_dict = {}

# Both entries use torch.optim.SGD; the name decides whether momentum is added.
optimizer_dict = dict.fromkeys(("SGD", "SGD-Momentum"), torch.optim.SGD)

for algo_name, optimizer_class in optimizer_dict.items():
    print(algo_name)
    model = ANN64()
    # lr=1e-6 is only a starting value: lr_finder overwrites
    # optimizer.param_groups[0]['lr'] at every step of its sweep.
    if algo_name != "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=1e-6, weight_decay=weight_decay)
    else:
        optimizer = optimizer_class(model.parameters(), lr=1e-6, momentum=0.9, weight_decay=weight_decay)

    # (-6, 0) are the start/end exponents handed to torch.logspace; 100 is the step count.
    loss_list = lr_finder(model, optimizer, train_loader, (-6, 0), 100)
    loss_dict[algo_name] = loss_list

In [None]:
# Rebuild the swept learning rates (same range and step count as the
# lr_finder call for this model) and store them beside the recorded losses.
lr_range = [-6, 0]
lr_list = torch.logspace(lr_range[0], lr_range[1], steps=100).tolist()
df = pd.DataFrame(loss_dict)
df['LR'] = lr_list
df.to_csv('./model_histories/ANN/lr_finder_hd_64.csv', index=False)

In [None]:
# Reload the LR-finder results written by the previous cell.
df = pd.read_csv("./model_histories/ANN/lr_finder_hd_64.csv")
# Bare expression: shows the DataFrame as the notebook cell output.
df

In [None]:
# Plot one optimizer's LR-finder curve: loss (column number idx of the CSV)
# against learning rate, with the learning-rate axis on a log scale.
idx = 0
keys = df.columns.tolist()
plt.figure(figsize=(10, 6))
plt.xlabel('Learning Rate')
plt.ylabel('Loss')
plt.title(keys[idx])
plt.xscale('log')
plt.plot(df['LR'], df[keys[idx]], marker='o')
plt.show()

In [None]:
# ANN64 train
# Trains a fresh ANN64 once per optimizer entry and stores the loss histories
# returned by train() under the optimizer's name.

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD": (torch.optim.SGD, 1e-3),
    "SGD-Momentum": (torch.optim.SGD, 1e-4),
}

train_index = 32
batch_size = 128
# Set here, like the other train cells do, so this cell does not depend on the
# LR-finder cell having run first (same value as used there).
weight_decay = 0.001
print(f"TRAIN_INDEX: {train_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
train_loader, test_loader = prepare_data(train_df, batch_size)
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 10

# Start from empty result dicts so histories left over from a previous
# experiment cannot end up in this experiment's ranking and CSV.
train_loss_dict = {}
val_loss_dict = {}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class = optimizer_tuple[0]
    learning_rate = optimizer_tuple[1]
    model = ANN64()
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum = 0.9, weight_decay=weight_decay)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    train_loss_dict[algo_name] = train_loss_list
    val_loss_dict[algo_name] = val_loss_list

In [None]:
# Rank the optimizers by the mean of their last (up to) ten recorded validation
# losses. Despite its name, val_loss_ranking_dict ends up as a list of
# (optimizer name, mean loss) pairs sorted from lowest to highest mean loss.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
val_loss_ranking_dict = sorted(
    zip(val_loss_dict, val_loss_ranking_list),
    key=lambda pair: pair[1],
)
val_loss_ranking_dict

In [None]:
# Histories may differ in length, so pad the shorter ones with None up to the
# longest one; that gives every DataFrame column the same number of rows.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {
    algo: history + [None] * (max_len - len(history))
    for algo, history in val_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/structure_selector_val_loss_hd_64.csv", index=False)

In [None]:
# Reload the validation-loss histories written by the previous cell.
df = pd.read_csv('./model_histories/ANN/structure_selector_val_loss_hd_64.csv')
# Bare expression: shows the DataFrame as the notebook cell output.
df

In [None]:
# Plot every validation-loss history stored in df on a log-scaled loss axis.
# Each curve is labelled with its own column name, so the legend stays correct
# whatever the number or order of optimizers saved in the CSV.
keys = list(df.columns)
plt.figure(figsize=(10, 6))
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.yscale('log')
for key in keys:
    plt.plot(df[key], marker='o', label=key)
plt.legend()
plt.show()

# Training with one csv and testing with another
This didn't work. Training on one csv while testing on another is probably too difficult for the model to converge.

In [None]:
# Pick two different csv files at random: one to train on, one to validate on.
# random.sample draws distinct indices in a single call and adapts to however
# many csv files were found (it raises ValueError if there are fewer than two).
train_index, test_index = random.sample(range(len(csvs)), 2)
batch_size = 128
print(f"TRAIN_INDEX: {train_index}, TEST_INDEX: {test_index}")
train_df = pd.read_csv("./lichess/data_1/" + csvs[train_index])
# prepare_data does its own 80/20 split, so only the 80% train part of the
# training csv is kept here ...
train_loader, _ = prepare_data(train_df, batch_size)
test_df = pd.read_csv("./lichess/data_1/" + csvs[test_index])
# ... and only the 20% test part of the other csv is kept for validation.
_, test_loader = prepare_data(test_df, batch_size)
# Shared result dicts filled by the training cells of this section.
train_loss_dict = {}
val_loss_dict = {}

In [None]:
class ANNModel1(nn.Module):
    """Fully connected network with one ReLU hidden layer and a sigmoid output."""

    def __init__(self, input_dim, hidden_dim_0, output_dim):
        super().__init__()

        # Hidden layer and its activation.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim_0)
        self.relu1 = nn.ReLU()

        # Output layer and the final sigmoid.
        self.fc2 = nn.Linear(in_features=hidden_dim_0, out_features=output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid(fc2(relu(fc1(x))))."""
        hidden = self.relu1(self.fc1(x))
        return self.sig1(self.fc2(hidden))

In [None]:
# ANN1 train, using the train_loader/test_loader built in this section's setup cell.
# name -> (optimizer class, learning rate)
optimizer_dict = {
    "Adamax": (torch.optim.Adamax, 1e-4),
}

input_dim = 1088
hd_0 = 512
output_dim = 1
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 20

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel1(input_dim, hd_0, output_dim)
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    # Derive the result key from the optimizer name ("Adamax" -> "ann_1_adamax")
    # so a second entry in optimizer_dict would not overwrite this one.
    result_key = f"ann_1_{algo_name.lower()}"
    train_loss_dict[result_key] = train_loss_list
    val_loss_dict[result_key] = val_loss_list

In [None]:
class ANNModel2(nn.Module):
    """Fully connected network with two ReLU hidden layers and a sigmoid output."""

    def __init__(self, input_dim, hidden_dim_0, hidden_dim_1, output_dim):
        super().__init__()

        # First hidden layer and its activation.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim_0)
        self.relu1 = nn.ReLU()

        # Second hidden layer and its activation.
        self.fc2 = nn.Linear(in_features=hidden_dim_0, out_features=hidden_dim_1)
        self.relu2 = nn.ReLU()

        # Output layer and the final sigmoid.
        self.fc3 = nn.Linear(in_features=hidden_dim_1, out_features=output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Apply fc1 and fc2 with ReLU, then fc3 with a sigmoid."""
        first = self.relu1(self.fc1(x))
        second = self.relu2(self.fc2(first))
        return self.sig1(self.fc3(second))

In [None]:
# ANN2 train, using the train_loader/test_loader built in this section's setup cell.
input_dim = 1088
hidden_dim_0 = 512
hidden_dim_1 = 512
output_dim = 1
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 20

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel2(input_dim, hidden_dim_0, hidden_dim_1, output_dim)
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    # Derive the result key from the optimizer name so a second entry in
    # optimizer_dict would not overwrite this one ("ann_2_SGD-Momentum" here).
    result_key = f"ann_2_{algo_name}"
    train_loss_dict[result_key] = train_loss_list
    val_loss_dict[result_key] = val_loss_list

In [None]:
class ANNModel3(nn.Module):
    """Fully connected network with three ReLU hidden layers and a sigmoid output."""

    def __init__(self, input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, output_dim):
        super().__init__()

        # Hidden layers, each immediately followed by its activation module.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim_0)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_dim_0, out_features=hidden_dim_1)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(in_features=hidden_dim_1, out_features=hidden_dim_2)
        self.relu3 = nn.ReLU()

        # Output layer and the final sigmoid.
        self.fc4 = nn.Linear(in_features=hidden_dim_2, out_features=output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Apply fc1..fc3 with ReLU, then fc4 with a sigmoid."""
        stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
        )
        for linear, activation in stages:
            x = activation(linear(x))
        return self.sig1(self.fc4(x))

In [None]:
# ANN3 train, using the train_loader/test_loader built in this section's setup cell.
input_dim = 1088
hidden_dim_0 = 512
hidden_dim_1 = 512
hidden_dim_2 = 512
output_dim = 1
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 20

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel3(input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, output_dim)
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    # Derive the result key from the optimizer name so a second entry in
    # optimizer_dict would not overwrite this one ("ann_3_SGD-Momentum" here).
    result_key = f"ann_3_{algo_name}"
    train_loss_dict[result_key] = train_loss_list
    val_loss_dict[result_key] = val_loss_list

In [None]:
class ANNModel4(nn.Module):
    """Fully connected network with four ReLU hidden layers and a sigmoid output.

    Hidden stage ``i`` (1..4) is registered as ``fc{i}`` / ``relu{i}``; the
    output stage is ``fc5`` followed by ``sig1``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, output_dim):
        super().__init__()

        widths = (input_dim, hd_0, hd_1, hd_2, hd_3)
        # Register each hidden Linear right before its ReLU, in stage order.
        for number, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{number}", nn.Linear(n_in, n_out))
            setattr(self, f"relu{number}", nn.ReLU())

        self.fc5 = nn.Linear(hd_3, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Apply fc1..fc4 with ReLU, then fc5 with a sigmoid."""
        stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
            (self.fc4, self.relu4),
        )
        for linear, activation in stages:
            x = activation(linear(x))
        return self.sig1(self.fc5(x))

In [None]:
# ANN4 train, using the train_loader/test_loader built in this section's setup cell.
input_dim = 1088
hidden_dim_0 = 512
hidden_dim_1 = 512
hidden_dim_2 = 512
hidden_dim_3 = 512
output_dim = 1
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 20


# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel4(input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, hidden_dim_3, output_dim)
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    # Derive the result key from the optimizer name so a second entry in
    # optimizer_dict would not overwrite this one ("ann_4_SGD-Momentum" here).
    result_key = f"ann_4_{algo_name}"
    train_loss_dict[result_key] = train_loss_list
    val_loss_dict[result_key] = val_loss_list

In [None]:
class ANNModel5(nn.Module):
    """Fully connected network with five ReLU hidden layers and a sigmoid output.

    Hidden stage ``i`` (1..5) is registered as ``fc{i}`` / ``relu{i}``; the
    output stage is ``fc6`` followed by ``sig1``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, output_dim):
        super().__init__()

        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4)
        # Register each hidden Linear right before its ReLU, in stage order.
        for number, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{number}", nn.Linear(n_in, n_out))
            setattr(self, f"relu{number}", nn.ReLU())

        self.fc6 = nn.Linear(hd_4, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Apply fc1..fc5 with ReLU, then fc6 with a sigmoid."""
        stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
            (self.fc4, self.relu4),
            (self.fc5, self.relu5),
        )
        for linear, activation in stages:
            x = activation(linear(x))
        return self.sig1(self.fc6(x))

In [None]:
# ANN5 train, using the train_loader/test_loader built in this section's setup cell.
input_dim = 1088
hd_0 = 512
hd_1 = 512
hd_2 = 512
hd_3 = 512
hd_4 = 512
output_dim = 1
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 20

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD": (torch.optim.SGD, 0.1),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel5(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, output_dim)
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    # Derive the result key from the optimizer name so a second entry in
    # optimizer_dict would not overwrite this one ("ann_5_SGD" here).
    result_key = f"ann_5_{algo_name}"
    train_loss_dict[result_key] = train_loss_list
    val_loss_dict[result_key] = val_loss_list

In [None]:
class ANNModel6(nn.Module):
    """Fully connected network with six ReLU hidden layers and a sigmoid output.

    The linear layers are registered as ``fc1`` ... ``fc7``: ``fc1`` maps
    ``input_dim`` to ``hd_0``, each following layer maps one hidden width to
    the next, and ``fc7`` maps ``hd_5`` to ``output_dim``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim):
        super().__init__()

        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim)
        # Consecutive width pairs are the (in, out) sizes of fc1..fc7, built in order.
        for number, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{number}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Run ``x`` through fc1..fc6 with ReLU, then fc7 with a sigmoid."""
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6):
            x = torch.relu(hidden(x))
        return torch.sigmoid(self.fc7(x))

In [None]:
# ANN6 train, using the train_loader/test_loader built in this section's setup cell.
input_dim = 1088
hd_0 = 512
hd_1 = 512
hd_2 = 512
hd_3 = 512
hd_4 = 512
hd_5 = 512
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 20
output_dim = 1


# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD-Momentum": (torch.optim.SGD, 0.03),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel6(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim)
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    # Derive the result key from the optimizer name so a second entry in
    # optimizer_dict would not overwrite this one ("ann_6_SGD-Momentum" here).
    result_key = f"ann_6_{algo_name}"
    train_loss_dict[result_key] = train_loss_list
    val_loss_dict[result_key] = val_loss_list

In [None]:
class ANNModel7(nn.Module):
    """Fully connected network with seven ReLU hidden layers and a sigmoid output.

    The linear layers are registered as ``fc1`` ... ``fc8``: ``fc1`` maps
    ``input_dim`` to ``hd_0``, each following layer maps one hidden width to
    the next, and ``fc8`` maps ``hd_6`` to ``output_dim``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim):
        super().__init__()

        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim)
        # Consecutive width pairs are the (in, out) sizes of fc1..fc8, built in order.
        for number, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{number}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Run ``x`` through fc1..fc7 with ReLU, then fc8 with a sigmoid."""
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6, self.fc7):
            x = torch.relu(hidden(x))
        return torch.sigmoid(self.fc8(x))

In [None]:
# ANN7 train, using the train_loader/test_loader built in this section's setup cell.
input_dim = 1088
hd_0 = 512
hd_1 = 512
hd_2 = 512
hd_3 = 512
hd_4 = 512
hd_5 = 512
hd_6 = 512
# Passed straight to train(); presumably an early-stopping setting - confirm against train().
stop = 20
output_dim = 1

# name -> (optimizer class, learning rate)
optimizer_dict = {
    "SGD-Momentum": (torch.optim.SGD, 0.05),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel7(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim)
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum=0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    # Derive the result key from the optimizer name so a second entry in
    # optimizer_dict would not overwrite this one ("ann_7_SGD-Momentum" here).
    result_key = f"ann_7_{algo_name}"
    train_loss_dict[result_key] = train_loss_list
    val_loss_dict[result_key] = val_loss_list

In [None]:
class ANNModel8(nn.Module):
    """Fully connected network with eight ReLU hidden layers and a sigmoid output.

    The linear layers are registered as ``fc1`` ... ``fc9``; ``fc9`` maps the
    last hidden width to ``output_dim``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim):
        super().__init__()
        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim)
        # fc<k> maps widths[k-1] -> widths[k]; layers are created in order fc1..fc9.
        for position, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{position}", nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Apply fc1..fc8 with ReLU, then fc9 followed by a sigmoid."""
        hidden_layers = (
            self.fc1, self.fc2, self.fc3, self.fc4,
            self.fc5, self.fc6, self.fc7, self.fc8,
        )
        for dense in hidden_layers:
            x = torch.relu(dense(x))
        return torch.sigmoid(self.fc9(x))

In [None]:
# ANN8 train
# 1088 input features, eight hidden layers of width 512, a single output.
input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = hd_7 = 512
stop = 20  # forwarded to train() as its `stop` argument
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"SGD-Momentum": (torch.optim.SGD, 0.1)}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim)
    # Only the "SGD-Momentum" entry is given a momentum term.
    optimizer_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    train_loss_dict["ann_8_SGD-Momentum"] = train_loss_list
    val_loss_dict["ann_8_SGD-Momentum"] = val_loss_list

In [None]:
class NewANNModel8(nn.Module):
    """Eight-hidden-layer network with batch normalisation and dropout.

    Each hidden stage ``k`` (1..8) is ``fc<k> -> bn<k> -> ReLU -> dropout<k>``;
    the output is ``sigmoid(fc9(...))``. Every dropout layer uses ``drop_prob``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob):
        super().__init__()
        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7)
        # Register fc<k>, bn<k>, dropout<k> per stage, in that order.
        for stage in range(1, len(widths)):
            setattr(self, f"fc{stage}", nn.Linear(widths[stage - 1], widths[stage]))
            setattr(self, f"bn{stage}", nn.BatchNorm1d(widths[stage]))
            setattr(self, f"dropout{stage}", nn.Dropout(p=drop_prob))
        self.fc9 = nn.Linear(hd_7, output_dim)

    def forward(self, x):
        """Run the eight hidden stages, then fc9 followed by a sigmoid."""
        for stage in range(1, 9):
            dense = getattr(self, f"fc{stage}")
            norm = getattr(self, f"bn{stage}")
            drop = getattr(self, f"dropout{stage}")
            x = drop(torch.relu(norm(dense(x))))
        return torch.sigmoid(self.fc9(x))

In [None]:
# NewANN8 train (batch-norm + dropout variant of the 8-hidden-layer network)
# 1088 input features, eight hidden layers of width 512, a single output.
input_dim = 1088
hd_0 = 512
hd_1 = 512
hd_2 = 512
hd_3 = 512
hd_4 = 512
hd_5 = 512
hd_6 = 512
hd_7 = 512
drop_prob = 0.2
stop = 20  # forwarded to train() as its `stop` argument
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {
    "SGD": (torch.optim.SGD, 0.02),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class = optimizer_tuple[0]
    learning_rate = optimizer_tuple[1]
    model = NewANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob)
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum = 0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    train_loss_list, val_loss_list = train(train_loader, test_loader, model, optimizer, stop)
    # Label the histories with the optimizer that actually ran. The key used to be
    # hard-coded to "new_ann_8_SGD-Momentum" even though this cell trains with
    # plain SGD (the momentum branch above is not taken for the "SGD" entry).
    result_key = f"new_ann_8_{algo_name}"
    train_loss_dict[result_key] = train_loss_list
    val_loss_dict[result_key] = val_loss_list

In [None]:
# Rank runs by the mean of their last (up to) ten validation losses, best first.
val_loss_ranking_list = [np.mean(history[-10:]) for history in val_loss_dict.values()]
# Despite the name, the final value is a list of (run name, mean loss) pairs.
val_loss_ranking_dict = sorted(
    zip(val_loss_dict.keys(), val_loss_ranking_list),
    key=lambda entry: entry[1],
)
val_loss_ranking_dict

In [None]:
# Pad every validation-loss history with None up to the longest one so they
# form equal-length DataFrame columns, then write them to CSV.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {
    name: history + [None] * (max_len - len(history))
    for name, history in val_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/best_models_val_1.csv", index=False)

In [None]:
# Rank runs by the mean of their last (up to) ten training losses, best first.
train_loss_ranking_list = [np.mean(history[-10:]) for history in train_loss_dict.values()]
# Despite the name, the final value is a list of (run name, mean loss) pairs.
train_loss_ranking_dict = sorted(
    zip(train_loss_dict.keys(), train_loss_ranking_list),
    key=lambda entry: entry[1],
)
train_loss_ranking_dict

In [None]:
# Pad every training-loss history with None up to the longest one so they
# form equal-length DataFrame columns, then write them to CSV.
max_len = max(map(len, train_loss_dict.values()))
loss_filled_results = {
    name: history + [None] * (max_len - len(history))
    for name, history in train_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/best_models_train_1.csv", index=False)

# Training with multiple files

In [8]:
def test(model, test_loader):
    error = nn.L1Loss()
    temp_loss_list = []
    for boards, labels in test_loader:
        test = boards.view(-1, 1088)
        outputs = model(test)
        temp_loss_list.append(error(outputs, labels.view(-1, 1)).detach().item())
    val_loss = np.mean(temp_loss_list)
    print(f"VAL:_LOSS: {round(float(val_loss), 4)}")
    return val_loss

In [9]:
def train(train_loader, valid_loader, test_laoder, model, optimizer, stop):
    """Train ``model`` with L1 loss until validation loss stops improving.

    Each epoch runs one pass over ``train_loader``, then measures the mean
    per-batch loss on ``valid_loader`` (in eval mode, without gradients) and,
    when a test loader is supplied, on the test loader via ``test()``. Training
    ends once the validation loss has failed to improve on its best value for
    ``stop`` consecutive epochs.

    Args:
        train_loader: iterable of ``(boards, labels)`` training batches.
        valid_loader: iterable of ``(boards, labels)`` validation batches.
        test_laoder: optional test loader (parameter name kept, typo included,
            for backward compatibility); falsy to skip test evaluation.
        model: network called on batches reshaped to ``(-1, 1088)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        stop: number of consecutive non-improving epochs that ends training.

    Returns:
        ``(train_loss_list, val_loss_list, test_loss_list)`` with one entry per
        epoch; ``test_loss_list`` is empty when no test loader is given.
    """
    # Use the loader that was actually passed in. The body previously read the
    # global ``test_loader`` because of the misspelled parameter name.
    test_loader = test_laoder
    error = nn.L1Loss()
    train_loss_list = []
    val_loss_list = []
    test_loss_list = []
    min_loss = float('inf')
    epoch_count = 1
    stop_count = 0
    was_training = model.training

    try:
        while True:
            # Training pass: dropout / batch-norm must be in training mode.
            model.train()
            batch_losses = []
            for boards, labels in train_loader:
                inputs = boards.view(-1, 1088)
                targets = labels.view(-1, 1)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = error(outputs, targets)
                batch_losses.append(loss.detach().item())
                loss.backward()
                optimizer.step()

            train_loss = np.mean(batch_losses)
            train_loss_list.append(train_loss)

            # Evaluation passes: eval mode and no autograd bookkeeping.
            model.eval()
            batch_losses = []
            test_loss = None
            with torch.no_grad():
                for boards, labels in valid_loader:
                    outputs = model(boards.view(-1, 1088))
                    batch_losses.append(error(outputs, labels.view(-1, 1)).item())

                val_loss = np.mean(batch_losses)
                val_loss_list.append(val_loss)

                if val_loss < min_loss:
                    min_loss = val_loss
                    stop_count = 0
                else:
                    stop_count += 1

                if test_loader:
                    test_loss = test(model, test_loader)
                    test_loss_list.append(test_loss)

            # Without a test loader there is no test loss to report.
            test_text = "N/A" if test_loss is None else round(float(test_loss), 4)
            print(f"EPOCH: {epoch_count}, TRAIN_LOSS: {round(float(train_loss), 4)}, VAL:_LOSS: {round(float(val_loss), 4)}, TEST_LOSS: {test_text} STOP_COUNT: {stop_count}")
            epoch_count += 1

            if stop_count >= stop:
                print(f"MIN_VAL_LOSS: {min(val_loss_list)}")
                break
    finally:
        # Restore the caller's mode even if training is interrupted.
        model.train(was_training)
    return train_loss_list, val_loss_list, test_loss_list

In [10]:
def train_all(model, optimizer, test_loader, n_files, stop):
    """Train ``model`` on the first ``n_files`` entries of ``csvs``, one file at a time.

    Each file is read from ``./lichess/data_1/``, split by ``prepare_data``
    (using the module-level ``batch_size``) and passed to ``train``. The same
    model and optimizer are reused across files.

    Returns:
        Three lists (train, validation, test), each holding one per-file loss
        history as returned by ``train``.
    """
    total_train_loss_list, total_val_loss_list, total_test_loss_list = [], [], []
    collectors = (total_train_loss_list, total_val_loss_list, total_test_loss_list)
    for file_idx in range(n_files):
        print(f"FILE_NUMBER: {file_idx}")
        frame = pd.read_csv(f"./lichess/data_1/{csvs[file_idx]}")
        fit_loader, holdout_loader = prepare_data(frame, batch_size)
        per_file_histories = train(fit_loader, holdout_loader, test_loader, model, optimizer, stop)
        # Append the (train, val, test) histories to their matching accumulators.
        for collected, history in zip(collectors, per_file_histories):
            collected.append(history)
    return total_train_loss_list, total_val_loss_list, total_test_loss_list

In [11]:
# Shared configuration for the multi-file runs.
batch_size, n_files, stop = 128, 1, 10
# train_all() consumes csvs[0:n_files]; the next file is used for testing.
test_index = n_files
print(f"TEST_INDEX: {test_index}")
test_df = pd.read_csv(f"./lichess/data_1/{csvs[test_index]}")
# prepare_data() splits the file; only its second (held-out) loader is kept.
_, test_loader = prepare_data(test_df, batch_size)
# Loss histories keyed by run name, filled in by the training cells.
train_loss_dict, val_loss_dict, test_loss_dict = {}, {}, {}

TEST_INDEX: 1


In [12]:
class ANNModel1(nn.Module):
    """One-hidden-layer network: Linear -> ReLU -> Linear -> Sigmoid."""

    def __init__(self, input_dim, hidden_dim_0, output_dim):
        super().__init__()
        # Registered in the order fc1, relu1, fc2, sig1.
        components = (
            ("fc1", nn.Linear(input_dim, hidden_dim_0)),
            ("relu1", nn.ReLU()),
            ("fc2", nn.Linear(hidden_dim_0, output_dim)),
            ("sig1", nn.Sigmoid()),
        )
        for attr_name, module in components:
            setattr(self, attr_name, module)

    def forward(self, x):
        """Return ``sig1(fc2(relu1(fc1(x))))``."""
        hidden = self.relu1(self.fc1(x))
        return self.sig1(self.fc2(hidden))

In [13]:
# ANN1 multi-file run: one hidden layer of width 512 on 1088 input features.
# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"Adamax": (torch.optim.Adamax, 1e-4)}

input_dim, hd_0, output_dim = 1088, 512, 1

for algo_name, optimizer_tuple in optimizer_dict.items():
    print(algo_name)
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel1(input_dim, hd_0, output_dim)
    # Only an entry named "SGD-Momentum" would be given a momentum term.
    optimizer_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)
    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    # Store the per-file histories under a key that records the file count.
    run_key = f'ann_hd_1_file_{n_files}'
    train_loss_dict[run_key] = total_train_loss_list
    val_loss_dict[run_key] = total_val_loss_list
    test_loss_dict[run_key] = total_test_loss_list

Adamax
TRAINING
FILE_NUMBER: 0
VAL:_LOSS: 0.2269
EPOCH: 1, TRAIN_LOSS: 0.2246, VAL:_LOSS: 0.2176, TEST_LOSS: 0.2269 STOP_COUNT: 0
VAL:_LOSS: 0.2245
EPOCH: 2, TRAIN_LOSS: 0.2134, VAL:_LOSS: 0.209, TEST_LOSS: 0.2245 STOP_COUNT: 0
VAL:_LOSS: 0.2234
EPOCH: 3, TRAIN_LOSS: 0.2059, VAL:_LOSS: 0.2033, TEST_LOSS: 0.2234 STOP_COUNT: 0
VAL:_LOSS: 0.2217
EPOCH: 4, TRAIN_LOSS: 0.2, VAL:_LOSS: 0.1975, TEST_LOSS: 0.2217 STOP_COUNT: 0
VAL:_LOSS: 0.2201
EPOCH: 5, TRAIN_LOSS: 0.195, VAL:_LOSS: 0.1932, TEST_LOSS: 0.2201 STOP_COUNT: 0
VAL:_LOSS: 0.2192
EPOCH: 6, TRAIN_LOSS: 0.1906, VAL:_LOSS: 0.1895, TEST_LOSS: 0.2192 STOP_COUNT: 0
VAL:_LOSS: 0.2186
EPOCH: 7, TRAIN_LOSS: 0.1867, VAL:_LOSS: 0.1858, TEST_LOSS: 0.2186 STOP_COUNT: 0
VAL:_LOSS: 0.2181
EPOCH: 8, TRAIN_LOSS: 0.1828, VAL:_LOSS: 0.1835, TEST_LOSS: 0.2181 STOP_COUNT: 0
VAL:_LOSS: 0.2173
EPOCH: 9, TRAIN_LOSS: 0.1792, VAL:_LOSS: 0.1793, TEST_LOSS: 0.2173 STOP_COUNT: 0
VAL:_LOSS: 0.2161
EPOCH: 10, TRAIN_LOSS: 0.1754, VAL:_LOSS: 0.1761, TEST_LOSS: 0.21

KeyboardInterrupt: 

In [None]:
class ANNModel2(nn.Module):
    """Two-hidden-layer network: (Linear -> ReLU) x 2, then Linear -> Sigmoid."""

    def __init__(self, input_dim, hidden_dim_0, hidden_dim_1, output_dim):
        super().__init__()
        sizes = (input_dim, hidden_dim_0, hidden_dim_1)
        # Register fc<k> then relu<k> for each hidden layer, in order.
        for idx in range(1, len(sizes)):
            setattr(self, f"fc{idx}", nn.Linear(sizes[idx - 1], sizes[idx]))
            setattr(self, f"relu{idx}", nn.ReLU())
        self.fc3 = nn.Linear(hidden_dim_1, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Apply the two hidden layers, then fc3 followed by the sigmoid."""
        for dense, activation in ((self.fc1, self.relu1), (self.fc2, self.relu2)):
            x = activation(dense(x))
        return self.sig1(self.fc3(x))

In [None]:
# ANN2 multi-file run: two hidden layers of width 512 on 1088 input features.
import copy

input_dim = 1088
hidden_dim_0 = 512
hidden_dim_1 = 512
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {
    "SGD-Momentum": (torch.optim.SGD, 0.01),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class = optimizer_tuple[0]
    learning_rate = optimizer_tuple[1]
    model = ANNModel2(input_dim, hidden_dim_0, hidden_dim_1, output_dim)
    # Snapshot the untrained weights. A deep copy is required: the tensors in
    # state_dict() share storage with the live parameters, so the shallow
    # ``.copy()`` used before would silently follow the weights as they train.
    initial_params = copy.deepcopy(model.state_dict())
    if algo_name == "SGD-Momentum":
        optimizer = optimizer_class(model.parameters(), lr=learning_rate, momentum = 0.9)
    else:
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)
    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    train_loss_dict[f'ann_hd_2_file_{n_files}'] = total_train_loss_list
    val_loss_dict[f'ann_hd_2_file_{n_files}'] = total_val_loss_list
    test_loss_dict[f'ann_hd_2_file_{n_files}'] = total_test_loss_list

In [None]:
class ANNModel3(nn.Module):
    """Three-hidden-layer network: (Linear -> ReLU) x 3, then Linear -> Sigmoid."""

    def __init__(self, input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, output_dim):
        super().__init__()
        sizes = (input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2)
        # Register fc<k> then relu<k> for each hidden layer, in order.
        for idx in range(1, len(sizes)):
            setattr(self, f"fc{idx}", nn.Linear(sizes[idx - 1], sizes[idx]))
            setattr(self, f"relu{idx}", nn.ReLU())
        self.fc4 = nn.Linear(hidden_dim_2, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Apply the three hidden layers, then fc4 followed by the sigmoid."""
        hidden_stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
        )
        for dense, activation in hidden_stages:
            x = activation(dense(x))
        return self.sig1(self.fc4(x))

In [None]:
# ANN3 train
# 1088 input features, three hidden layers of width 512, a single output.
input_dim = 1088
hidden_dim_0 = hidden_dim_1 = hidden_dim_2 = 512
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"SGD-Momentum": (torch.optim.SGD, 0.01)}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel3(input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, output_dim)
    # Only the "SGD-Momentum" entry is given a momentum term.
    optimizer_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)

    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    # Store the per-file histories under a key that records the file count.
    run_key = f'ann_hd_3_file_{n_files}'
    train_loss_dict[run_key] = total_train_loss_list
    val_loss_dict[run_key] = total_val_loss_list
    test_loss_dict[run_key] = total_test_loss_list

In [None]:
class ANNModel4(nn.Module):
    """Four-hidden-layer network: (Linear -> ReLU) x 4, then Linear -> Sigmoid."""

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, output_dim):
        super().__init__()
        sizes = (input_dim, hd_0, hd_1, hd_2, hd_3)
        # Register fc<k> then relu<k> for each hidden layer, in order.
        for idx in range(1, len(sizes)):
            setattr(self, f"fc{idx}", nn.Linear(sizes[idx - 1], sizes[idx]))
            setattr(self, f"relu{idx}", nn.ReLU())
        self.fc5 = nn.Linear(hd_3, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Apply the four hidden layers, then fc5 followed by the sigmoid."""
        hidden_stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
            (self.fc4, self.relu4),
        )
        for dense, activation in hidden_stages:
            x = activation(dense(x))
        return self.sig1(self.fc5(x))

In [None]:
# ANN4 train
# 1088 input features, four hidden layers of width 512, a single output.
input_dim = 1088
hidden_dim_0 = hidden_dim_1 = hidden_dim_2 = hidden_dim_3 = 512
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"SGD-Momentum": (torch.optim.SGD, 0.01)}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel4(input_dim, hidden_dim_0, hidden_dim_1, hidden_dim_2, hidden_dim_3, output_dim)
    # Only the "SGD-Momentum" entry is given a momentum term.
    optimizer_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)

    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    # Store the per-file histories under a key that records the file count.
    run_key = f'ann_hd_4_file_{n_files}'
    train_loss_dict[run_key] = total_train_loss_list
    val_loss_dict[run_key] = total_val_loss_list
    test_loss_dict[run_key] = total_test_loss_list

In [None]:
class ANNModel5(nn.Module):
    """Five-hidden-layer network: (Linear -> ReLU) x 5, then Linear -> Sigmoid."""

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, output_dim):
        super().__init__()
        sizes = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4)
        # Register fc<k> then relu<k> for each hidden layer, in order.
        for idx in range(1, len(sizes)):
            setattr(self, f"fc{idx}", nn.Linear(sizes[idx - 1], sizes[idx]))
            setattr(self, f"relu{idx}", nn.ReLU())
        self.fc6 = nn.Linear(hd_4, output_dim)
        self.sig1 = nn.Sigmoid()

    def forward(self, x):
        """Apply the five hidden layers, then fc6 followed by the sigmoid."""
        hidden_stages = (
            (self.fc1, self.relu1),
            (self.fc2, self.relu2),
            (self.fc3, self.relu3),
            (self.fc4, self.relu4),
            (self.fc5, self.relu5),
        )
        for dense, activation in hidden_stages:
            x = activation(dense(x))
        return self.sig1(self.fc6(x))

In [None]:
# ANN5 train
# 1088 input features, five hidden layers of width 512, a single output.
input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = 512
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"SGD": (torch.optim.SGD, 0.1)}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel5(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, output_dim)
    # Only an entry named "SGD-Momentum" would be given a momentum term.
    optimizer_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)

    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    # Store the per-file histories under a key that records the file count.
    run_key = f'ann_hd_5_file_{n_files}'
    train_loss_dict[run_key] = total_train_loss_list
    val_loss_dict[run_key] = total_val_loss_list
    test_loss_dict[run_key] = total_test_loss_list

In [None]:
class ANNModel6(nn.Module):
    """Fully connected network with six ReLU hidden layers and a sigmoid output.

    The linear layers are registered as ``fc1`` ... ``fc7``; ``fc7`` maps the
    last hidden width to ``output_dim``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim):
        super().__init__()
        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim)
        # fc<k> maps widths[k-1] -> widths[k]; layers are created in order fc1..fc7.
        for position, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{position}", nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Apply fc1..fc6 with ReLU, then fc7 followed by a sigmoid."""
        hidden_layers = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6)
        for dense in hidden_layers:
            x = torch.relu(dense(x))
        return torch.sigmoid(self.fc7(x))

In [None]:
# ANN6 train
# 1088 input features, six hidden layers of width 512, a single output.
input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = 512
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"SGD-Momentum": (torch.optim.SGD, 0.03)}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel6(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, output_dim)
    # Only the "SGD-Momentum" entry is given a momentum term.
    optimizer_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)
    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    # Store the per-file histories under a key that records the file count.
    run_key = f'ann_hd_6_file_{n_files}'
    train_loss_dict[run_key] = total_train_loss_list
    val_loss_dict[run_key] = total_val_loss_list
    test_loss_dict[run_key] = total_test_loss_list

In [None]:
class ANNModel7(nn.Module):
    """Fully connected network with seven ReLU hidden layers and a sigmoid output.

    The linear layers are registered as ``fc1`` ... ``fc8``; ``fc8`` maps the
    last hidden width to ``output_dim``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim):
        super().__init__()
        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim)
        # fc<k> maps widths[k-1] -> widths[k]; layers are created in order fc1..fc8.
        for position, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{position}", nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Apply fc1..fc7 with ReLU, then fc8 followed by a sigmoid."""
        hidden_layers = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6, self.fc7)
        for dense in hidden_layers:
            x = torch.relu(dense(x))
        return torch.sigmoid(self.fc8(x))

In [None]:
# ANN7 train
# 1088 input features, seven hidden layers of width 512, a single output.
input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = 512
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"SGD-Momentum": (torch.optim.SGD, 0.05)}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel7(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, output_dim)
    # Only the "SGD-Momentum" entry is given a momentum term.
    optimizer_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)
    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    # Store the per-file histories under a key that records the file count.
    run_key = f'ann_hd_7_file_{n_files}'
    train_loss_dict[run_key] = total_train_loss_list
    val_loss_dict[run_key] = total_val_loss_list
    test_loss_dict[run_key] = total_test_loss_list

In [None]:
class ANNModel8(nn.Module):
    """Fully connected network with eight ReLU hidden layers and a sigmoid output.

    The linear layers are registered as ``fc1`` ... ``fc9``; ``fc9`` maps the
    last hidden width to ``output_dim``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim):
        super().__init__()
        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim)
        # fc<k> maps widths[k-1] -> widths[k]; layers are created in order fc1..fc9.
        for position, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{position}", nn.Linear(fan_in, fan_out))

    def forward(self, x):
        """Apply fc1..fc8 with ReLU, then fc9 followed by a sigmoid."""
        hidden_layers = (
            self.fc1, self.fc2, self.fc3, self.fc4,
            self.fc5, self.fc6, self.fc7, self.fc8,
        )
        for dense in hidden_layers:
            x = torch.relu(dense(x))
        return torch.sigmoid(self.fc9(x))

In [None]:
# ANN8 train
# 1088 input features, eight hidden layers of width 512, a single output.
input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = hd_7 = 512
output_dim = 1

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"SGD-Momentum": (torch.optim.SGD, 0.1)}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class, learning_rate = optimizer_tuple
    model = ANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim)
    # Only the "SGD-Momentum" entry is given a momentum term.
    optimizer_kwargs = {"momentum": 0.9} if algo_name == "SGD-Momentum" else {}
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)
    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    # Store the per-file histories under a key that records the file count.
    run_key = f'ann_hd_8_file_{n_files}'
    train_loss_dict[run_key] = total_train_loss_list
    val_loss_dict[run_key] = total_val_loss_list
    test_loss_dict[run_key] = total_test_loss_list

In [None]:
class NewANNModel8(nn.Module):
    """Eight-hidden-layer network with batch normalisation and dropout.

    Each hidden stage ``k`` (1..8) is ``fc<k> -> bn<k> -> ReLU -> dropout<k>``;
    the output is ``sigmoid(fc9(...))``. Every dropout layer uses ``drop_prob``.
    """

    def __init__(self, input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob):
        super().__init__()
        widths = (input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7)
        # Register fc<k>, bn<k>, dropout<k> per stage, in that order.
        for stage in range(1, len(widths)):
            setattr(self, f"fc{stage}", nn.Linear(widths[stage - 1], widths[stage]))
            setattr(self, f"bn{stage}", nn.BatchNorm1d(widths[stage]))
            setattr(self, f"dropout{stage}", nn.Dropout(p=drop_prob))
        self.fc9 = nn.Linear(hd_7, output_dim)

    def forward(self, x):
        """Run the eight hidden stages, then fc9 followed by a sigmoid."""
        for stage in range(1, 9):
            dense = getattr(self, f"fc{stage}")
            norm = getattr(self, f"bn{stage}")
            drop = getattr(self, f"dropout{stage}")
            x = drop(torch.relu(norm(dense(x))))
        return torch.sigmoid(self.fc9(x))

In [None]:
# NewANN8 train
# 1088 input features, eight hidden layers of width 512, a single output;
# regularised with dropout (drop_prob) and optimizer weight decay.
input_dim = 1088
hd_0 = hd_1 = hd_2 = hd_3 = hd_4 = hd_5 = hd_6 = hd_7 = 512
drop_prob = 0.2
weight_decay = 0.001
output_dim = 1
stop = 20  # forwarded to train_all() as its `stop` argument

# Maps a run label to (optimizer class, learning rate).
optimizer_dict = {"SGD": (torch.optim.SGD, 0.02)}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class, learning_rate = optimizer_tuple
    model = NewANNModel8(input_dim, hd_0, hd_1, hd_2, hd_3, hd_4, hd_5, hd_6, hd_7, output_dim, drop_prob)
    # Weight decay always applies; momentum only for an entry named "SGD-Momentum".
    optimizer_kwargs = {"weight_decay": weight_decay}
    if algo_name == "SGD-Momentum":
        optimizer_kwargs["momentum"] = 0.9
    optimizer = optimizer_class(model.parameters(), lr=learning_rate, **optimizer_kwargs)

    print("TRAINING")
    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(model, optimizer, test_loader, n_files, stop)
    # Store the per-file histories under a key that records the file count.
    run_key = f'ann_hd_8_file_{n_files}_r'
    train_loss_dict[run_key] = total_train_loss_list
    val_loss_dict[run_key] = total_val_loss_list
    test_loss_dict[run_key] = total_test_loss_list

TEST_INDEX: 2
TRAINING
FILE_NUMBER: 0


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [None]:
# Pad every training-loss history with None up to the longest one so they
# form equal-length DataFrame columns, then write them to CSV.
max_len = max(map(len, train_loss_dict.values()))
loss_filled_results = {
    name: history + [None] * (max_len - len(history))
    for name, history in train_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/best_models_train_1.csv", index=False)

In [None]:
# Reload the saved training-loss histories and display them as a table.
history_path = './model_histories/ANN/best_models_train_1.csv'
df = pd.read_csv(history_path)
df

In [None]:
# Pad every validation-loss history with None up to the longest one so they
# form equal-length DataFrame columns, then write them to CSV.
max_len = max(map(len, val_loss_dict.values()))
loss_filled_results = {
    name: history + [None] * (max_len - len(history))
    for name, history in val_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/best_models_val_1.csv", index=False)

In [None]:
# Reload the saved validation-loss histories and display them as a table.
history_path = './model_histories/ANN/best_models_val_1.csv'
df = pd.read_csv(history_path)
df

In [None]:
# Pad every test-loss history with None up to the longest one so they
# form equal-length DataFrame columns, then write them to CSV.
max_len = max(map(len, test_loss_dict.values()))
loss_filled_results = {
    name: history + [None] * (max_len - len(history))
    for name, history in test_loss_dict.items()
}
df = pd.DataFrame(loss_filled_results)
df.to_csv("./model_histories/ANN/best_models_test_1.csv", index=False)

In [None]:
# Reload the saved test-loss histories and display them as a table.
history_path = './model_histories/ANN/best_models_test_1.csv'
df = pd.read_csv(history_path)
df

In [1]:
import os
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
import random
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import chess
import pickle

In [2]:
def prepare_data(df, batch_size):
    """Split ``df`` 80/20 and wrap both parts in shuffled ``DataLoader`` objects.

    The ``result`` column is the target; every other column is a feature.
    The split seed is drawn from ``random.randint(0, 100)`` on each call.

    Returns:
        ``(train_loader, test_loader)`` over float32 tensors.
    """
    feature_matrix = df.loc[:, df.columns != "result"].values
    target_vector = df.result.values
    split_seed = random.randint(0, 100)

    x_fit, x_held, y_fit, y_held = train_test_split(
        feature_matrix,
        target_vector,
        test_size=0.2,
        random_state=split_seed,
    )

    def _as_loader(features, targets):
        # Convert a numpy pair to float32 tensors and batch it with shuffling.
        dataset = torch.utils.data.TensorDataset(
            torch.from_numpy(features).type(torch.float32),
            torch.from_numpy(targets).type(torch.float32),
        )
        return DataLoader(dataset, batch_size=batch_size, shuffle=True)

    return _as_loader(x_fit, y_fit), _as_loader(x_held, y_held)

In [3]:
# Shared configuration for the multi-file runs on the ./gm/data_1 bitboard files.
batch_size, n_files, stop = 256, 27, 5
# train_all() draws from file ids 0..n_files-1; the next id is used for testing.
test_index = n_files
print(f"TEST_INDEX: {test_index}")
test_df = pd.read_csv(f"./gm/data_1/bitboard_{test_index}.csv")
# prepare_data() splits the file; only its second (held-out) loader is kept.
_, test_loader = prepare_data(test_df, batch_size)
# Loss histories keyed by run name.
train_loss_dict, val_loss_dict, test_loss_dict = {}, {}, {}
device = "cpu"

TEST_INDEX: 27


In [4]:
def test(model, test_loader):
    """Return the mean per-batch MSE loss of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and gradients are disabled while
    the loss is measured, so dropout is inactive and batch-norm running
    statistics are not updated with test data. The model's previous
    training/eval mode is restored before returning.

    Args:
        model: network called on batches reshaped to ``(-1, 1088)``.
        test_loader: iterable yielding ``(boards, labels)`` tensor pairs.

    Returns:
        The mean of the per-batch MSE losses (``numpy`` float).
    """
    error = nn.MSELoss()
    temp_loss_list = []
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for boards, labels in test_loader:
                # Batches are moved to the module-level ``device`` before the forward pass.
                val = boards.view(-1, 1088).to(device)
                labels = labels.view(-1, 1).to(device)
                outputs = model(val)
                temp_loss_list.append(error(outputs, labels).item())
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)
    val_loss = np.mean(temp_loss_list)
    return val_loss

In [5]:
def train(train_loader, valid_loader, test_laoder, model, optimizer, stop):
    """Train ``model`` with MSE loss until validation loss stops improving.

    Each epoch runs one pass over ``train_loader``, then measures the mean
    per-batch loss on ``valid_loader`` (in eval mode, without gradients) and,
    when a test loader is supplied, on the test loader via ``test()``. Training
    ends once the validation loss has failed to improve on its best value for
    more than ``stop`` consecutive epochs.

    Args:
        train_loader: iterable of ``(boards, labels)`` training batches.
        valid_loader: iterable of ``(boards, labels)`` validation batches.
        test_laoder: optional test loader (parameter name kept, typo included,
            for backward compatibility); falsy to skip test evaluation.
        model: network called on batches reshaped to ``(-1, 1088)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        stop: early-stopping threshold; also the window of the final averages.

    Returns:
        ``(train_loss_list, val_loss_list, test_loss_list)`` with one entry per
        epoch; ``test_loss_list`` is empty when no test loader is given.
    """
    # Use the loader that was actually passed in. The body previously read the
    # global ``test_loader`` because of the misspelled parameter name.
    test_loader = test_laoder
    error = nn.MSELoss()
    train_loss_list = []
    val_loss_list = []
    test_loss_list = []
    min_loss = float('inf')
    epoch_count = 1
    stop_count = 0
    was_training = model.training

    try:
        while True:
            # Training pass: dropout / batch-norm must be in training mode.
            model.train()
            batch_losses = []
            for boards, labels in train_loader:
                inputs = boards.view(-1, 1088).to(device)
                targets = labels.view(-1, 1).to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = error(outputs, targets)
                batch_losses.append(loss.detach().item())
                loss.backward()
                optimizer.step()

            train_loss = np.mean(batch_losses)
            train_loss_list.append(train_loss)

            # Evaluation passes: eval mode and no autograd bookkeeping.
            model.eval()
            batch_losses = []
            test_loss = None
            with torch.no_grad():
                for boards, labels in valid_loader:
                    inputs = boards.view(-1, 1088).to(device)
                    targets = labels.view(-1, 1).to(device)
                    batch_losses.append(error(model(inputs), targets).item())

                val_loss = np.mean(batch_losses)
                val_loss_list.append(val_loss)

                if val_loss < min_loss:
                    min_loss = val_loss
                    stop_count = 0
                else:
                    stop_count += 1

                if test_loader:
                    test_loss = test(model, test_loader)
                    test_loss_list.append(test_loss)

            # Without a test loader there is no test loss to report.
            test_text = "N/A" if test_loss is None else round(float(test_loss), 4)
            print(f"EPOCH: {epoch_count}, TRAIN_LOSS: {round(float(train_loss), 4)}, VAL_LOSS: {round(float(val_loss), 4)}, TEST_LOSS: {test_text} STOP_COUNT: {stop_count}")
            epoch_count += 1

            if stop_count > stop:
                # Averages over the last `stop` epochs of each history.
                print(f"AVG_TRAIN_LOSS: {np.mean(train_loss_list[stop * (-1):])}")
                print(f"AVG_VAL_LOSS: {np.mean(val_loss_list[stop * (-1):])}")
                if test_loss_list:
                    print(f"AVG_TEST_LOSS: {np.mean(test_loss_list[stop * (-1):])}")
                else:
                    print("AVG_TEST_LOSS: N/A")
                break
    finally:
        # Restore the caller's mode even if training is interrupted.
        model.train(was_training)
    return train_loss_list, val_loss_list, test_loss_list

In [6]:
def train_all(model, optimizer, test_loader, n_files, stop):
    """Train ``model`` on ``n_files`` bitboard CSV files in random order.

    File ids ``0 .. n_files - 1`` are shuffled; for each id the file
    ``./gm/data_1/bitboard_<id>.csv`` is read, split into two loaders by
    ``prepare_data`` (using the module-level ``batch_size``) and handed to
    ``train`` together with the shared ``test_loader``.

    Args:
        model: Network trained across all files.
        optimizer: Optimizer passed through to ``train`` for every file.
        test_loader: Test loader passed through to ``train`` for every file.
        n_files: Number of CSV files to visit.
        stop: Early-stopping patience forwarded to ``train``.

    Returns:
        Tuple of three lists (train, validation, test); each holds one
        per-epoch loss history per visited file, in visiting order.
    """
    per_file_train = []
    per_file_val = []
    per_file_test = []

    shuffled_ids = list(range(n_files))
    random.shuffle(shuffled_ids)

    for position, file_id in enumerate(shuffled_ids, start=1):
        print(f"{position}TH FILE, FILE_ID: {file_id}")
        frame = pd.read_csv(f"./gm/data_1/bitboard_{file_id}.csv")
        train_loader, val_loader = prepare_data(frame, batch_size)
        train_history, val_history, test_history = train(
            train_loader, val_loader, test_loader, model, optimizer, stop
        )
        per_file_train.append(train_history)
        per_file_val.append(val_history)
        per_file_test.append(test_history)

    return per_file_train, per_file_val, per_file_test

In [7]:
class ANN16(nn.Module):
    """Fully connected regressor with 16 hidden blocks and a tanh output.

    Every hidden block is ``Linear -> BatchNorm1d -> ReLU -> Dropout``. The
    first block maps ``input_size`` to ``hidden_size``; the remaining 15
    map ``hidden_size`` to ``hidden_size``. All block modules live in one
    flat ``ModuleList`` (``self.layers``), followed by a separate linear
    ``output_layer`` whose result is passed through ``tanh``.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of output values per sample.
        hidden_size: Width of every hidden layer.
        dropout_prob: Drop probability used by every Dropout module.
    """

    def __init__(self, input_size=1088, output_size=1, hidden_size=512, dropout_prob=0.2):
        super().__init__()
        # 17 widths give 16 consecutive (in, out) pairs: one input block
        # followed by 15 hidden-to-hidden blocks.
        widths = [input_size] + [hidden_size] * 16
        self.layers = nn.ModuleList()
        for fan_in, fan_out in zip(widths, widths[1:]):
            self.layers.extend([
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.ReLU(),
                nn.Dropout(dropout_prob),
            ])
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through every hidden module, then the tanh-squashed output layer."""
        hidden = x
        for module in self.layers:
            hidden = module(hidden)
        return torch.tanh(self.output_layer(hidden))

In [8]:
# Weight-decay coefficient handed to every optimizer built in this cell.
weight_decay = 0.001

# Optimizer name -> (optimizer class, learning rate).
optimizer_dict = {
    "Adam": (torch.optim.Adam, 0.00012618),
}

for algo_name, optimizer_tuple in optimizer_dict.items():
    optimizer_class, learning_rate = optimizer_tuple

    # A fresh network is created for each optimizer entry.
    model = ANN16()
    model.to(device)

    # Only the "SGD-Momentum" entry receives a momentum argument.
    optimizer_kwargs = {"lr": learning_rate}
    if algo_name == "SGD-Momentum":
        optimizer_kwargs["momentum"] = 0.9
    optimizer_kwargs["weight_decay"] = weight_decay
    optimizer = optimizer_class(model.parameters(), **optimizer_kwargs)

    total_train_loss_list, total_val_loss_list, total_test_loss_list = train_all(
        model, optimizer, test_loader, n_files, stop
    )

    # The three loss dictionaries share one key per run.
    history_key = f'ann_hd_64_file_{n_files}_r'
    train_loss_dict[history_key] = total_train_loss_list
    val_loss_dict[history_key] = total_val_loss_list
    test_loss_dict[history_key] = total_test_loss_list

# Persists the model from the last loop iteration.
torch.save(model, "hd16r_adam_gm.pt")

1TH FILE, FILE_ID: 22
EPOCH: 1, TRAIN_LOSS: 0.6406, VAL_LOSS: 0.6262, TEST_LOSS: 0.6374 STOP_COUNT: 0
EPOCH: 2, TRAIN_LOSS: 0.6167, VAL_LOSS: 0.6156, TEST_LOSS: 0.6272 STOP_COUNT: 0


KeyboardInterrupt: 