In [1]:
import torch
import torch.nn as nn
import pandas as pd
from chessEngine import ChessEncoder, MLPEngine
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import LambdaLR

# Preprocessing

In [2]:
df = pd.read_csv('data/fen_analysis.csv').sample(frac=1)[:50000] # This shuffles the rows
# df = pd.read_csv('fen_analysis.csv')[:30000] # no shuffle
df

Unnamed: 0,fen_value,score
258805,r3k1r1/1p3p2/pbpQp1p1/7p/3PPP2/BPn2R1P/P2q2PK/...,35
97451,3r1r2/5pkp/4p1p1/p1p5/4NP1N/1qR1P3/5QPP/5RK1 b...,-394
46778,r2q1rk1/p1bpnppp/5n2/1Np5/1PP5/P5P1/4NPKP/R1BQ...,78
209753,rn1R4/p1p1Q3/1p4p1/1k2p1N1/4B3/8/PPP2PP1/2K4R ...,-8494
27388,r7/8/4p3/4Pk1p/r2Pn1P1/PRPK4/8/R3B3 b - - 0 38,-160
...,...,...
170271,r2qk2r/pp1nnbpp/2p2p2/3pb3/8/1P2P1P1/PBPNNPBP/...,34
251534,4k2r/p3p1b1/2p3pp/1q3p2/1r1P4/1PQ1PN1P/P4PP1/R...,301
207207,1r1qr1k1/1pp1b1pp/p2pb3/4n1P1/8/2N1BN1P/PPPQ1P...,7
170643,rn1Qk1nr/pbp2pbp/1p2p1p1/4P3/5B2/2N2N2/PPP2PPP...,-60


In [3]:
encoder_object = ChessEncoder()

In [4]:
fen_encodings = []
for fen_i in df['fen_value']:
    encoded_fen = encoder_object.encode_fen(fen_i)
    fen_encodings.append(encoded_fen)

In [5]:
X = torch.tensor(fen_encodings, dtype=torch.int32)

In [6]:
X

tensor([[ 9,  0,  0,  ...,  0, 29, 32],
        [ 0,  0,  0,  ...,  0, 29, 25],
        [ 9,  0,  0,  ...,  0, 34, 34],
        ...,
        [ 0,  9,  0,  ...,  0, 34, 34],
        [ 9, 11,  0,  ...,  0, 38, 29],
        [ 9,  0, 10,  ...,  0, 35, 38]], dtype=torch.int32)

In [7]:
score_encodings = []
for y_i in df['score']:
    # print(y_i)
    encoded_score = encoder_object.encode_score(str(y_i))
    score_encodings.append(encoded_score)
y = torch.tensor(score_encodings, dtype=torch.float32)

In [8]:
X.shape, y.shape

(torch.Size([50000, 200]), torch.Size([50000]))

# Helper Functions

In [9]:
val_split = 10000
test_split = 10000

In [10]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [11]:
def get_batch(X, Y, bs):
    
    assert isinstance(X, torch.Tensor)
    assert isinstance(Y, torch.Tensor)

    batch = torch.randint(0, len(X), (bs,))
    x = X[batch].to(device)
    y = Y[batch].to(device).to(torch.float32)
    return x, y
# b = get_batch(X, y, 64)

In [12]:
X_val = X[:val_split].to(device)
y_val = y[:val_split].to(device)
X_test = X[val_split:val_split+test_split].to(device)
y_test = y[val_split:val_split+test_split].to(device)
X = X[val_split+test_split:]
y = y[val_split+test_split:]

In [13]:
len(X)

30000

# Training

In [14]:
# hyperparameters
lr = 0.1
num_steps = 500
warmup_steps = 50
bs = len(X)
# allowed_error = 100 #
d1 = {1:10, 2:20}
if bs > len(X): bs = len(X)

In [15]:
 # i accidenly used a smaller lr for scheduler and it worked better, maybe try it?

In [16]:
model = MLPEngine(embedding_dim=64, bs = bs).to(device)
loss_category = nn.MSELoss()
optimiser = torch.optim.AdamW(
            model.parameters(), 
            lr = lr,
            betas=(0.9, 0.999),
            eps=1e-3,
            weight_decay=1e-5)

# Define warm-up and decay
def lr_lambda(epoch):
    if epoch < warmup_steps:  
        return epoch / warmup_steps
    else:  # Exponential decay after warm-up
        return 0.99 ** (epoch - warmup_steps)

scheduler = LambdaLR(optimiser, lr_lambda)
model.compile()

In [17]:
import torch._dynamo
torch._dynamo.config.suppress_errors = True
torch.set_float32_matmul_precision('high')

In [18]:
train_history = {}
val_history = {}
start_step = 0

In [19]:
model.train()
# train_history = {}
# val_history = {}
for step_i in range(num_steps):
    tot_step = step_i + start_step
    optimiser.zero_grad()
    x_batch, y_batch = get_batch(X, y, bs) 
    # print(x_batch, y_batch)
    print(x_batch.shape)
    y_pred = model(x_batch).view(bs)
    # print(y_pred.shape, y_batch.shape)
    loss = loss_category(y_pred, y_batch)
    # print(loss.item())
    train_history[tot_step] = loss.item()
    loss.backward()
    optimiser.step()
    scheduler.step()
    # print(f"Epoch {step_i}, Learning Rate: {scheduler.get_last_lr()}")
    print(tot_step, ': ',loss.item())

    if tot_step % 100 == 0 and tot_step != 0:
        # validation phase
        y_pred = model(X_val).view(val_split)
        # print(y_pred.shape, y_batch.shape)
        loss = loss_category(y_pred, y_val)
        # print(loss.item())
        val_history[tot_step] = loss.item()

start_step += num_steps

torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
0 :  3141694.5
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
1 :  3168970.0
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
2 :  3144071.0
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
3 :  3197998.0
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
4 :  3093024.0
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
5 :  3218321.25
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
6 :  3069692.25
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
7 :  2993385.75
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
8 :  3153170.25
torch.Size([30000, 200])
test 1[0m
[31mtorch.Size([30000, 12800])[0m
30000[0m
9 :  3147099.5
torch.Size([30000, 200])
t

KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), 'saves/bad_model.pt')

In [None]:
plt.plot(list(train_history.keys())[100:], list(train_history.values())[100:], label='train')
# plt.plot(val_history.keys(), val_history.values(), label='validation')
plt.legend()
plt.show()

In [None]:
plt.plot(list(train_history.keys()), list(train_history.values()), label='train')
plt.plot(val_history.keys(), val_history.values(), label='validation')
plt.legend()
plt.show()

In [None]:
'''
# My results
At relu, model is stuck around 25k with 3x1000 steps with xavier
AT relu, moedl went to 9k and then exploded at 3x1000 steps with kaiming; then at 14k
At gelu, model is stuck atound 10k

Adam is better than AdamW for this task
'''

# TO DO

- [x] do inference, and run a partially trained model with the GUI intact
- [x] **find a way to fix the fact that our model is giving integer loss
- [x] **Fix the bug in initialisation
- [ ] implement weights and biases or tensorboard 
- [ ] improve the model
  - [ ] get a better/ bigger dataset
  - [ ] hyperparameter and architecture
    - [x] better encoding
    - [ ] residual connections
    - [ ] try adamW after tuning b1 and b2
    - [ ] increase embedding dim
    - [x] increase neurons in the layers
    - [x] increase layers in the network
    - [x] change loss function (maybe)
    - [ ] try diff learning rate scheduler(trapeziodal)
    - [ ] Add regularisation
      - [ ] l1,l2
      - [x] dropout
    - [x] Better initialisation
    - [x] diff optimisation algorithm

# Note

- some issue after the whole architecture was changed in gui file.
- also the dataset is bad, look for a new one. 