In [50]:
import torch
import torch.nn as nn
import pandas as pd
from chessEngine import ChessEncoder, MLPEngine
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import CosineAnnealingLR

# Preprocessing

In [51]:
df = pd.read_csv('fen_analysis.csv').sample(frac=1)[:30000] # This shuffles the rows
df

Unnamed: 0,fen_value,score
43640,r1bqkbnr/ppp2ppp/2n1p3/3pP3/3P4/8/PPP2PPP/RNBQ...,80
144874,r2q1rk1/1b1n1ppp/1pn5/1Bbpp3/8/P3PN1P/1P1B1PP1...,56
217528,r2qk1nr/pbpnppbp/1p1p2p1/8/4P3/3PBNP1/PPPQ1PBP...,220
336398,4r1k1/R2n1pp1/1BNbb2p/1P6/2P1p3/7P/4BPP1/6K1 w...,302
104039,2k5/1pp4p/p4p2/8/2PN4/1P2r3/P6P/5r1K w - - 0 31,-558
...,...,...
54855,r3r1k1/ppp2p1p/5q2/4nbp1/4pN2/PP2P1PP/2P2PB1/R...,-64
255677,5r2/8/4p2k/3pP1bp/p2P4/P1B4K/1P2B3/8 w - - 2 41,-269
102868,rnbqkbnr/ppp3pp/5p2/3pp3/5P2/1P2P3/PBPP2PP/RN1...,32
109529,rn1k1b2/6p1/2n4r/1p5p/2P5/4NP2/PB1P2PP/R2Q1RK1...,-628


In [52]:
encoder_object = ChessEncoder()

In [53]:
fen_encodings = []
for fen_i in df['fen_value']:
    encoded_fen = encoder_object.encode_fen(fen_i)
    fen_encodings.append(encoded_fen)

['r1bqkbnr/ppp2ppp/2n1p3/3pP3/3P4/8/PPP2PPP/RNBQKBNR', 'w', 'KQkq', '-']
['r2q1rk1/1b1n1ppp/1pn5/1Bbpp3/8/P3PN1P/1P1B1PP1/RN1Q1RK1', 'b', '-', '-']
['r2qk1nr/pbpnppbp/1p1p2p1/8/4P3/3PBNP1/PPPQ1PBP/RN2K2R', 'b', 'KQkq', '-']
['4r1k1/R2n1pp1/1BNbb2p/1P6/2P1p3/7P/4BPP1/6K1', 'w', '-', '-']
['2k5/1pp4p/p4p2/8/2PN4/1P2r3/P6P/5r1K', 'w', '-', '-']
['rn1qkb1r/1bpp1ppp/p3pn2/1p6/4P3/2NP1NP1/PPP2PBP/R1BQK2R', 'b', 'KQkq', '-']
['6k1/3r2pp/4b3/4Bp2/6P1/1P3B2/P3K3/8', 'b', '-', '-']
['r1bqkbnr/pppp1ppp/2n5/4P3/5P2/8/PPP1P1PP/RNBQKBNR', 'b', 'KQkq', '-']
['rnbqkb1r/pp3p2/2p1pnp1/3p3p/3P4/4P2P/PPP1BPPN/RNBQ1RK1', 'b', 'kq', '-']
['rnbqkbnr/ppp1pppp/8/3p4/4P3/8/PPPP1PPP/RNBQKBNR', 'w', 'KQkq', '-']
['4r1k1/p1pp1ppp/1p6/8/8/8/P3BPPP/1q3KNR', 'w', '-', '-']
['2kr1b1r/p1q2ppp/1pn1pn2/2pp4/5P2/1P1PPN2/PBPN2PP/R3QRK1', 'b', '-', '-']
['r3kbnr/pppb2pp/2B5/4p3/8/2P3B1/PP3QPP/R3K1NR', 'b', 'KQkq', '-']
['3r1rk1/ppp1qppp/8/4b3/4P3/P6P/1PP1QPP1/R3R1K1', 'b', '-', '-']
['r5k1/1b3p1p/p3p1p1/1pn1Rq2/3Q4/1BP3RP/2

In [54]:
X = torch.tensor(fen_encodings, dtype=torch.int32)

In [55]:
X

tensor([[ 9,  0, 10,  ...,  0, 39, 39],
        [ 9,  0,  0,  ...,  0, 37, 37],
        [ 9,  0,  0,  ...,  0, 39, 39],
        ...,
        [ 9, 11, 10,  ...,  0, 39, 39],
        [ 9, 11,  0,  ...,  0, 31, 22],
        [ 0,  0,  7,  ...,  0, 22, 23]], dtype=torch.int32)

In [56]:
score_encodings = []
for y_i in df['score']:
    # print(y_i)
    encoded_score = encoder_object.encode_score(str(y_i))
    score_encodings.append(encoded_score)
y = torch.tensor(score_encodings, dtype=torch.float32)

In [57]:
X.shape, y.shape

(torch.Size([30000, 200]), torch.Size([30000]))

# Helper Functions

In [58]:
val_split = 10000
test_split = 10000

In [59]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [60]:
def get_batch(X, Y, bs):
    
    assert isinstance(X, torch.Tensor)
    assert isinstance(Y, torch.Tensor)

    batch = torch.randint(0, len(X), (bs,))
    x = X[batch].to(device)
    y = Y[batch].to(device).to(torch.bfloat16)
    return x, y
# b = get_batch(X, y, 64)

In [61]:
X_val = X[:val_split].to(device)
y_val = y[:val_split].to(device)
X_test = X[val_split:test_split].to(device)
y_test = y[val_split:test_split].to(device)
X = X[test_split:]
y = y[test_split:]

In [62]:
len(X)

20000

# Training

In [63]:
# hyperparameters
lr = 0.1
num_steps = 3000
warmup_steps = 50
bs = len(X)
# allowed_error = 100 #
d1 = {1:10, 2:20}
if bs > len(X): bs = len(X)

In [64]:
from torch.optim.lr_scheduler import LambdaLR


In [65]:
model = MLPEngine(embedding_dim=64).to(device)
loss_category = nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr = lr) # i accidenly used a smaller lr for scheduler and it worked better, maybe try it?
# Define warm-up and decay
def lr_lambda(epoch):
    if epoch < warmup_steps:  
        return epoch / warmup_steps
    else:  # Exponential decay after warm-up
        return 0.95 ** (epoch - warmup_steps)

scheduler = LambdaLR(optimiser, lr_lambda)
model = model.to(torch.bfloat16)
model.compile()

In [66]:
import torch._dynamo
torch._dynamo.config.suppress_errors = True

In [67]:
train_history = {}
val_history = {}
start_step = 0

In [68]:
model.train()
# train_history = {}
# val_history = {}
for step_i in range(num_steps):
    tot_step = step_i + start_step
    optimiser.zero_grad()
    x_batch, y_batch = get_batch(X, y, bs) 
    # print(x_batch, y_batch)
    y_pred = model(x_batch).view(bs)
    # print(y_pred.shape, y_batch.shape)
    loss = loss_category(y_pred, y_batch)
    # print(loss.item())
    train_history[tot_step] = loss.item()
    loss.backward()
    optimiser.step()
    scheduler.step()
    # print(f"Epoch {step_i}, Learning Rate: {scheduler.get_last_lr()}")
    print(tot_step, ': ',loss.item())

    if tot_step % 100 == 0:
        # validation phase
        y_pred = model(X_val).view(val_split)
        # print(y_pred.shape, y_batch.shape)
        loss = loss_category(y_pred, y_val)
        # print(loss.item())
        val_history[tot_step] = loss.item()

start_step += num_steps

0 :  3194880.0
1 :  3162112.0
2 :  3129344.0
3 :  3014656.0
4 :  3063808.0
5 :  3276800.0
6 :  3047424.0
7 :  3063808.0
8 :  3325952.0
9 :  3014656.0
10 :  3112960.0
11 :  3178496.0
12 :  3260416.0
13 :  3391488.0
14 :  3227648.0
15 :  3145728.0
16 :  3325952.0
17 :  3080192.0
18 :  3145728.0
19 :  3112960.0
20 :  3178496.0
21 :  3211264.0
22 :  3227648.0
23 :  3129344.0
24 :  2981888.0
25 :  3194880.0
26 :  3063808.0
27 :  3211264.0
28 :  3260416.0
29 :  2981888.0
30 :  3112960.0
31 :  3178496.0
32 :  3276800.0
33 :  3178496.0
34 :  3178496.0
35 :  3014656.0
36 :  3014656.0
37 :  3244032.0
38 :  3211264.0
39 :  3211264.0
40 :  3145728.0
41 :  3047424.0
42 :  3342336.0
43 :  3129344.0
44 :  3096576.0
45 :  3194880.0
46 :  3162112.0
47 :  3309568.0
48 :  3178496.0
49 :  3162112.0
50 :  2998272.0
51 :  3260416.0
52 :  3129344.0
53 :  3293184.0
54 :  2965504.0
55 :  3014656.0
56 :  2899968.0
57 :  2998272.0
58 :  3014656.0
59 :  3162112.0
60 :  3211264.0
61 :  3244032.0
62 :  3031040.0
63

KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), 'saves/bad_model.pt')

In [None]:
plt.plot(list(train_history.keys())[500:], list(train_history.values())[500:], label='train')
# plt.plot(val_history.keys(), val_history.values(), label='validation')
plt.legend()
plt.show()

In [None]:
plt.plot(list(train_history.keys()), list(train_history.values()), label='train')
plt.plot(val_history.keys(), val_history.values(), label='validation')
plt.legend()
plt.show()

In [None]:
'''
# My results
At relu, model is stuck around 25k with 3x1000 steps with xavier
AT relu, moedl went to 9k and then exploded at 3x1000 steps with kaiming; then at 14k
At gelu, model is stuck atound 10k

Adam is better than AdamW for this task
'''

# TO DO

- [x] do inference, and run a partially trained model with the GUI intact
- [ ] **find a way to fix the fact that our model is giving integer loss
- [ ] **Fix the bug in initialisation
- [ ] implement weights and biases or tensorboard 
- [ ] improve the model
  - [ ] get a better/ bigger dataset
  - [ ] hyperparameter and architecture
    - [ ] add CNN
    - [x] better encoding
    - [ ] residual connections
    - [ ] try adamW after tuning b1 and b2
    - [ ] increase embedding dim
    - [ ] increase neurons in the layers
    - [ ] increase layers in the network
    - [ ] change loss function (maybe)
    - [ ] try diff learning rate scheduler(trapeziodal)
    - [ ] Add regularisation
      - [ ] l1,l2
      - [ ] dropout
    - [ ] Better initialisation
    - [ ] diff optimisation algorithm

- make the init proper by specifying the activation in the init as claude said
-  no cnn right now, maybe in future
- 