In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import LambdaLR

from chessEngine import ChessEncoder, MLPEngine

# Preprocessing

In [2]:
# Fixed seed so the shuffle - and therefore the positional train/val/test split
# taken from the front of the data later in the notebook - is identical on
# every Restart & Run All.
SHUFFLE_SEED = 42
df = pd.read_csv('data/fen_analysis.csv').sample(frac=1, random_state=SHUFFLE_SEED)[:50000]
# df = pd.read_csv('fen_analysis.csv')[:30000] # no shuffle
df

Unnamed: 0,fen_value,score
237198,rnb1kb1r/ppppqppp/5n2/4p3/8/4P2N/PPPP1PPP/RNBQ...,163
6112,r1bqk2r/pp3ppp/2n1p3/8/3P4/2nB1N2/P4PPP/R1B1QR...,-54
197789,rnbqk1nr/ppp1bppp/8/4p3/3PP3/5N2/PPP3PP/RNBQKB...,175
38144,r1bqkbnr/pppn1ppp/3pp3/8/2BPP3/8/PPP2PPP/RNBQK...,69
10617,r4rk1/ppqnbppp/2pp1nb1/4p1B1/3PP1P1/2PB1N1P/PP...,22
...,...,...
217742,2k5/1pp3p1/6q1/8/3R2b1/1P2P1b1/P2P1r1P/3K4 w -...,-727
1930,rn2kb1r/pp2pppp/2p1b3/q2n4/2pP4/2N1PNB1/PPQ2PP...,-16
240429,r1b3k1/ppp2pp1/5n1p/n3N2P/1q2p1P1/4P2B/3N1P2/R...,-85
199491,2qk4/7p/3P2p1/5p2/2KQ1P1P/8/7P/8 w - - 9 40,334


In [3]:
# Shared encoder instance; later cells call its encode_fen() and encode_score() methods.
encoder_object = ChessEncoder()

In [4]:
# Encode every FEN string in the frame with the shared encoder, preserving row order.
fen_encodings = [encoder_object.encode_fen(fen) for fen in df['fen_value']]

In [5]:
# Stack the per-position encodings into a single int32 tensor (one row per encoded FEN).
X = torch.tensor(fen_encodings, dtype=torch.int32)

In [6]:
# Display the encoded tensor as a quick sanity check.
X

tensor([[ 9, 11, 10,  ...,  0, 39, 39],
        [ 9,  0, 10,  ...,  0, 33, 34],
        [ 9, 11, 10,  ...,  0, 38, 38],
        ...,
        [ 9,  0, 10,  ...,  0, 32, 30],
        [ 0,  0,  8,  ...,  0, 13, 12],
        [ 0,  0,  0,  ...,  0,  5, 12]], dtype=torch.int32)

In [7]:
# encode_score() is handed the score as a string, so each value is stringified first.
score_encodings = [encoder_object.encode_score(str(score)) for score in df['score']]
# Regression targets as a float32 vector, in the same row order as the FEN encodings.
y = torch.tensor(score_encodings, dtype=torch.float32)

In [8]:
# Shapes of the feature tensor and the target vector; their first dimensions should match.
X.shape, y.shape

(torch.Size([50000, 200]), torch.Size([50000]))

# Helper Functions

In [9]:
# Number of rows reserved for each hold-out set; the split cell takes them
# positionally from the front of the data.
val_split = 10000  # rows [0, val_split) -> validation
test_split = 10000  # rows [val_split, val_split + test_split) -> test

In [10]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [11]:
def get_batch(X, Y, bs, target_device=None):
    """Draw a random mini-batch of index-aligned rows from X and Y.

    Row indices are sampled uniformly *with replacement*, so a batch can
    contain duplicate rows even when ``bs == len(X)``.

    Args:
        X: Feature tensor, indexed along its first dimension.
        Y: Target tensor with the same number of rows as X.
        bs: Number of rows to draw.
        target_device: Device the batch is moved to. When omitted, the
            notebook-level ``device`` is used.

    Returns:
        Tuple ``(x, y)`` on the target device; ``y`` is cast to float32.

    Raises:
        ValueError: If X is empty or X and Y differ in length.
    """
    assert isinstance(X, torch.Tensor)
    assert isinstance(Y, torch.Tensor)

    if len(X) != len(Y):
        # A mismatch means rows are not paired one-to-one; fail loudly instead
        # of sampling from a partial overlap or raising an opaque IndexError.
        raise ValueError(f"X and Y must have the same length, got {len(X)} and {len(Y)}")
    if len(X) == 0:
        raise ValueError("cannot sample a batch from an empty dataset")
    if target_device is None:
        # Only fall back to the notebook global when no device was requested.
        target_device = device

    batch = torch.randint(0, len(X), (bs,))
    x = X[batch].to(target_device)
    y = Y[batch].to(target_device).to(torch.float32)
    return x, y
# b = get_batch(X, y, 64)

In [12]:
# Carve the data into validation / test / train partitions by position.
#
# X and y are rebound to the training remainder below, so running this cell a
# second time would slice the already-truncated training set and leak training
# rows into the validation and test sets. The assertions turn that mistake into
# an immediate, explicit failure.
assert len(X) == len(y) == len(df), (
    "X/y no longer match df - re-run the encoding cells before splitting again"
)
assert len(X) > val_split + test_split, "not enough rows left for a training set"

holdout_end = val_split + test_split
X_val = X[:val_split].to(device)
y_val = y[:val_split].to(device)
X_test = X[val_split:holdout_end].to(device)
y_test = y[val_split:holdout_end].to(device)
# The training data stays where it is; get_batch moves each sampled batch to the device.
X = X[holdout_end:]
y = y[holdout_end:]

In [13]:
# Rows left for training after the validation and test slices were removed.
len(X)

30000

# Training

In [14]:
# hyperparameters
lr = 0.1  # base learning rate handed to the optimiser
num_steps = 500  # optimisation steps per run of the training cell
warmup_steps = 20  # scheduler steps spent ramping the learning rate up
bs_train = len(X)  # training batch size: as many rows as the training set holds
bs_eval = 10000
# allowed_error = 100 #
# NOTE(review): d1 is not referenced anywhere in the visible notebook - confirm it is still needed.
d1 = {1: 10, 2: 20}
# Never request more rows per batch than the training set contains.
bs_train = min(bs_train, len(X))

In [15]:
# I accidentally used a smaller lr for the scheduler and it worked better; maybe try it?

In [16]:
# Network under training, moved to the selected device.
model = MLPEngine(
    embedding_dim=64,
    bs_train=bs_train,
    bs_eval=bs_eval,
).to(device)

# Mean-squared-error objective between the model output and the encoded score.
loss_category = nn.MSELoss()

optimiser = torch.optim.AdamW(
    model.parameters(),
    lr=lr,
    betas=(0.9, 0.999),
    eps=1e-3,
    weight_decay=1e-5,
)

# Define warm-up and decay
def lr_lambda(epoch, warmup=None):
    """Learning-rate multiplier: linear warm-up followed by exponential decay.

    The warm-up ramp is ``(epoch + 1) / warmup`` rather than ``epoch / warmup``
    so the multiplier is non-zero at ``epoch == 0``; with a zero multiplier the
    very first optimiser step would run with a learning rate of 0 and change
    nothing. The ramp reaches 1.0 on its last step and the decay starts from 1.0.

    Args:
        epoch: Scheduler step counter (0-based).
        warmup: Number of warm-up steps. When omitted, the notebook-level
            ``warmup_steps`` is used.

    Returns:
        Factor by which the base learning rate is multiplied.
    """
    if warmup is None:
        warmup = warmup_steps
    if epoch < warmup:
        return (epoch + 1) / warmup
    # Exponential decay after warm-up
    return 0.99 ** (epoch - warmup)

# Scale the optimiser's base learning rate by lr_lambda(step count); the
# training loop calls scheduler.step() once per optimisation step.
scheduler = LambdaLR(optimiser, lr_lambda)
# NOTE(review): presumably torch.nn.Module.compile() (in-place torch.compile) -
# confirm that MLPEngine does not define its own compile().
model.compile()

In [17]:
import torch._dynamo
# Let TorchDynamo fall back to eager execution when compilation fails instead of raising.
torch._dynamo.config.suppress_errors = True
# Allow faster, reduced-precision algorithms (e.g. TF32) for float32 matrix multiplications.
torch.set_float32_matmul_precision('high')

In [18]:
# Loss logs keyed by global step. They live in their own cell so that re-running
# the training cell extends them instead of resetting them.
train_history = {}
val_history = {}
# Global step offset; the training cell adds num_steps to it after each run.
start_step = 0

In [None]:
model.train()
# train_history = {}
# val_history = {}
for step_i in range(num_steps):
    # Global step index, so repeated runs of this cell extend the same history.
    tot_step = step_i + start_step
    optimiser.zero_grad()
    # get_batch samples row indices with replacement, so even with
    # bs_train == len(X) this is a resampled batch, not an exact full pass.
    x_batch, y_batch = get_batch(X, y, bs_train)
    y_pred = model(x_batch).view(bs_train)
    loss = loss_category(y_pred, y_batch)
    # Read the scalar once: each .item() call forces a device-to-host sync.
    train_loss = loss.item()
    train_history[tot_step] = train_loss
    loss.backward()
    optimiser.step()
    scheduler.step()
    # print(f"Epoch {step_i}, Learning Rate: {scheduler.get_last_lr()}")
    print(tot_step, ': ', train_loss)

    if tot_step % 100 == 0 and tot_step != 0:
        model.eval()
        # Validation phase. no_grad() stops autograd from recording a graph for
        # the whole validation set, which is never back-propagated through.
        with torch.no_grad():
            val_pred = model(X_val).view(val_split)
            val_loss = loss_category(val_pred, y_val).item()
        val_history[tot_step] = val_loss
        model.train()

# Advance the offset so the next run of this cell continues the step numbering.
start_step += num_steps

0 :  3016927.5
1 :  3011849.25
2 :  3040441.75
3 :  3041635.0
4 :  3042979.0
5 :  3114352.25
6 :  3056656.25
7 :  3144858.75
8 :  3151143.5
9 :  3153551.5
10 :  3264073.5
11 :  2979139.25
12 :  3063376.0
13 :  3001871.25
14 :  3135872.5
15 :  2881217.75
16 :  2867523.5
17 :  2845629.25
18 :  3010931.5
19 :  2820270.75
20 :  2757164.25
21 :  2761374.25
22 :  2568236.0
23 :  2461835.25
24 :  2273523.0
25 :  2248040.25
26 :  2170841.0
27 :  2065784.875
28 :  1944858.375
29 :  1722006.5
30 :  1725616.125
31 :  1545782.875
32 :  1313766.375
33 :  1327864.0
34 :  1053982.75
35 :  1009687.5
36 :  933412.125
37 :  804957.375
38 :  670455.0
39 :  565281.3125
40 :  516536.90625
41 :  421120.34375
42 :  367956.9375
43 :  345027.125
44 :  310323.75
45 :  298465.75
46 :  249754.828125
47 :  258057.53125
48 :  234494.5
49 :  230769.359375
50 :  207552.59375
51 :  218473.140625
52 :  204941.328125
53 :  211099.203125
54 :  169162.21875
55 :  178440.296875
56 :  164935.453125
57 :  133069.28125
58 :  

In [None]:
# Persist only the learned parameters (the state_dict), not the module object.
checkpoint_path = Path('saves/bad_model2.pt')
# torch.save does not create missing directories, so make sure the target exists.
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), checkpoint_path)

In [None]:
# Training loss only, skipping the first 100 recorded steps.
late_steps = list(train_history)[100:]
late_losses = list(train_history.values())[100:]
plt.plot(late_steps, late_losses, label='train')
# plt.plot(val_history.keys(), val_history.values(), label='validation')
plt.legend()
plt.show()

In [None]:
# Full training curve with the recorded validation losses overlaid.
train_steps = list(train_history)
train_losses = list(train_history.values())
plt.plot(train_steps, train_losses, label='train')
plt.plot(val_history.keys(), val_history.values(), label='validation')
plt.legend()
plt.show()

In [None]:
'''
# My results
At relu, model is stuck around 25k with 3x1000 steps with xavier
At relu, model went to 9k and then exploded at 3x1000 steps with kaiming; then at 14k
At gelu, model is stuck around 10k

Adam is better than AdamW for this task
'''

# TO DO

- [x] do inference, and run a partially trained model with the GUI intact
- [x] **Find a way to fix the fact that our model is giving integer loss**
- [x] **Fix the bug in initialisation**
- [ ] implement Weights & Biases or TensorBoard
- [ ] improve the model
  - [x] get a better/ bigger dataset
  - [ ] hyperparameter and architecture
    - [x] better encoding
    - [ ] residual connections
    - [ ] try adamW after tuning b1 and b2
    - [ ] increase embedding dim
    - [x] increase neurons in the layers
    - [x] increase layers in the network
    - [x] change loss function (maybe)
    - [ ] try a different learning rate scheduler (trapezoidal)
    - [ ] Add regularisation
      - [ ] l1,l2
      - [x] dropout
    - [x] Better initialisation
    - [x] diff optimisation algorithm

# Note