In [1]:
import torch
import torch.nn as nn
import pandas as pd
from chessEngine import ChessEncoder, MLPEngine
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import CosineAnnealingLR

# Preprocessing

In [2]:
df = pd.read_csv('fen_analysis.csv').sample(frac=1)[:30000] # This shuffles the rows
df

Unnamed: 0,fen_value,score
75668,r1bqkb1r/ppp2ppp/2n2n2/1N1pp3/3P1B2/8/PPP1PPPP...,26
289236,rnbqkbnr/pp4pp/2pp1p2/4p3/2B1P3/2P2N2/PP1P1PPP...,-90
6251,r1bqkb1r/pp2pp1p/2np1np1/2p3B1/2B1P3/3P1Q2/PPP...,115
317354,rn1qkb1r/pQpb3p/3ppnp1/5p2/3P4/2P1PP2/PP4PP/RN...,-23
126146,2kr4/pp2R3/7p/2n3p1/3p4/5P2/PPP2P1P/4R2K w - -...,437
...,...,...
253928,1r2qrk1/5ppp/p1bppb2/2p5/2P1P3/P1N2N2/2Q1BPPP/...,-297
239735,rnb1k1nr/ppp1bppp/8/8/2p5/2N2N2/PPPP1PPP/R1B1K...,498
124893,rnq2k2/1b2bppp/4p3/1p6/pQ6/P2R4/BPP2PPP/R5K1 w...,-262
332048,r4rk1/ppp1q1pp/3bpn2/8/8/2NQB3/PPP2PPP/R4RK1 b...,-22


In [3]:
encoder_object = ChessEncoder()

In [4]:
fen_encodings = []
for fen_i in df['fen_value']:
    encoded_fen = encoder_object.encode_fen(fen_i)
    fen_encodings.append(encoded_fen)

['r1bqkb1r/ppp2ppp/2n2n2/1N1pp3/3P1B2/8/PPP1PPPP/R2QKBNR', 'w', 'KQkq', '-']
['rnbqkbnr/pp4pp/2pp1p2/4p3/2B1P3/2P2N2/PP1P1PPP/RNBQ1RK1', 'b', 'kq', '-']
['r1bqkb1r/pp2pp1p/2np1np1/2p3B1/2B1P3/3P1Q2/PPP2PPP/RN2K1NR', 'b', 'KQkq', '-']
['rn1qkb1r/pQpb3p/3ppnp1/5p2/3P4/2P1PP2/PP4PP/RNB1KBNR', 'b', 'KQkq', '-']
['2kr4/pp2R3/7p/2n3p1/3p4/5P2/PPP2P1P/4R2K', 'w', '-', '-']
['6k1/q1p2rpp/pb3p2/1b1p4/1P3P1N/P2P1RQ1/1B4PP/7K', 'b', '-', '-']
['r2q1rk1/pbpnnpbp/1p1pp1p1/6P1/2P1P2P/2NP1P2/PP2N3/R1BQKB1R', 'w', 'KQ', '-']
['5k2/b4p2/5np1/7p/r1K2P2/5B1P/6P1/8', 'w', '-', '-']
['r4rk1/ppp3pp/1b2pn2/8/4p3/P1N3qP/1PPBQ1P1/R3R2K', 'w', '-', '-']
['rnbqkbnr/1p2pppp/p2p4/2p5/2B1P3/P1N5/1PPP1PPP/R1BQK1NR', 'b', 'KQkq', '-']
['4r2k/p2rNppp/8/2Q5/6q1/1P2BN2/P1P2PPP/R4RK1', 'b', '-', '-']
['r1bqkbnr/pp2pppp/2np4/8/3pP3/2N2N2/PPP2PPP/R1BQKB1R', 'w', 'KQkq', '-']
['2kr4/pppb1p2/2npqb2/3N2p1/4P1PN/3PB1K1/PPP5/R2Q4', 'b', '-', '-']
['3r1r2/1p1q1kpQ/p1p1bp2/3p4/8/1P1B4/PBP3PP/R4RK1', 'w', '-', '-']
['rn1k4/pppb1Q1

In [5]:
X = torch.tensor(fen_encodings, dtype=torch.int32)

In [6]:
X

tensor([[ 9,  0, 10,  ...,  0, 39, 39],
        [ 9, 11, 10,  ...,  0, 39, 39],
        [ 9,  0, 10,  ...,  0, 39, 39],
        ...,
        [ 9, 11,  8,  ...,  0, 28, 29],
        [ 9,  0,  0,  ...,  0, 31, 31],
        [ 0,  0,  0,  ...,  0,  2,  0]], dtype=torch.int32)

In [7]:
score_encodings = []
for y_i in df['score']:
    # print(y_i)
    encoded_score = encoder_object.encode_score(str(y_i))
    score_encodings.append(encoded_score)
y = torch.tensor(score_encodings, dtype=torch.float32)

In [8]:
X.shape, y.shape

(torch.Size([30000, 200]), torch.Size([30000]))

# Helper Functions

In [9]:
val_split = 10000
test_split = 10000

In [10]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [11]:
def get_batch(X, Y, bs):
    
    assert isinstance(X, torch.Tensor)
    assert isinstance(Y, torch.Tensor)

    batch = torch.randint(0, len(X), (bs,))
    x = X[batch].to(device)
    y = Y[batch].to(device).to(torch.bfloat16)
    return x, y
# b = get_batch(X, y, 64)

In [12]:
X_val = X[:val_split].to(device)
y_val = y[:val_split].to(device)
X_test = X[val_split:test_split].to(device)
y_test = y[val_split:test_split].to(device)
X = X[test_split:]
y = y[test_split:]

In [13]:
len(X)

20000

# Training

In [14]:
# hyperparameters
lr = 0.1
num_steps = 3000
warmup_steps = 250
bs = len(X)
# allowed_error = 100 #
d1 = {1:10, 2:20}
if bs > len(X): bs = len(X)

In [15]:
from torch.optim.lr_scheduler import LambdaLR


In [16]:
model = MLPEngine(embedding_dim=64).to(device)
loss_category = nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr = lr) # i accidenly used a smaller lr for scheduler and it worked better, maybe try it?
# Define warm-up and decay
def lr_lambda(epoch):
    if epoch < warmup_steps:  
        return epoch / warmup_steps
    else:  # Exponential decay after warm-up
        return 0.95 ** (epoch - warmup_steps)

scheduler = LambdaLR(optimiser, lr_lambda)
model = model.to(torch.bfloat16)
model.compile()

In [17]:
import torch._dynamo
torch._dynamo.config.suppress_errors = True

In [18]:
train_history = {}
val_history = {}
start_step = 0

In [19]:
model.train()
# train_history = {}
# val_history = {}
for step_i in range(num_steps):
    tot_step = step_i + start_step
    optimiser.zero_grad()
    x_batch, y_batch = get_batch(X, y, bs) 
    # print(x_batch, y_batch)
    y_pred = model(x_batch).view(bs)
    # print(y_pred.shape, y_batch.shape)
    loss = loss_category(y_pred, y_batch)
    # print(loss.item())
    train_history[tot_step] = loss.item()
    loss.backward()
    optimiser.step()
    scheduler.step()
    # print(f"Epoch {step_i}, Learning Rate: {scheduler.get_last_lr()}")
    print(tot_step, ': ',loss.item())

    if tot_step % 100 == 0:
        # validation phase
        y_pred = model(X_val).view(val_split)
        # print(y_pred.shape, y_batch.shape)
        loss = loss_category(y_pred, y_val)
        # print(loss.item())
        val_history[tot_step] = loss.item()

start_step += num_steps

0 :  2965504.0
1 :  3244032.0
2 :  3014656.0
3 :  3063808.0
4 :  3080192.0
5 :  3260416.0
6 :  3112960.0
7 :  3080192.0
8 :  3112960.0
9 :  3096576.0
10 :  3194880.0
11 :  3260416.0
12 :  3145728.0
13 :  3178496.0
14 :  3129344.0
15 :  3227648.0
16 :  3080192.0
17 :  3014656.0
18 :  3129344.0
19 :  2981888.0
20 :  3276800.0
21 :  3080192.0
22 :  3194880.0
23 :  3129344.0
24 :  2867200.0
25 :  3227648.0
26 :  3293184.0
27 :  3112960.0
28 :  3211264.0
29 :  3162112.0
30 :  3211264.0
31 :  3096576.0
32 :  3260416.0
33 :  3162112.0
34 :  3129344.0
35 :  3145728.0
36 :  3129344.0
37 :  2949120.0
38 :  3080192.0
39 :  3129344.0
40 :  3096576.0
41 :  3129344.0
42 :  3047424.0
43 :  3063808.0
44 :  3031040.0
45 :  3227648.0
46 :  3129344.0
47 :  3112960.0
48 :  3194880.0
49 :  3063808.0
50 :  3227648.0
51 :  3096576.0
52 :  3129344.0
53 :  3162112.0
54 :  2965504.0
55 :  3096576.0
56 :  3129344.0
57 :  3424256.0
58 :  3211264.0
59 :  3145728.0
60 :  3375104.0
61 :  3211264.0
62 :  2998272.0
63

KeyboardInterrupt: 

In [21]:
torch.save(model.state_dict(), 'saves/bad_model.pt')

In [None]:
plt.plot(list(train_history.keys())[500:], list(train_history.values())[500:], label='train')
# plt.plot(val_history.keys(), val_history.values(), label='validation')
plt.legend()
plt.show()

In [None]:
plt.plot(list(train_history.keys()), list(train_history.values()), label='train')
plt.plot(val_history.keys(), val_history.values(), label='validation')
plt.legend()
plt.show()

In [None]:
'''
# My results
At relu, model is stuck around 25k with 3x1000 steps with xavier
AT relu, moedl went to 9k and then exploded at 3x1000 steps with kaiming; then at 14k
At gelu, model is stuck atound 10k

Adam is better than AdamW for this task
'''

# Inference

In [49]:
import chess
b = chess.Board()
print(b)
dir(b), b.fen()

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


(['__annotations__',
  '__class__',
  '__copy__',
  '__deepcopy__',
  '__delattr__',
  '__dict__',
  '__dir__',
  '__doc__',
  '__eq__',
  '__format__',
  '__ge__',
  '__getattribute__',
  '__gt__',
  '__hash__',
  '__init__',
  '__init_subclass__',
  '__le__',
  '__lt__',
  '__module__',
  '__ne__',
  '__new__',
  '__reduce__',
  '__reduce_ex__',
  '__repr__',
  '__setattr__',
  '__sizeof__',
  '__str__',
  '__subclasshook__',
  '__weakref__',
  '_algebraic',
  '_algebraic_and_push',
  '_algebraic_without_suffix',
  '_attacked_for_king',
  '_clear_board',
  '_ep_skewered',
  '_epd_operations',
  '_from_chess960',
  '_generate_evasions',
  '_is_halfmoves',
  '_is_safe',
  '_parse_epd_ops',
  '_push_capture',
  '_reduces_castling_rights',
  '_remove_piece_at',
  '_repr_svg_',
  '_reset_board',
  '_set_board_fen',
  '_set_castling_fen',
  '_set_chess960_pos',
  '_set_piece_at',
  '_set_piece_map',
  '_slider_blockers',
  '_stack',
  '_to_chess960',
  '_transposition_key',
  '_valid_ep_sq

In [22]:
fen_sample ='rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1'

In [34]:
def get_score(fen_sample):
    # encoding
    encoder_object = ChessEncoder()
    encoded_fen = torch.tensor(encoder_object.encode_fen(fen_i), dtype=torch.int32).to(device).view(200)
    # print(encoded_fen.shape)
    bs=1
    model = MLPEngine(embedding_dim=64).to(device)
    model.load_state_dict(torch.load('saves/bad_model.pt'))
    model.eval()
    y_pred = model(encoded_fen)
    return y_pred.item()
    # run the model

In [41]:
get_score(fen_sample)

['8/7P/8/6K1/6P1/6k1/8/8', 'b', '-', '-']


  model.load_state_dict(torch.load('saves/bad_model.pt'))


-656.3048095703125

- [ ] do inference, and run a partially trained model with the GUI intact
- [ ] find a way to fix the fact that our model is giving integer loss
- [ ] implement weights and biases or tensorboard 
- [ ] improve the model
  - [ ] get a better/ bigger dataset
  - [ ] hyperparameter and architecture
    - [ ] add CNN
    - [ ] better encoding
    - [ ] residual connections
    - [ ] try adamW after tuning b1 and b2
    - [ ] increase embedding dim
    - [ ] increase neurons in the layers
    - [ ] increase layers in the network
    - [ ] change loss function (maybe)
    - [ ] try diff learning rate scheduler(trapeziodal)
    - [ ] Add regularisation
      - [ ] l1,l2
      - [ ] dropout
    - [ ] Better initialisation
    - [ ] diff optimisation algorithm

- make the init proper by specifying the activation in the init as claude said
- cnn is good, to do that you have to change the dimension of the embedding to something good.