In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from data_process import tokenizers, utils
from nanoGPT.model import  GPT, GPTConfig
from lightning_training import LightningGPT, GamesDataModule, GamesDataset
import pytorch_lightning as pl
import torch
import os
import numpy as np
import seaborn as sns
from data_process.utils import add_elo_token_to_games, join_material_tokens, remove_material_tokens, remove_last_player_material_token

In [2]:
def cut_game_after_30s_left(game):
    """Return the game's token string truncated at its ``ply_30s`` mark.

    ``game`` is any row-like object exposing ``piece_uci`` (a space-separated
    token string) and ``ply_30s`` (an integer ply index). When ``ply_30s`` is
    greater than -1, only the first ``ply_30s * 3`` tokens are kept; otherwise
    the token string is returned whole.

    The factor of three presumably reflects one move token followed by two
    material tokens per ply, as in the sample rows -- confirm against the
    data format.
    """
    tokens = game.piece_uci.split(" ")
    # -1 (or anything not greater than -1) acts as the "do not cut" sentinel.
    kept = tokens[:game.ply_30s * 3] if game.ply_30s > -1 else tokens
    return " ".join(kept)

In [3]:
# Column names for the header-less, semicolon-delimited test CSV.
headers = [
    "index", "id", "date",
    "white_elo", "black_elo", "result",
    "ply", "ply_30s", "piece_uci",
]

# Read the test set and keep a fixed (seeded) 10% sample of its rows.
games_df = pd.read_csv(
    "./data/test.csv", delimiter=";", header=None, names=headers
).sample(frac=0.1, random_state=42)

# Row-wise truncation of every sampled game at its ply_30s mark.
cut_games = games_df.apply(cut_game_after_30s_left, axis=1)

# Material-token variant of the games, with the players' Elo tokens added.
games = add_elo_token_to_games(
    remove_last_player_material_token(cut_games),
    games_df.white_elo,
    games_df.black_elo,
)

games_df

Unnamed: 0,index,id,date,white_elo,black_elo,result,ply,ply_30s,piece_uci
75721,99034768,j8dut9JL,2025.01.31,2425,2427,1-0,85,68,Pd2d4 39 39 Ng8f6 39 39 Ng1f3 39 39 Pc7c5 39 3...
80184,7380677,UMIvcBzB,2024.12.03,1919,2020,1-0,67,47,Pd2d4 39 39 Ng8f6 39 39 Nb1c3 39 39 Pc7c5 39 3...
19864,14462158,RP3reGqr,2024.12.05,2135,2144,1-0,41,-1,Ng1f3 39 39 Pc7c6 39 39 Pg2g3 39 39 Pd7d5 39 3...
76699,99355390,OC9pukWM,2025.01.31,2402,2433,1-0,53,-1,Pc2c4 39 39 Pe7e6 39 39 Nb1c3 39 39 Pd7d5 39 3...
92991,14089339,BL4die9s,2024.12.05,1200,1143,1-0,45,-1,Pe2e4 39 39 Pd7d5 39 39 Pe4d5 39 38 Qd8d5 38 3...
...,...,...,...,...,...,...,...,...,...
5002,97062,?,2021.12.07,2726,2574,1-0,9,-1,Pd2d4 39 39 Pd7d5 39 39 Nb1c3 39 39 Ng8f6 39 3...
30151,45151922,dUvdD4GK,2024.12.15,2258,2163,0-1,80,-1,Pc2c4 39 39 Pe7e6 39 39 Ng1f3 39 39 Pd7d5 39 3...
93194,14092632,7yOJ8xPr,2024.12.05,1051,1129,1-0,45,-1,Pe2e4 39 39 Pe7e5 39 39 Pd2d3 39 39 Nb8c6 39 3...
73199,98330191,Zw7vLYf0,2025.01.31,2418,2414,0-1,48,22,Pe2e4 39 39 Pd7d5 39 39 Pe4d5 39 38 Qd8d5 38 3...


In [4]:
# Material-pair variant: material tokens joined, then Elo tokens added.
games_material_pair = add_elo_token_to_games(
    join_material_tokens(cut_games),
    games_df.white_elo,
    games_df.black_elo,
)

In [5]:
# Base variant: material tokens removed, then Elo tokens added.
base_games = add_elo_token_to_games(
    remove_material_tokens(cut_games),
    games_df.white_elo,
    games_df.black_elo,
)

In [6]:
# --- Tokenizers: one per input encoding --------------------------------------
material_pair_tokenizer = tokenizers.FullMoveEloMaterialPairTokenizer()
material_tokenizer = tokenizers.FullMoveEloMaterialTokenizer()
base_tokenizer = tokenizers.FullMoveTokenizerWithElo()


def _make_gpt_config(block_size, vocab_size):
    """Build a GPTConfig with the architecture shared by every model here.

    Only ``block_size`` and ``vocab_size`` differ between the models; depth,
    head count, embedding width and the bias setting are identical.
    """
    return GPTConfig(
        block_size=block_size,
        vocab_size=vocab_size,
        n_layer=8,
        n_head=8,
        n_embd=512,
        bias=False,
    )


# NOTE(review): the pair tokenizer's size is read via len(.vocab) while the
# other two use .vocab_size -- kept as-is; confirm whether the pair tokenizer
# also exposes .vocab_size before unifying.
material_pair_model_config = _make_gpt_config(604, len(material_pair_tokenizer.vocab))
material_model_config = _make_gpt_config(604, material_tokenizer.vocab_size)
# The base encoding uses half the block size of the material encodings.
base_model_config = _make_gpt_config(302, base_tokenizer.vocab_size)

# --- Evaluation window -------------------------------------------------------
# Material encodings: start at token 21 and step by 2.
test_start_token = 21
test_token_step = 2

# Base encoding: start at 21 // 2 + 1 == 11 and step by 1.
test_start_token_base = test_start_token // 2 + 1
test_token_step_base = 1


def _load_lightning_gpt(checkpoint, config, tokenizer, start_token, token_step):
    """Load a LightningGPT checkpoint with this notebook's common settings.

    The same ``start_token`` / ``token_step`` pair is used for both the test
    and the training target arguments, and ``masked_elo_test`` starts as True.
    """
    return LightningGPT.load_from_checkpoint(
        checkpoint,
        config=config,
        test_start_token=start_token,
        test_token_step=token_step,
        # Keyword spelling kept exactly as in the original call; presumably it
        # matches the LightningGPT parameter name -- verify before "fixing".
        trainig_ignore_first_n_targets=start_token,
        training_target_step=token_step,
        tokenizer=tokenizer,
        masked_elo_test=True,
    )


# --- Checkpoints and models (loaded in the original order) -------------------
material_checkpoint = "./models/full_training/elo_material_ignore_material_prediction/epoch=9-step=1250000.ckpt"
material_model = _load_lightning_gpt(
    material_checkpoint,
    material_model_config,
    material_tokenizer,
    test_start_token,
    test_token_step,
)

material_pair_checkpoint = "./models/full_training/elo_material_pair_ignore_material_prediction/epoch=9-step=1250000.ckpt"
material_pair_model = _load_lightning_gpt(
    material_pair_checkpoint,
    material_pair_model_config,
    material_pair_tokenizer,
    test_start_token,
    test_token_step,
)

base_checkpoint = "./models/full_training/masked_elo/epoch=9-step=1250000.ckpt"
base_model = _load_lightning_gpt(
    base_checkpoint,
    base_model_config,
    base_tokenizer,
    test_start_token_base,
    test_token_step_base,
)

base_no_mask_checkpoint = "./models/full_training/adaptive_elo/epoch=9-step=1250000.ckpt"
base_no_mask_model = _load_lightning_gpt(
    base_no_mask_checkpoint,
    base_model_config,
    base_tokenizer,
    test_start_token_base,
    test_token_step_base,
)


number of parameters: 27.34M
number of parameters: 28.15M
number of parameters: 27.29M
number of parameters: 27.29M


In [7]:
# One GPU trainer shared by every evaluation cell below; only trainer.test()
# is called on it in this notebook.
progress_bar = pl.callbacks.RichProgressBar()
trainer = pl.Trainer(
    accelerator="gpu",
    callbacks=[progress_bar],
    logger=False,
    max_epochs=10,
)
# Left disabled in this run:
#   precision="bf16-mixed" (Trainer argument)
#   torch.set_float32_matmul_precision('high')

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [8]:
# Test data for the material model.
# NOTE(review): mask_elo_token=False is passed only for this data module; the
# material-pair and base data modules rely on the default -- confirm that this
# difference is intended.
data_module = GamesDataModule(
    test_games=list(games),
    tokenizer=material_tokenizer,
    num_workers=12,
    mask_elo_token=False,
)

In [9]:
# Set the flag explicitly instead of relying on the value given at load time:
# a later cell flips it to False, so re-running this cell out of order would
# otherwise store an unmasked result under the "masked" name.
material_model.masked_elo_test = True
test_result = trainer.test(material_model, datamodule=data_module)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [10]:
# Second pass over the same model and data, with masked_elo_test switched off.
material_model.masked_elo_test = False
test_result_2 = trainer.test(
    material_model,
    datamodule=data_module,
)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [11]:
# Test data for the material-pair model, tokenised with its own tokenizer.
material_pair_data_module = GamesDataModule(
    test_games=list(games_material_pair),
    tokenizer=material_pair_tokenizer,
    num_workers=12,
)

In [12]:
# Set the flag explicitly instead of relying on the value given at load time:
# a later cell flips it to False, so re-running this cell out of order would
# otherwise store an unmasked result under the "masked" name.
material_pair_model.masked_elo_test = True
test_result_3 = trainer.test(material_pair_model, datamodule=material_pair_data_module)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [13]:
# Second pass over the same model and data, with masked_elo_test switched off.
material_pair_model.masked_elo_test = False
test_result_4 = trainer.test(
    material_pair_model,
    datamodule=material_pair_data_module,
)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [14]:
# Test data for both base models (material tokens removed).
base_data_module = GamesDataModule(
    test_games=list(base_games),
    tokenizer=base_tokenizer,
    num_workers=12,
)

# Set the flag explicitly instead of relying on the value given at load time:
# a later cell flips it to False, so re-running this cell out of order would
# otherwise store an unmasked result under the "masked" name.
base_model.masked_elo_test = True
test_result_5 = trainer.test(base_model, datamodule=base_data_module)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [15]:
# Second pass over the same model and data, with masked_elo_test switched off.
base_model.masked_elo_test = False
test_result_6 = trainer.test(
    base_model,
    datamodule=base_data_module,
)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [16]:
# Set the flag explicitly instead of relying on the value given at load time:
# a later cell flips it to False, so re-running this cell out of order would
# otherwise store an unmasked result under the "masked" name.
base_no_mask_model.masked_elo_test = True
test_result_7 = trainer.test(base_no_mask_model, datamodule=base_data_module)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [17]:
# Second pass over the same model and data, with masked_elo_test switched off.
base_no_mask_model.masked_elo_test = False
test_result_8 = trainer.test(
    base_no_mask_model,
    datamodule=base_data_module,
)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [18]:
# "test_acc" of each model from the runs made with masked_elo_test=True.
masked_test_accuracy = {
    model_name: run["test_acc"]
    for model_name, run in (
        ("material", test_result),
        ("material_pair", test_result_3),
        ("base", test_result_5),
        ("base_no_mask", test_result_7),
    )
}

# "test_acc" of each model from the runs made with masked_elo_test=False.
unmasked_test_accuracy = {
    model_name: run["test_acc"]
    for model_name, run in (
        ("material", test_result_2),
        ("material_pair", test_result_4),
        ("base", test_result_6),
        ("base_no_mask", test_result_8),
    )
}

In [19]:
# Print both accuracy tables, one "<model>: <accuracy>" line per model with
# four decimal places; the second heading carries the separating blank line.
for heading, accuracies in (
    ("Masked test accuracy:", masked_test_accuracy),
    ("\nUnmasked test accuracy:", unmasked_test_accuracy),
):
    print(heading)
    for model_name, accuracy in accuracies.items():
        print(f"{model_name}: {accuracy:.4f}")

Masked test accuracy:
material: 0.5205
material_pair: 0.5171
base: 0.5140
base_no_mask: 0.5118

Unmasked test accuracy:
material: 0.5124
material_pair: 0.5101
base: 0.5120
base_no_mask: 0.5138
