In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from data_process import tokenizers, utils
from nanoGPT.model import GPT, GPTConfig
from training.lightning_training import LightningGPT, GamesDataModule, GamesDataset
from datasets import load_dataset
import pytorch_lightning as pl
import torch
import os
import numpy as np
import seaborn as sns
from data_process.utils import (
    add_elo_token_to_games,
    join_material_tokens,
    remove_material_tokens,
    remove_last_player_material_token,
    row_for_base_training,
)

import training.load_models as load_models

In [2]:
from data_process.utils import add_elo_and_piece_count_to_dataset

# Tunable settings shared by every load / map / sampling step in this cell.
NUM_PROC = 6              # worker processes passed to load_dataset() and .map()
TEST_SAMPLE_SIZE = 20000  # upper bound on the number of test rows kept per dataset
SHUFFLE_SEED = 42         # fixed seed so the sampled subsets are reproducible

# Columns read from the CSV files. (`index`, `id`, `date` and `result` were listed
# but commented out in the original list, i.e. they are not loaded.)
columns_to_load = [
    "white_elo",
    "black_elo",
    "ply",
    "ply_30s",
    "piece_uci",
]

# Raw columns dropped once a mapping function has produced the model input; "ply" is kept.
columns_to_remove = [
    "white_elo",
    "black_elo",
    "ply_30s",
    "piece_uci",
]

piece_count_data_files = {"test": "./data/test_piece_count.csv"}
material_data_files = {"test": "./data/test.csv"}


def load_test_csv(data_files):
    """Load ';'-delimited CSV files, reading only `columns_to_load`.

    `data_files` maps split names to file paths; the result of `load_dataset`
    (indexed by split name below) is returned unchanged.
    """
    return load_dataset(
        "csv",
        data_files=data_files,
        delimiter=";",
        usecols=columns_to_load,
        num_proc=NUM_PROC,
    )


def sample_test_split(split):
    """Shuffle `split` with `SHUFFLE_SEED` and keep at most `TEST_SAMPLE_SIZE` rows.

    The row count is clamped to the split length so a file with fewer rows than
    `TEST_SAMPLE_SIZE` is used in full instead of raising on an out-of-range index.
    """
    sample_size = min(TEST_SAMPLE_SIZE, len(split))
    return split.shuffle(seed=SHUFFLE_SEED).select(range(sample_size))


# Raw loads keep their own names so the mapped datasets below never overwrite their input.
piece_count_raw = load_test_csv(piece_count_data_files)
material_raw = load_test_csv(material_data_files)

# Three views of the test data; `no_elo_dataset` and `dataset` both derive from the piece-count CSV.
no_elo_dataset = piece_count_raw.map(row_for_base_training, num_proc=NUM_PROC, remove_columns=columns_to_remove)
dataset = piece_count_raw.map(add_elo_and_piece_count_to_dataset, num_proc=NUM_PROC, remove_columns=columns_to_remove)
material_dataset = material_raw.map(add_elo_and_piece_count_to_dataset, num_proc=NUM_PROC, remove_columns=columns_to_remove)

# Evaluate on a fixed-size, reproducibly shuffled subset of each test split.
dataset["test"] = sample_test_split(dataset["test"])
material_dataset["test"] = sample_test_split(material_dataset["test"])
no_elo_dataset["test"] = sample_test_split(no_elo_dataset["test"])

In [3]:
# NOTE(review): wildcard import — presumably the source of the `*_tokenizer` objects used by
# later cells; confirm and replace with explicit names.
from training.model_configs import *

# Token offset / stride settings. They are not referenced again in the visible part of this
# notebook; the "_base" start index is derived from the non-base one.
test_token_step = 2
test_start_token = 1
test_token_step_base = 1
test_start_token_base = test_start_token // 2 + 1

# Checkpoint paths, one per model variant.
material_checkpoint = "./models/full_training/elo_material_ignore_material_prediction/epoch=9-step=1250000.ckpt"
material_pair_checkpoint = "./models/full_training/elo_material_pair_ignore_material_prediction/epoch=9-step=1250000.ckpt"
piece_count_checkpoint = "./models/full_training/elo_piece_count_ignore_material_prediction/epoch=9-step=1250000.ckpt"
base_checkpoint = "./models/full_training/masked_elo/epoch=9-step=1250000.ckpt"
base_no_mask_checkpoint = "./models/full_training/adaptive_elo/epoch=9-step=1250000.ckpt"
no_elo_checkpoint = "./models/full_training/no_elo/epoch=9-step=1250000.ckpt"

# Build the models in the same order as before so the printed parameter counts line up.
material_model = load_models.material_model(material_checkpoint)
material_pair_model = load_models.material_pair_model(material_pair_checkpoint)
piece_count_model = load_models.piece_count_model(piece_count_checkpoint)
base_model = load_models.base_elo_model(base_checkpoint)
base_no_mask_model = load_models.base_elo_no_mask_model(base_no_mask_checkpoint)
no_elo_model = load_models.no_elo_model(no_elo_checkpoint)

number of parameters: 27.34M
number of parameters: 28.15M
number of parameters: 28.17M
number of parameters: 27.29M
number of parameters: 27.29M
number of parameters: 27.27M


In [4]:
# Single Trainer instance reused by every `trainer.test(...)` call in this notebook.
# Logging is disabled; progress is rendered with the Rich progress bar.
progress_bar = pl.callbacks.RichProgressBar()
trainer = pl.Trainer(
    accelerator="gpu",
    callbacks=[progress_bar],
    logger=False,
    max_epochs=10,
    # precision="bf16-mixed",  # optional, currently disabled
)
# torch.set_float32_matmul_precision('high')  # optional, currently disabled

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [5]:
# Data module for the no-Elo model: no-Elo dataset + matching tokenizer, Elo masking off.
data_module = GamesDataModule(
    datasets=no_elo_dataset,
    tokenizer=no_elo_tokenizer,
    num_workers=12,
    mask_elo_token=False,
    batch_size=16,
)

Map (num_proc=6):   0%|          | 0/20000 [00:00<?, ? examples/s]

In [None]:
# Evaluate the no-Elo model on the most recently built `data_module`.
# The returned metrics are not stored; they are only shown as the cell's output.
trainer.test(
    no_elo_model,
    datamodule=data_module,
)

In [52]:
# Rebinds `data_module` to the piece-count dataset and tokenizer, Elo masking off.
data_module = GamesDataModule(
    datasets=dataset,
    tokenizer=piece_count_tokenizer,
    num_workers=12,
    mask_elo_token=False,
    batch_size=16,
)

Map (num_proc=6):   0%|          | 0/20000 [00:00<?, ? examples/s]

In [53]:
# Evaluate the piece-count model and keep its metrics under a dedicated name.
# `test_result` is rebound by the material-model test cell later in the notebook, which
# would otherwise discard the piece-count metrics; it is still assigned here so existing
# code that reads `test_result` right after this cell keeps working.
piece_count_test_result = trainer.test(piece_count_model, datamodule=data_module)[0]
test_result = piece_count_test_result

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [54]:
# Rebinds `data_module` to the material dataset and tokenizer, Elo masking off.
data_module = GamesDataModule(
    datasets=material_dataset,
    tokenizer=material_tokenizer,
    num_workers=12,
    mask_elo_token=False,
    batch_size=16,
)

Map (num_proc=6):   0%|          | 0/20000 [00:00<?, ? examples/s]

In [55]:
# Evaluate the material model; the first entry of the returned list is kept.
# This is the value reported as the masked "material" accuracy in the summary cell.
material_test_outputs = trainer.test(material_model, datamodule=data_module)
test_result = material_test_outputs[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [21]:
# Re-evaluate the material model with `masked_elo_test` switched off (reported as the
# unmasked "material" accuracy in the summary cell).
# The flag stays False afterwards, and `data_module` is whichever module was built last —
# it must still be the material one when this cell runs.
material_model.masked_elo_test = False
material_unmasked_outputs = trainer.test(material_model, datamodule=data_module)
test_result_2 = material_unmasked_outputs[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [22]:
# FIXME(review): `games_material_pair` is not defined in any visible cell, and this call passes
# `test_games=` whereas the other data modules in this notebook are built with `datasets=`.
# The cell appears to rely on leftover kernel state — confirm, and build the input explicitly
# so the notebook survives Restart Kernel -> Run All.
material_pair_data_module = GamesDataModule(test_games=list(games_material_pair), tokenizer=material_pair_tokenizer, num_workers=12)

In [23]:
# Evaluate the material-pair model; the first entry of the returned list is kept.
# Reported as the masked "material_pair" accuracy in the summary cell.
material_pair_outputs = trainer.test(material_pair_model, datamodule=material_pair_data_module)
test_result_3 = material_pair_outputs[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [24]:
# Re-evaluate the material-pair model with `masked_elo_test` switched off (reported as the
# unmasked "material_pair" accuracy in the summary cell). The flag stays False afterwards.
material_pair_model.masked_elo_test = False
material_pair_unmasked_outputs = trainer.test(material_pair_model, datamodule=material_pair_data_module)
test_result_4 = material_pair_unmasked_outputs[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [25]:
# FIXME(review): `base_games` is not defined in any visible cell, and this call passes
# `test_games=` whereas the other data modules in this notebook are built with `datasets=`.
# The cell appears to rely on leftover kernel state — confirm, and build the input explicitly
# so the notebook survives Restart Kernel -> Run All.
base_data_module = GamesDataModule(test_games=list(base_games), tokenizer=base_tokenizer, num_workers=12)
# Evaluate base_model before its `masked_elo_test` flag is switched off; this result is
# reported as the masked "base" accuracy in the summary cell.
test_result_5 = trainer.test(base_model, datamodule=base_data_module)[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [26]:
# Re-evaluate the base model with `masked_elo_test` switched off (reported as the
# unmasked "base" accuracy in the summary cell). The flag stays False afterwards.
base_model.masked_elo_test = False
base_unmasked_outputs = trainer.test(base_model, datamodule=base_data_module)
test_result_6 = base_unmasked_outputs[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [27]:
# Evaluate the base model trained without masking, on the same data module as `base_model`.
# Reported as the masked "base_no_mask" accuracy in the summary cell.
base_no_mask_outputs = trainer.test(base_no_mask_model, datamodule=base_data_module)
test_result_7 = base_no_mask_outputs[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [28]:
# Re-evaluate the no-mask base model with `masked_elo_test` switched off (reported as the
# unmasked "base_no_mask" accuracy in the summary cell). The flag stays False afterwards.
base_no_mask_model.masked_elo_test = False
base_no_mask_unmasked_outputs = trainer.test(base_no_mask_model, datamodule=base_data_module)
test_result_8 = base_no_mask_unmasked_outputs[0]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

In [29]:
# Pair up each model's (masked, unmasked) test results, then pull "test_acc" out of each.
# Insertion order is preserved, so both dicts list the models in the same order as before.
results_by_model = {
    "material": (test_result, test_result_2),
    "material_pair": (test_result_3, test_result_4),
    "base": (test_result_5, test_result_6),
    "base_no_mask": (test_result_7, test_result_8),
}

masked_test_accuracy = {
    model_name: masked_result["test_acc"]
    for model_name, (masked_result, _) in results_by_model.items()
}

unmasked_test_accuracy = {
    model_name: unmasked_result["test_acc"]
    for model_name, (_, unmasked_result) in results_by_model.items()
}

In [30]:
# Print both accuracy tables with four decimal places; the second heading carries the
# leading newline that separates the two tables.
for heading, accuracies in (
    ("Masked test accuracy:", masked_test_accuracy),
    ("\nUnmasked test accuracy:", unmasked_test_accuracy),
):
    print(heading)
    for model_name, accuracy in accuracies.items():
        print(f"{model_name}: {accuracy:.4f}")

Masked test accuracy:
material: 0.5216
material_pair: 0.5183
base: 0.5156
base_no_mask: 0.5128

Unmasked test accuracy:
material: 0.5060
material_pair: 0.5112
base: 0.5111
base_no_mask: 0.5152


**Wyjaśnienie**

Testy:
* Masked test accuracy - w trakcie testu maskujemy elo białych i sprawdzamy dokładność dla ruchów czarnych, następnie na odwrót, i wyciągamy średnią z obu wyników
* Unmasked test accuracy - nic nie maskujemy i sprawdzamy dokładność dla wszystkich ruchów naraz

Modele (pierwsze 3 trenowane z maskowaniem jednego elo, ostatni bez):
* `material`: [ruch białych, materiał czarnych, ruch czarnych, materiał białych, ...] np. Pe2e4 39 Pd7d5 39 Pe4d5 38 Pa7a6 39 ...
* `material_pair`: Pe2e4 39|39 ...
* `base`: bez informacji o materiale: Pe2e4 Pe7e5 ...
* `base_no_mask`: jak `base` ale bez maskowania podczas treningu