In [None]:
import numpy as np 
import pandas as pd 

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



/kaggle/input/chess-games/chess_games(1).db


In [None]:
import sqlite3 as sq
import random
from sklearn.model_selection import train_test_split
from transformers import (
    GPT2Tokenizer, GPT2LMHeadModel, DataCollatorForLanguageModeling,
    Trainer, TrainingArguments, TextDataset
)

2025-05-21 08:47:22.963781: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747817243.205980      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747817243.271074      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


At first the reduced dataset were on a DB with 100k matches, due to that the fineTuning was on 90k matches leaving the other 10k for testing/validation 

PARAMETER SELECTION

In [13]:
import torch
from torch.utils.data import Dataset,DataLoader
from tqdm import tqdm


# PARAMETERS (modify if needed)
DB_PATH = "/kaggle/input/chess-games/chess_games(1).db"
TRAIN_SIZE = 90000
TEST_SIZE = 10000
BLOCK_SIZE = 256

BATCH_SIZE = 16
EPOCHS = 4
LEARNING_RATE = 5e-5
OUTPUT_DIR = "/kaggle/working/chess_gpt2_outputV2"
LOG_FILE = os.path.join(OUTPUT_DIR, "eval_log.txt")

We create the traning dataset as the sequence of moves within each game

In [4]:
# Load data from DB
conn = sq.connect(DB_PATH)
df = pd.read_sql("SELECT game_id, move_number, move FROM moves ORDER BY game_id, move_number", conn)
games = df.groupby("game_id")["move"].apply(lambda x: ' '.join(x)).tolist()
conn.close()

Train and Test set division

In [None]:
SEED = 42  # Relevant for the legality rate used for evaluate the model in zeroShot

random.seed(SEED)
np.random.seed(SEED)  
torch.manual_seed(SEED) 

# Sample and split
games = random.sample(games, min(len(games), TRAIN_SIZE + TEST_SIZE))

train_games, test_games = train_test_split(
    games,
    test_size=TEST_SIZE,
    random_state=SEED)

In [15]:
class ChessGameDataset(Dataset):
    def __init__(self, games, tokenizer, block_size=128):
        self.examples = []
        for game in games:
            tokens = tokenizer(game + tokenizer.eos_token, truncation=True, max_length=block_size, padding="max_length")
            self.examples.append(torch.tensor(tokens["input_ids"]))

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, i):
        return {"input_ids": self.examples[i], "labels": self.examples[i]}

### GPT2 FineTuning

In [None]:

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # necessary for padding
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.resize_token_embeddings(len(tokenizer))

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Embedding(50257, 768)

In [16]:
# Prepare datasets
dataset_train = ChessGameDataset(train_games, tokenizer, BLOCK_SIZE)
dataset_test = ChessGameDataset(test_games, tokenizer, BLOCK_SIZE)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [17]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    overwrite_output_dir=True,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    logging_dir=OUTPUT_DIR,
    logging_steps=10,
    report_to="none"
)

Evaluation Function

In [None]:

def compute_accuracy(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    labels = torch.tensor(labels)
    mask = labels != -100
    correct = (predictions == labels) & mask
    accuracy = correct.sum().item() / mask.sum().item()
    with open(LOG_FILE, "a") as f:
        f.write(f"Accuracy: {accuracy:.4f}\n")
    return {"accuracy": accuracy}

Fine Tuning and saving of the evaluation obtained(the actual directory has been changed)

In [None]:
from tqdm import tqdm
LOG_FILE = "/kaggle/working/chess_gpt2_outputV2/eval_log.txt"

os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()

# Dataloader
train_loader = DataLoader(
    dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=data_collator
)

optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

# Training + evaluation for epochs
for epoch in range(EPOCHS):
    model.train()
    loop = tqdm(train_loader, desc=f"Epoch {epoch}")
    for batch in loop:
        inputs = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**inputs)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        model.zero_grad()
        loop.set_postfix(loss=loss.item())

    # === EVALUATION ===
    model.eval()
    eval_loader = DataLoader(
        dataset_test,
        batch_size=BATCH_SIZE,
        shuffle=False,
        collate_fn=data_collator
    )
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in eval_loader:
            inputs = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**inputs)
            logits = outputs.logits
            predictions = torch.argmax(logits, dim=-1)
            labels = inputs["labels"]
            mask = labels != -100
            correct += ((predictions == labels) & mask).sum().item()
            total += mask.sum().item()

    accuracy = correct / total
    with open(LOG_FILE, "a") as f:
        f.write(f"Epoch {epoch}: Accuracy = {accuracy:.4f}\n")
    print(f"Epoch {epoch} - Accuracy: {accuracy:.4f}")

# Save model and tokenizer
model.save_pretrained(os.path.join(OUTPUT_DIR, "model"))
tokenizer.save_pretrained(os.path.join(OUTPUT_DIR, "model"))

Epoch 0: 100%|██████████| 5625/5625 [52:39<00:00,  1.78it/s, loss=0.781]


Epoch 0 - Accuracy: 0.0000


Epoch 1: 100%|██████████| 5625/5625 [52:40<00:00,  1.78it/s, loss=0.732]


Epoch 1 - Accuracy: 0.0000


Epoch 2: 100%|██████████| 5625/5625 [52:41<00:00,  1.78it/s, loss=0.704]


Epoch 2 - Accuracy: 0.0000


Epoch 3: 100%|██████████| 5625/5625 [52:42<00:00,  1.78it/s, loss=0.626]


Epoch 3 - Accuracy: 0.0000


('/kaggle/working/chess_gpt2_outputV2/model/tokenizer_config.json',
 '/kaggle/working/chess_gpt2_outputV2/model/special_tokens_map.json',
 '/kaggle/working/chess_gpt2_outputV2/model/vocab.json',
 '/kaggle/working/chess_gpt2_outputV2/model/merges.txt',
 '/kaggle/working/chess_gpt2_outputV2/model/added_tokens.json')

The next token accuracy remain strangely at 0.0000 through al the epochs which is strange but in the zeroShot can be seen that the model has actually improve is ability in generating moves from a moveHistory

This could be due to a great variability of the next moves possibilities or to the low dimension of the dataset used