In [1]:
import mlflow
import mlflow.pytorch
import torch
from torch.utils.data import DataLoader
from dataset_classes.chess_dataset import ChessDataset
from torch.nn.utils.rnn import pad_sequence
import pandas as pd
from preprocessing.preprocess_my_lichess import PreprocessMyLichess
from constants import *
import os

In [2]:
# Locate the cached, preprocessed pickle that corresponds to the PGN export.
pgn_filename = "lichess_BabaGata_2025-02-02.pgn"
# Swap only the file extension: str.replace("pgn", "pkl") would also rewrite
# any "pgn" substring that happens to appear elsewhere in the file name.
pkl_filename = os.path.splitext(pgn_filename)[0] + ".pkl"
pkl_file_path = os.path.join(MY_DIR, "preprocessed", pkl_filename)

# Run the preprocessing step only when no cached pickle exists yet.
# NOTE(review): assumes process_data() writes its result to pkl_file_path —
# confirm against PreprocessMyLichess.
if not os.path.exists(pkl_file_path):
    processor = PreprocessMyLichess(pgn_filename, username='BabaGata')
    processor.process_data()

# NOTE: unpickling can execute arbitrary code; only load files produced locally.
df = pd.read_pickle(pkl_file_path)

In [3]:
# Preview the first rows of the DataFrame loaded from the preprocessed pickle
df.head()

Unnamed: 0,matrices,encoded_eco,opening_ply,result,player_color
0,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",246,10,1-0,black
1,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",241,10,1-0,white
2,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",301,10,1-0,black
3,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",100,10,0-1,black
4,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",250,10,1-0,white


In [4]:
# Set device: use the GPU when one is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Set the experiment name for MLflow
mlflow.set_experiment("Chess Opening Classification")

# Identify the MLflow run whose logged model should be loaded.
# Replace both IDs with the values of your own run.
# NOTE(review): the original comment pointed at mlruns/<experiment_id>/<run_id>
# as the on-disk location of this run — confirm for your tracking setup.
# experiment_id is not used by the "runs:/" URI below; it is kept for reference.
experiment_id = "819841942341856253"
run_id = "de7114d7a07c4c529cd2dfabd4fc07ca"

# Fetch the model from MLflow.
# map_location is forwarded to torch.load so that a model saved on a CUDA
# device can still be deserialized on a CPU-only machine.
logged_model = f"runs:/{run_id}/final_model"
model = mlflow.pytorch.load_model(logged_model, map_location=device)
model.to(device)
# Switch to evaluation mode; as the last expression this also displays the model
model.eval()

Using device: cuda


ChessOpeningClassifier(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (rnn): LSTM(128, 128, batch_first=True)
  (fc1): Linear(in_features=128, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=500, bias=True)
)

In [5]:
def collate_fn(batch):
    """Combine (input, label) samples into a single padded batch.

    Args:
        batch: Sequence of ``(input_tensor, label)`` pairs. The input tensors
            may differ in length along their first dimension.

    Returns:
        A ``(padded_inputs, labels)`` tuple: the inputs stacked batch-first and
        padded with ``0.0`` up to the longest sample, and the labels as a
        ``torch.long`` tensor.
    """
    sequences, targets = zip(*batch)
    padded_inputs = pad_sequence(list(sequences), batch_first=True, padding_value=0.0)
    return padded_inputs, torch.tensor(targets, dtype=torch.long)

In [6]:
# Load a dataset for prediction (use the same method as during training)
prediction_dataset = ChessDataset(df, max_moves=40)
prediction_loader = DataLoader(prediction_dataset, batch_size=32, collate_fn=collate_fn, shuffle=False)

In [7]:
# Make predictions on the dataset
predictions = []
true_labels = []

with torch.no_grad():
    for inputs, labels in prediction_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        outputs = model(inputs)
        predicted_labels = torch.argmax(outputs, dim=1)
        
        predictions.extend(predicted_labels.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

In [8]:
# Add predictions to DataFrame
df["predicted_eco"] = predictions

# Save the updated DataFrame
predictions_path = os.path.join(MY_DIR, "predictions", pgn_filename.replace(".pgn", "_1900.pkl"))
df.to_pickle(predictions_path)

print(f"Predictions saved to: {predictions_path}")

Predictions saved to: data/my_data/predictions\lichess_BabaGata_2025-02-02_1900.pkl


In [9]:
# Preview the first rows again; predicted_eco sits next to encoded_eco for a quick comparison
df.head()

Unnamed: 0,matrices,encoded_eco,opening_ply,result,player_color,predicted_eco
0,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",246,10,1-0,black,244
1,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",241,10,1-0,white,249
2,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",301,10,1-0,black,301
3,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",100,10,0-1,black,299
4,"[[[-4, -2, -3, -5, -6, -3, -2, -4], [-1, -1, -...",250,10,1-0,white,240
