In [1]:
import torch
import chess
import random

In [2]:
from src.model import ChessNet
from src.dataclass import ChessDataset
from src.encode import get_canonical_board, index_to_uci

In [3]:
# Step 1: Build the validation loader; the validation set is the January 2016 file
val_dataset = ChessDataset("data/csv/le_2016-01.csv")
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=256,
    shuffle=False,  # iterate the file in its stored order
    num_workers=4,
)

In [4]:
# Step 2: Choose the compute device (CUDA when available, otherwise CPU)
# and construct the network whose weights are loaded in the next step
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = ChessNet()

In [5]:
# Step 3: Load the trained model
# map_location remaps the stored tensors onto the device chosen in Step 2, so a
# checkpoint written from a CUDA run still loads when only the CPU is available.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load("models/model.2.40.pth", map_location=device)
# The checkpoint is a dict; the weights live under 'model_state_dict'.
# Leaving this call as the last expression displays the key-matching report.
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [6]:
# Step 4: Move the model onto the selected device and switch it to evaluation mode.
# Both calls return the module itself, so the cell still displays the architecture.
model.to(device).eval()

ChessNet(
  (conv1): Conv2d(18, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=4096, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=4544, bias=True)
)

In [7]:
# Step 5: Count how often the labelled move is the model's highest-scoring move (top-1)
# or among its three highest-scoring moves (top-3) over the validation loader
correct_top_1 = 0
correct_top_3 = 0
total = 0

with torch.no_grad():  # inference only, no gradients needed
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        probabilities = torch.softmax(outputs, dim=1)

        pred_top_1 = probabilities.argmax(dim=1)
        correct_top_1 += (pred_top_1 == labels).sum().item()

        # Compare every label against its row of three candidates in one vectorized
        # operation instead of a per-sample Python loop (which synchronizes with the
        # device once per sample).
        # assumes labels holds one class index per sample — the top-1 comparison
        # above already relies on that
        pred_top_3 = torch.topk(probabilities, k=3, dim=1).indices
        in_top_3 = (pred_top_3 == labels.reshape(-1, 1)).any(dim=1)
        correct_top_3 += in_top_3.sum().item()

        total += labels.size(0)

# Fail with a readable message instead of a bare ZeroDivisionError when the loader is empty
if total == 0:
    raise ValueError("Validation loader produced no samples; cannot compute accuracy.")

top_1_accuracy = 100 * correct_top_1 / total
top_3_accuracy = 100 * correct_top_3 / total

In [8]:
# Report both accuracies as percentages with two decimals
print(
    f"\nValidation Results:\nTop-1 accuracy: {top_1_accuracy:.2f}%\nTop-3 accuracy: {top_3_accuracy:.2f}%"
)


Validation Results:
Top-1 accuracy: 48.72%
Top-3 accuracy: 74.73%


In [9]:
# Earlier run: with only 5 epochs of training, the model found the best move 28% of the time, and about
# half of the time it found one of the three best moves. That looked promising, but the performance
# still has to be visualized in more detail.

# Current run (results printed above): with helper planes and 50 epochs of training, the loss is 1.7,
# the top-1 accuracy is 48.72% and the top-3 accuracy is 74.73%.

In [10]:
# Step 6: Create a function to have the model predict a legal move
def predict_move(board, top_k=3):
    """Return a legal move for ``board`` chosen from the model's top candidates.

    The position is encoded with ``get_canonical_board`` from its FEN, scored by
    the global ``model`` on the global ``device``, and the ``top_k`` highest-scoring
    move indices are visited in random order. The first candidate whose UCI string
    (looked up through ``index_to_uci``) is legal in ``board`` is returned. If none
    of the candidates is legal, a uniformly random legal move is returned instead.

    Args:
        board: ``chess.Board`` holding the position to move in.
        top_k: Number of highest-scoring candidates to consider (default 3,
            the value that was previously hard-coded).

    Returns:
        chess.Move: A move that is legal in ``board``.

    Raises:
        IndexError: If ``board`` has no legal moves and the fallback is reached
            (``random.choice`` on an empty list).

    NOTE(review): ``get_canonical_board`` presumably normalizes the position by
    side to move — confirm that ``index_to_uci`` yields moves in the real board's
    coordinates when Black is to move; otherwise the candidates would rarely be
    legal for Black and the random fallback would dominate.
    """
    # A set gives constant-time legality checks for the candidates below
    legal_moves = {move.uci() for move in board.legal_moves}
    model_input = torch.from_numpy(get_canonical_board(board.fen())).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(model_input)
        probs = torch.softmax(logits, dim=1).squeeze(0)
        # Never ask topk for more entries than the model has outputs
        top_moves = torch.topk(probs, k=min(top_k, probs.numel()))
        # Shuffle so the pick among the top candidates is random rather than always the best one
        shuffled_moves = top_moves.indices[torch.randperm(top_moves.indices.nelement())]
        for index in shuffled_moves.tolist():
            uci_move = index_to_uci[index]
            if uci_move in legal_moves:
                return chess.Move.from_uci(uci_move)
    # None of the top candidates was legal: fall back to any legal move
    return random.choice(list(board.legal_moves))

In [11]:
# Step 7: Get the pgn of a game played by the network for evaluation
import chess.pgn

# Single source of truth for the output location, reused in the final message
pgn_path = "data/eval/pgn_1.pgn"

game = chess.pgn.Game()
game.headers["Event"] = "Evaluation Game"
game.headers["Site"] = "Local"
# predict_move chooses the moves for both colours, so both players are the model
game.headers["White"] = "Model"
game.headers["Black"] = "Model"

# `node` tracks the end of the main line so each move is appended after the previous one
node = game

board = chess.Board()
while not board.is_game_over():
    move = predict_move(board)
    board.push(move)
    node = node.add_variation(move)

# Store the outcome in the PGN as well; without this the Result header keeps its default "*"
game.headers["Result"] = board.result()
print("\nGame over:", board.result())
with open(pgn_path, "w", encoding="utf-8") as pgn_file:
    print(game, file=pgn_file)

# Report the path that was actually written
print(f"Game saved to {pgn_path} — upload it to Lichess to review!")


Game over: 1-0
Game saved to game_output.pgn — upload it to Lichess to review!
