In [1]:
import torch
import chess
import numpy as np

In [2]:
from src.model import ResNet
from src.dataclass import ChessDataset
from src.encode import get_canonical_board, uci_to_index, all_uci_moves

In [3]:
# Step 1: Load the validation set, in this case we take January 2017
# Batches are drawn in dataset order; evaluation does not need shuffling.
val_dataset = ChessDataset("data/csv/le2017-01.csv")
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=256,
    shuffle=False,
    num_workers=4,
)

In [4]:
# Step 2: Pick the compute device (GPU when one is available, CPU otherwise)
# and build the network
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = ResNet(filters=128, res_blocks=6)

In [5]:
# Step 3: Load the trained model
# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# saved from a GPU can still be opened when this notebook falls back to the CPU.
checkpoint = torch.load("models/model5/model.50.pth", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [6]:
# Step 4: Move the model to the selected device and switch it to evaluation mode.
# Both calls return the module itself, so the chained expression still displays
# the model summary as the cell output.
model.to(device).eval()

ResNet(
  (start_block): Sequential(
    (0): Conv2d(18, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (res_tower): ModuleList(
    (0-5): 6 x ResBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (batch2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (policy_head): Sequential(
    (0): Conv2d(128, 2, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Flatten(start_dim=1, end_dim=-1)
    (4): Linear(in_features=128, out_features=4544, bias=True)
  )
  (value_head): Se

In [8]:
# Step 5: Count how often the labelled move is the model's top prediction (top-1)
# or among its three highest-scoring moves (top-3)
correct_top_1 = 0
correct_top_3 = 0
total = 0

with torch.no_grad():
    # Only the policy output is scored here, so the value targets and the value
    # head's output are left unused.
    for inputs, labels, _values in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        policy, _value = model(inputs)

        # Softmax is monotonic, so ranking the raw logits selects the same moves
        # without an extra pass over the batch.
        pred_top_1 = policy.argmax(dim=1)
        correct_top_1 += (pred_top_1 == labels).sum().item()

        # Compare all three candidates against the label in one batched operation
        # instead of a per-sample Python loop (one device sync per batch, not per row).
        # labels is treated as one move index per row, as the top-1 comparison above
        # already does.
        pred_top_3 = torch.topk(policy, k=3, dim=1).indices
        correct_top_3 += (pred_top_3 == labels.unsqueeze(1)).any(dim=1).sum().item()

        total += labels.size(0)

top_1_accuracy = 100 * correct_top_1 / total
top_3_accuracy = 100 * correct_top_3 / total

In [9]:
# Report both validation metrics as percentages with two decimals.
validation_report = f"\nValidation Results:\nTop-1 accuracy: {top_1_accuracy:.2f}%\nTop-3 accuracy: {top_3_accuracy:.2f}%"
print(validation_report)


Validation Results:
Top-1 accuracy: 53.53%
Top-3 accuracy: 81.48%


In [10]:
# With only 5 epochs of training, the model manages to find the best move 28% of the time, and half the time it finds
# one of the three best moves. This looks promising, but we need to visualize the performance further.

# With helper planes and 50 epochs, the loss is 1.7 and the top1 accuracy is 48.72% while top3 acc is 74.73%

# ResNet seems to perform worse than our second version model. One reason might be that the dataset is not that large. 

# Training the ConvNet with both a policy and a value head this time again gives top-1 47.99% and top-3 74.1%.

# Training a ResNet with a policy and a value head (AlphaZero-style) with 128 filters and 6 residual blocks on a dataset of 25 million positions:
# after 28 epochs it achieves top-1 accuracy 53% and top-3 accuracy 80.95% on the validation set.

# Earlier runs mistakenly used 2016 data for validation; the validation set has now been changed to 2017.

# On the 2017 validation set: top-1 53.53%, top-3 81.48% -- still very good.

In [7]:
# Step 6: Predict a good legal move
# Length of the move-index vector used for masks and policies below; it equals the
# policy head's out_features (4544) shown in the model summary earlier in this notebook.
ACTION_SIZE = 4544

def mask_actions(indices):
    """Return a list of ACTION_SIZE zeros with a 1 at every position in `indices`."""
    mask = [0 for _ in range(ACTION_SIZE)]
    for position in indices:
        mask[position] = 1
    return mask

def get_model_output(board):
    """Run the network on `board` and return its softmaxed policy as a NumPy array.

    The position is encoded from its FEN by `get_canonical_board`, given a leading
    batch dimension and moved to `device`. The model's second output is ignored.
    """
    encoded_position = get_canonical_board(board.fen())
    batch = torch.from_numpy(encoded_position).unsqueeze(0).to(device)
    with torch.no_grad():
        policy_logits, _ = model(batch)
        # squeeze() already removes every size-1 dimension, and tensors produced
        # under no_grad carry no autograd history.
        distribution = torch.softmax(policy_logits, dim=1).squeeze()
    return distribution.cpu().numpy()

def filter_policy(probs, mask):
    """Zero out the entries of `probs` where `mask` is 0 and rescale the rest to sum to 1."""
    masked = probs * mask
    total = np.sum(masked)
    return masked / total

def get_policy(board, threshold=0.05):
    """Return a sampling distribution over the move-index space for `board`.

    The model's probabilities are first restricted to the legal moves and
    renormalised. Legal moves whose renormalised probability is not above
    `threshold` are then dropped and the remainder is renormalised again.

    If no legal move clears `threshold` (for example many legal moves with
    similar probabilities), the legal-move distribution is returned unpruned.
    Without this fallback the second mask is all zeros, the renormalisation
    divides 0 by 0, and the resulting NaN policy is rejected by
    `np.random.choice`.

    Args:
        board: position to evaluate; must provide `legal_moves` and `fen()`.
        threshold: minimum renormalised probability a legal move needs in order
            to stay in the policy.

    Returns:
        NumPy array indexed like `uci_to_index`, zero outside the kept moves.
    """
    legal_indices = [uci_to_index[move.uci()] for move in board.legal_moves]
    policy = filter_policy(get_model_output(board), mask_actions(legal_indices))

    best_indices = np.where(policy > threshold)[0]
    if best_indices.size == 0:
        # Nothing clears the threshold: keep every legal move rather than
        # normalising an all-zero vector.
        return policy

    return filter_policy(policy, mask_actions(best_indices))

def get_uci_move(board):
    """Sample one move for `board` from `get_policy` and return it as a `chess.Move`."""
    move_distribution = get_policy(board)
    sampled_uci = np.random.choice(all_uci_moves, p=move_distribution)
    return chess.Move.from_uci(sampled_uci)

In [13]:
# Step 7: Let the network play a full game against itself and save the PGN for review
import chess.pgn

pgn_path = "data/eval/v5.5.pgn"

game = chess.pgn.Game()
game.headers["Event"] = "Evaluation Game"
game.headers["Site"] = "Local"
# Every move in the loop below is sampled from the network, so both sides are the model.
game.headers["White"] = "Model"
game.headers["Black"] = "Model"

node = game

board = chess.Board()
while not board.is_game_over():
    move = get_uci_move(board)
    board.push(move)
    node = node.add_variation(move)

# Store the outcome in the PGN itself; otherwise the Result header keeps its
# default "*" even though the game has finished.
result = board.result()
game.headers["Result"] = result
print("\nGame over:", result)

with open(pgn_path, "w") as pgn_file:
    print(game, file=pgn_file)

# Report the path that was actually written.
print(f"Game saved to {pgn_path} — upload it to Lichess to review!")


Game over: 1-0
Game saved to game_output.pgn — upload it to Lichess to review!
