# Chess Move Prediction Pipeline

This notebook loads player game data, constructs a PyTorch dataset for chess
positions, builds a convolutional neural network to predict moves, and trains
the model using variable-length legal move lists.

Sections:

1. Load and merge move, board, and game metadata  
2. Build a custom PyTorch dataset  
3. Implement the residual blocks of the CNN  
4. Build the main neural network  
5. Train the model with variable-length legal move lists  


In [None]:
import chess
import chess.utils as ut
import torch 
from torch.utils.data import Dataset
import torch.nn as nn
import numpy as np
import pandas as pd
from datetime import datetime
from torch.utils.data import DataLoader
import torch.nn.functional as F
from pathlib import Path as path
import sys

# Project root: two directory levels above the resolved current working directory.
ROOT = path().resolve().parents[1]
# Folder scanned by find_folders() for per-player data directories.
DATA_ROOT = ROOT/'chess/data/game_data'
# Folder where train_model() writes the model weights.
MODELS_ROOT = ROOT/'chess/models'

# Ask interactively which player's data to use; the answer is used to locate
# the data folder and to build the CSV file names below.
player = input("Enter player's last name: ")
print(f'Selected player: {player}')


Selected player: Carlsen


## Load Player Data

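`Load_player_data()` loads and merges the information required to train the
model:

- If `file_path` is given, reads the previously saved combined CSV and skips the steps below  
- Otherwise reads the move CSV (including the `white_count` / `black_count` material columns) and keeps only the rows played by the selected player  
- Merges those rows with the game metadata on `game_id`  
- Replays the games from the starting position with `python-chess` and stores the position before each of the player's moves in `raw_fens`  
- Accepts an ECO-codes CSV path, which is not used yet  
- Optionally saves the combined CSV for debugging  
- Returns the unified DataFrame  

This function prepares the cleaned data that drives the dataset class.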


In [4]:
def find_folders(playername):
    """Return the directories directly under ``DATA_ROOT`` matching *playername*.

    A directory matches when its name contains *playername*, compared
    case-insensitively.  Entries that are not directories are ignored.
    """
    hits = []
    for entry in DATA_ROOT.iterdir():
        # Skip plain files; only folders can hold a player's data.
        if not entry.is_dir():
            continue
        if playername.lower() in entry.name.lower():
            hits.append(entry)
    return hits

# Locate the data folder(s) whose name contains the selected player's name.
data_folders = find_folders(player)
if not data_folders:
    # Fail with an actionable message instead of a bare IndexError below.
    raise FileNotFoundError(
        f"No data folder matching '{player}' found under {DATA_ROOT}"
    )
# When several folders match, the first one returned is used.
data_folder = data_folders[0]

# Move CSV for the selected player; replay() reads it through this
# module-level name.
file = data_folder/f'{player}_moves.csv'

def replay(player):
    """Replay the games in the moves CSV and collect *player*'s pre-move FENs.

    Reads the module-level ``file`` CSV (columns ``move``, ``player`` and
    ``move_no``), walks the rows in file order and plays each UCI move on a
    ``chess.Board``.  A fresh board is started whenever ``move_no == 1``.

    Args:
        player: Name (or name fragment) of the player whose positions are
            collected.  A row matches when its ``player`` value contains this
            string, mirroring the ``str.contains`` filter that
            ``Load_player_data`` applies to the same CSV, so the number of
            FENs lines up with the filtered rows.

    Returns:
        list[str]: One FEN per matching row, taken *before* that row's move
        is played.
    """
    data = pd.read_csv(file)
    fens = []
    illegal_count = 0

    # Start from the initial position so the board always exists, even if the
    # first row does not carry move_no == 1.
    repl_board = chess.Board()

    for uci, mover, move_no in zip(data['move'], data['player'], data['move_no']):
        # NOTE(review): assumes move_no == 1 marks the first move of a new
        # game (i.e. it is a per-game ply counter) -- confirm against the CSV.
        if move_no == 1:
            repl_board = chess.Board()

        # Missing player names (NaN) never match.
        if isinstance(mover, str) and player in mover:
            fens.append(repl_board.fen())

        # Legality has to be tested with a Move object; a raw UCI string
        # cannot be looked up in board.legal_moves.
        try:
            parsed = chess.Move.from_uci(uci)
        except (ValueError, TypeError):
            parsed = None

        if parsed is None or parsed not in repl_board.legal_moves:
            # Do not push an unparsable/illegal move: it would corrupt the
            # board state.  The position is simply left unchanged.
            illegal_count += 1
            continue

        repl_board.push(parsed)

    print(f'{len(fens)} fens loaded')
    if illegal_count:
        print(f'{illegal_count} illegal or unparsable moves skipped')
    return fens


def Load_player_data(player_name, move_csv_path, game_csv_path, eco_csv_path, save=False, file_path=None):
    """Build (or reload) the combined per-move table for one player.

    Args:
        player_name: Name fragment used to select the player's rows from the
            move CSV (``str.contains`` on the ``player`` column) and to name
            the saved file.
        move_csv_path: CSV with the columns ``game_id``, ``move_no_pair``,
            ``player``, ``color``, ``move``, ``white_count``, ``black_count``.
        game_csv_path: CSV with game metadata; must contain ``game_id``.
        eco_csv_path: Accepted for interface compatibility; currently unused.
        save: When true, write the combined table to
            ``loaded_data/loaded_<player_name>_move_data.csv``.
        file_path: Optional path to a previously saved combined CSV.  When
            given, it is read as-is and the other CSV paths are ignored.

    Returns:
        pandas.DataFrame: The combined table.  When built from the raw CSVs
        it carries a ``raw_fens`` column with the position before each move.
    """
    if file_path:
        combined_data = pd.read_csv(file_path)
    else:
        # Load move and board data from the CSV
        move_data = pd.read_csv(
            move_csv_path,
            usecols=['game_id', 'move_no_pair', 'player', 'color', 'move', 'white_count', 'black_count'],
        )

        # Filter moves made by the target player
        move_data = move_data[move_data['player'].str.contains(player_name, na=False)]

        # Load game metadata with game_id column for merging
        game_data = pd.read_csv(game_csv_path)

        # Merge game info to the player's moves on game_id
        combined_data = pd.merge(game_data, move_data, on='game_id', how='left')

        # Use the function argument rather than a notebook-level global so the
        # FENs belong to the same player as the rows filtered above.
        # NOTE(review): replay() reads the module-level `file`, not
        # move_csv_path, and returns FENs in file order -- confirm both refer
        # to the same CSV and that the merge keeps that row order.
        combined_data['raw_fens'] = replay(player_name)

    # Save combined data
    if save:
        out_path = path("loaded_data") / f"loaded_{player_name}_move_data.csv"
        # to_csv does not create missing directories.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        combined_data.to_csv(out_path, index=False)

    return combined_data

# CSV inputs for the selected player inside its data folder.
_moves =  data_folder/f'{player}_moves.csv'
_game_info = data_folder/f'{player}_game_info.csv'
_eco_codes = data_folder/'eco_codes.csv'

# file_path is set here, so Load_player_data reads the previously saved
# combined CSV and does not open the three CSV paths above.
magnus_data = Load_player_data(player, 
                 _moves, 
                 _game_info, 
                 _eco_codes,
                 save=False,
                 file_path=f'loaded_data/loaded_{player}_move_data.csv')

  combined_data = pd.read_csv(file_path)


## ChessDataset (PyTorch)

`ChessDataset` converts each row of the merged DataFrame into training-ready
tensors:

- Converts FEN → 6×8×8 tensor of piece planes  
- Computes legal moves using `python-chess`  
- Computes the target move index within legal moves  
- Constructs auxiliary numeric features  
- Returns a dictionary containing:

  - `"board"` : 6×8×8 tensor  
  - `"extra"` : auxiliary features  
  - `"legal_moves"` : `[N_i, 4]` tensor of legal moves (from-row, from-col, to-row, to-col)  
  - `"target_index"` : index of the correct move  

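Because the number of legal moves differs from position to position, samples
cannot be stacked by the default collate function; the training loop therefore
uses a custom `collate_fn` that keeps each batch as a Python list of samples.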


In [5]:
class ChessDataset(Dataset):
    """Dataset that converts rows of the merged move table into training tensors.

    Every row is expected to carry the columns read in ``__getitem__``:
    ``raw_fens``, ``move``, ``date_played``, ``move_no_pair`` and ``color``.
    Samples are returned as dictionaries because the number of legal moves
    differs from position to position.
    """

    def __init__(self, data):
        """
        data: tuple (DataFrame, eco_to_idx) or just DataFrame
        """
        self.data = data[0] if isinstance(data, tuple) else data

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.data)

    @staticmethod
    def _move_to_coords(move):
        """Encode a ``chess.Move`` as ``(from_row, from_col, to_row, to_col)``.

        Rows and columns are ``square // 8`` and ``square % 8``.  The
        promotion piece is not part of the encoding.
        """
        return (move.from_square // 8, move.from_square % 8,
                move.to_square // 8, move.to_square % 8)

    def __getitem__(self, idx):
        """Return the training sample for row *idx*.

        Returns:
            dict with the keys ``"board"`` ([6, 8, 8] float tensor),
            ``"extra"`` ([3] float tensor: days since 1970-01-01, move
            number pair, colour flag), ``"legal_moves"`` ([N, 4] float
            tensor) and ``"target_index"`` (scalar long tensor indexing the
            played move inside ``legal_moves``).

        Raises:
            ValueError: If the row's move is not legal in the row's position.
        """
        row = self.data.iloc[idx]
        raw_fen = row['raw_fens']
        board = chess.Board(raw_fen)

        # Legal moves: generate once and reuse for both encodings.
        legal = list(board.legal_moves)
        legal_moves = [self._move_to_coords(m) for m in legal]
        legal_moves_tensor = torch.tensor(legal_moves, dtype=torch.float32)  # [N,4]

        # Target index in legal moves (parse the UCI string only once).
        target_move = self._move_to_coords(chess.Move.from_uci(row['move']))

        if target_move not in legal_moves:
            # Report everything needed to debug the row in one exception; the
            # position comes from raw_fen, the column actually read above.
            legal_moves_uci = [m.uci() for m in legal]
            raise ValueError(
                f"Move {row['move']!r} is not legal in position {raw_fen!r} "
                f"({'White' if board.turn else 'Black'} to move); "
                f"legal moves: {legal_moves_uci}"
            )

        # Promotions to different pieces share the same coordinates, so
        # index() returns the first of them.
        target_index = torch.tensor(legal_moves.index(target_move), dtype=torch.long)

        # Board tensor
        board_tensor = self.convert_board_to_tensor(raw_fen)  # [6,8,8]

        # Extra features
        extra_features = torch.tensor([
            self.process_date_played(row['date_played']),
            int(row['move_no_pair']),
            0 if row['color'] == 'White' else 1
        ], dtype=torch.float32)

        return {
            "board": board_tensor,
            "extra": extra_features,
            "legal_moves": legal_moves_tensor,
            "target_index": target_index
        }

    def convert_board_to_tensor(self, board_fen):
        """Convert a FEN string into a [6, 8, 8] float tensor.

        One plane per piece type in the order pawn, rook, knight, bishop,
        queen, king.  Upper-case (white) pieces are encoded as ``1``,
        lower-case (black) pieces as ``-1`` and every other square as ``0``.
        """
        pieces = ['p', 'r', 'n', 'b', 'q', 'k']
        board = chess.Board(board_fen)
        # NOTE(review): str(board) is read top-to-bottom, so row 0 is the
        # first printed rank, while the move coordinates use square // 8 --
        # confirm both use the orientation the model is meant to see.
        board_str = str(board).replace(' ', '').replace('\n', '')
        layers = []
        for piece in pieces:
            arr = np.zeros((8, 8), dtype=np.float32)
            for i, char in enumerate(board_str):
                row, col = divmod(i, 8)
                if char == piece:
                    arr[row, col] = -1
                elif char == piece.upper():
                    arr[row, col] = 1
            layers.append(arr)

        return torch.tensor(np.stack(layers), dtype=torch.float32)

    def convert_move_to_tensor(self, move):
        """Return a [2, 8, 8] tensor with one-hot from-square and to-square planes.

        Args:
            move: ``(from_row, from_col, to_row, to_col)`` tuple.
        """
        from_row, from_col, to_row, to_col = move

        from_arr = torch.zeros((8, 8), dtype=torch.float32)
        to_arr = torch.zeros((8, 8), dtype=torch.float32)

        from_arr[from_row, from_col] = 1
        to_arr[to_row, to_col] = 1

        move_tensor = torch.stack([from_arr, to_arr])
        return move_tensor

    def process_date_played(self, date_played):
        """Convert a ``YYYY.MM.DD`` date string into days since 1970-01-01.

        Every ``??`` placeholder is replaced by ``01`` before parsing.
        """
        date_played = date_played.replace("??", "01")
        dt = datetime.strptime(date_played, "%Y.%m.%d")
        epoch = datetime(1970, 1, 1)
        delta_days = (dt - epoch).days
        return delta_days


## NeuralNet (Convolutional Neural Network)

The neural network processes each board position as follows:

1. Initial convolution over the 6×8×8 board planes  
2. Stack of residual blocks  
3. Flatten into a feature vector  
4. Combine with auxiliary features  
5. Score every legal move of the position  

This network does *not* assume a fixed number of moves, so it does not output a
fixed-size vector. Instead, the legal moves of each position are passed to
`forward` alongside the board, and the network returns one score per legal
move for every sample in the batch.

In [6]:
class ResidualBlock(nn.Module):
    """Residual unit: conv -> batch norm -> ReLU -> conv -> batch norm, plus skip.

    The input is added back to the output of the stacked layers and the sum
    is passed through a final ReLU.  Channel count and spatial size are
    preserved (3x3 kernels, stride 1, padding 1).
    """

    def __init__(self, hidden_size):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        # Sub-modules are created and registered in the same order as before
        # so parameter names and initialisation order stay unchanged.
        self.conv1 = nn.Conv2d(hidden_size, hidden_size, **conv_kwargs)
        self.conv2 = nn.Conv2d(hidden_size, hidden_size, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(hidden_size)
        self.bn2 = nn.BatchNorm2d(hidden_size)
        self.activation1 = nn.ReLU()

        self.layers = nn.Sequential(
            self.conv1,
            self.bn1,
            self.activation1,
            self.conv2,
            self.bn2,
        )

        self.activation2 = nn.ReLU()

    def forward(self, x):
        shortcut = x.clone()                 # keep a copy for the skip path
        transformed = self.layers(x)         # conv/bn stack
        return self.activation2(transformed + shortcut)

class NeuralNet(nn.Module):
    '''
    A convolutional neural network that scores the legal moves of a chess
    position from a board tensor and auxiliary (non-spatial) features.

    The network processes a 6×8×8 input tensor (one plane per piece type, as
    produced by ``ChessDataset.convert_board_to_tensor``). It applies an
    initial convolution and a stack of residual blocks, flattens the result
    into a board embedding and adds the embedded auxiliary features. Each
    legal move is embedded separately, concatenated with that combined
    embedding and mapped to a single score.
    '''

    def __init__(self, hidden_layers=10, hidden_size=50, extra_feature_dim=3, board_emb=256, moves_emb=128):
        '''
        Initialize the neural network.

        Args:
            hidden_layers (int):
                Number of residual blocks applied after the initial convolution.
            hidden_size (int):
                Number of feature channels in the convolutional and residual layers.
            extra_feature_dim (int):
                Dimension of the auxiliary non-board input feature vector
            board_emb (int):
                board embedding dimension
            moves_emb (int):
                move embedding dimension

        Components created:
            • input_conv: first 3×3 convolution mapping 6 channels → hidden_size
            • bn_input: batch normalization for the input convolution
            • activation: shared ReLU activation
            • res_blocks: a Sequential container of `hidden_layers` residual blocks
            • flatten: flattens convolutional output to a vector
            • fc_extra: linear layer that embeds auxiliary features to
              `board_emb` units so they can be added to the board embedding
            • fc_legal: linear layer that embeds a 4-number move to `moves_emb` units
            • fc_board: linear layer mapping the flattened features to `board_emb` units
            • combine_fc: linear layer mapping the concatenated
              board/extra + move embedding to one score
        '''
        super().__init__()
        self.input_conv = nn.Conv2d(6, hidden_size, kernel_size=3, padding=1)
        self.bn_input = nn.BatchNorm2d(hidden_size)
        self.activation = nn.ReLU()

        # Add the residual blocks
        self.res_blocks = nn.Sequential(
            *[ResidualBlock(hidden_size) for _ in range(hidden_layers)]
        )

        self.flatten = nn.Flatten()
        self.fc_extra = nn.Linear(extra_feature_dim, board_emb)
        self.fc_legal = nn.Linear(4, moves_emb)
        self.fc_board = nn.Linear(hidden_size * 8 * 8, board_emb)

        self.combine_fc = nn.Linear(board_emb + moves_emb, 1)

    def forward(self, board_tensor_batch, extra_features_batch, legal_move_batch):
        '''
        Run a forward pass of the network.

        Args:
            board_tensor_batch (Tensor):
                A float tensor of shape (batch_size, 6, 8, 8) representing the
                chess boards.

            extra_features_batch (Tensor):
                A tensor of shape (batch_size, extra_feature_dim) containing
                side-information not encoded spatially.

            legal_move_batch (sequence of Tensor):
                One tensor of shape (N_i, 4) per sample holding that
                position's legal moves; N_i may differ between samples.

        Returns:
            list[Tensor]: One 1-D tensor of N_i raw (unnormalised) scores per
            sample, in the order of that sample's legal moves.
        '''
        # board features computed ONCE for entire batch
        x = self.input_conv(board_tensor_batch)
        x = self.bn_input(x)
        x = self.activation(x)
        x = self.res_blocks(x)
        x = self.flatten(x)
        x = self.activation(self.fc_board(x))

        # extra feature embedding computed ONCE for entire batch
        extra = self.activation(self.fc_extra(extra_features_batch))

        # The board and extra embeddings share the same width, so they are
        # summed once for the whole batch instead of once per sample.
        context_batch = x + extra

        # Legal move tensors have a different number of rows per sample, so
        # they cannot be stacked; score them one sample at a time.
        scores_list = []
        for i, context in enumerate(context_batch):
            legal_moves = legal_move_batch[i]

            # Legal move embedding: [N_i, moves_emb]
            x_legal = self.activation(self.fc_legal(legal_moves.float()))

            # Repeat the sample's context for each of its legal moves
            # ([N_i, board_emb]) and append the move embedding.
            repeated = context.unsqueeze(0).expand(x_legal.size(0), -1)
            combined = torch.cat([repeated, x_legal], dim=1)

            # One score per legal move: [N_i, 1] -> [N_i]
            scores_list.append(self.combine_fc(combined).squeeze(1))

        return scores_list

## Training Loop (train_model)

The training loop:

- Creates a dataset and DataLoader  
- Uses a custom `collate_fn` to avoid stacking variable-length legal move lists  
- Stacks board and extra feature tensors  
- Leaves legal moves as a Python list (`[N_i, 4]` per sample)  
- Calls the model to get one raw score (logit) per legal move  
- Applies cross-entropy loss (softmax + negative log-likelihood) using the target move index  
- Updates the model with Adam optimizer  
- Saves model weights after each epoch  

This training approach supports variable-length move sets using loops rather
than fixed-size tensors, simplifying the model design.


In [None]:
def train_model(modelname, dataset, num_epochs=100, batch_size=32, lr=2.5e-5, clip_grad=1.0, epsilon=1e-8):
    """Train a fresh ``NeuralNet`` on *dataset* and save its weights each epoch.

    Args:
        modelname: Base name of the weights file written to
            ``MODELS_ROOT/<modelname>.pth`` (overwritten after every epoch).
        dataset: Data accepted by ``ChessDataset`` (a DataFrame or a tuple
            whose first element is one).
        num_epochs: Number of passes over the data.
        batch_size: Number of samples per optimisation step.
        lr: Adam learning rate.
        clip_grad: Currently unused; gradient clipping is disabled.
        epsilon: Currently unused.

    Raises:
        ValueError: If the dataset contains no samples.
    """
    data_train = ChessDataset(dataset)
    if len(data_train) == 0:
        # Without samples there is nothing to average the epoch loss over.
        raise ValueError("train_model received an empty dataset")

    # Identity collate: each batch stays a list of sample dicts because the
    # legal-move tensors have different lengths and cannot be stacked.
    data_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True, collate_fn=lambda x: x)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = NeuralNet()
    # Move the model to its device before the optimizer is constructed.
    model.to(device)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # The loss module is stateless; build it once instead of once per batch.
    loss_fn = torch.nn.CrossEntropyLoss()

    # torch.save does not create missing directories.
    MODELS_ROOT.mkdir(parents=True, exist_ok=True)

    for epoch in range(num_epochs):
        running_loss = 0.0
        for batch in data_loader:
            optimizer.zero_grad()

            # Stack the fixed-size tensors by batch size
            board = torch.stack([item['board'] for item in batch]).to(device)
            extra = torch.stack([item['extra'] for item in batch]).to(device)

            legal_moves_list = [item['legal_moves'].to(device) for item in batch]
            target_indices = [item['target_index'].to(device) for item in batch]

            outputs = model(board, extra, legal_moves_list)  # list of [N_i] raw scores

            # Cross-entropy per sample (each sample has its own number of
            # classes), averaged over the batch.
            sample_losses = [
                loss_fn(scores.unsqueeze(0), target_idx.unsqueeze(0))
                for scores, target_idx in zip(outputs, target_indices)
            ]
            loss = torch.stack(sample_losses).mean()

            # Backwards propagate loss
            loss.backward()

            optimizer.step()
            running_loss += loss.item()

        avg_loss = running_loss / len(data_loader)
        print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

        # Save model after every epoch
        torch.save(model.state_dict(), MODELS_ROOT/f'{modelname}.pth')



## Train on Magnus Carlsen's Games

We load Magnus Carlsen's move history, merge it with the game metadata,
construct training samples, and train the network.

This provides the full pipeline:

Raw CSV data → Cleaned dataset → PyTorch network → Training loop.


## Test the model

We load the created model and play a healthy game of chess

In [None]:
# Load your trained model
from Agents import Agent
agent_name = input('Player to Load:')
# TODO(review): the original cell left this assignment unfinished
# (`agent = `), which is a syntax error that prevents the cell from running.
# Construct the Agent here once its constructor arguments are known; None
# keeps the cell runnable in the meantime.
agent = None
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet().to(device)
# Restore the weights from the checkpoint file and switch to inference mode.
model.load_state_dict(torch.load('models/magnus_120_epochs.pth', map_location=device))
model.eval()