In [None]:
! pip install chess -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m153.6/154.4 kB[0m [31m5.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd
import chess
import re
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import gc
from random import sample

Getting the chess_games.csv file:
1. Create a Kaggle account and download your kaggle.json file by clicking the "create token" button, so that you can access the Kaggle datasets. Upload the file to the same folder as this notebook (the working directory), because the next cell copies it from there.

In [None]:
# TRAINING ONLY
# Download game data
! pip install kaggle -q
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json # hides your private key (the info in the kaggle.json file)
! kaggle datasets download arevel/chess-games
! unzip -qq /content/chess-games.zip

# Or just download the whole dataset manually and drag it into the same folder as your notebook
# https://www.kaggle.com/datasets/arevel/chess-games

cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/arevel/chess-games
License(s): CC0-1.0
Downloading chess-games.zip to /content
100% 1.45G/1.45G [00:56<00:00, 18.4MB/s]
100% 1.45G/1.45G [00:56<00:00, 27.5MB/s]


In [None]:
# TRAINING ONLY
# Filter game data
MIN_WHITE_ELO = 1900   # only keep games where White is rated above this
MIN_GAME_LENGTH = 20   # minimum number of characters in the move text; shorter games are dropped

# We only need the moves ('AN') and 'WhiteElo' from the dataset
chess_data_raw = pd.read_csv('chess_games.csv', usecols=['AN', 'WhiteElo'])
# Keep the strong games and drop the Elo column in one step
chess_data = chess_data_raw.loc[chess_data_raw['WhiteElo'] > MIN_WHITE_ELO, ['AN']]
# The raw frame is large; release it before doing any further work
del chess_data_raw
gc.collect()
# Remove games with comments, because PGN comments contain special characters such as '{'
# that the move parser can't read.
# regex=False matches the brace literally; na=False keeps the mask boolean even if 'AN' has missing values.
chess_data = chess_data[~chess_data['AN'].str.contains('{', regex=False, na=False)]
# Remove games that are too short (rows with a missing 'AN' are dropped here as well)
chess_data = chess_data[chess_data['AN'].str.len() > MIN_GAME_LENGTH]

In [None]:
# Create a board with the default constructor and print its text diagram
# (uppercase letters are white pieces, lowercase are black, '.' is an empty square).
board = chess.Board()
print(board)

r n b q k b n r
p p p p p p p p
. . . . . . . .
. . . . . . . .
. . . . . . . .
. . . . . . . .
P P P P P P P P
R N B Q K B N R


The Board object must be converted into a tensor before it can be input into the neural network. \
How? One idea might be to use different numbers to represent different pieces (e.g. pawn 1, knight 3, bishop 5, rook 7, queen 9, king 11), but that is misleading in terms of the relationships between the pieces. It suggests that a pawn is more similar to a knight than to a bishop or a rook, which might be true in terms of material value but definitely not in terms of movement rules, which the network must learn for itself. Besides, what material value would we assign to the king? Its coverage is far worse than that of a queen or a rook, but we also can't afford to lose it!

The alternative? We'll use bitboards to represent the board, which means using an 8x8 layer of ones, negative ones, and zeroes for each type of piece. \
Our Rook bitboard would look like this at the beginning of the game:
<pre>
[-1, 0, 0, 0, 0, 0, 0, -1]
[ 0, 0, 0, 0, 0, 0, 0,  0]
[ 0, 0, 0, 0, 0, 0, 0,  0]
[ 0, 0, 0, 0, 0, 0, 0,  0]
[ 0, 0, 0, 0, 0, 0, 0,  0]
[ 0, 0, 0, 0, 0, 0, 0,  0]
[ 0, 0, 0, 0, 0, 0, 0,  0]
[ 1, 0, 0, 0, 0, 0, 0,  1]
</pre>
'1' represents white pieces, while '-1' represents black pieces, and '0' marks every square that does not hold a piece of this type.

In [None]:
# Meaning of the term 'tensor':
# a scalar is a 0-D tensor, a list (vector) is a 1-D tensor, a nested list (matrix) is a 2-D tensor, a doubly nested list is a 3-D tensor, and so on.

# Lookup tables between chess file letters ('a'..'h') and column indices (0..7).
letter_2_num = {letter: column for column, letter in enumerate('abcdefgh')}
num_2_letter = {column: letter for letter, column in letter_2_num.items()}

# use the create_rep_layer function on each piece and stack the layers together to create a 3D tensor (6x8x8) representing the board state
# 6 pieces + 8 rows and 8 columns for each bitboard
def board_2_rep(board):
	"""Encode a board as a stack of per-piece layers.

	One layer is built with create_rep_layer for each piece symbol, in the
	order pawn, rook, knight, bishop, queen, king, and the layers are stacked
	along a new first axis.

	Args:
		board: the board object to encode (it is passed straight to create_rep_layer).

	Returns:
		A numpy array holding the six layers in the order listed above.
	"""
	piece_order = ('p', 'r', 'n', 'b', 'q', 'k')
	return np.stack([create_rep_layer(board, symbol) for symbol in piece_order])


def create_rep_layer(board, type):
	"""Build the layer for a single piece type from the board's text diagram.

	Args:
		board: object whose str() is the board diagram (rows separated by
			newlines, squares separated by single spaces).
		type: lowercase piece symbol, e.g. 'p' or 'r'.

	Returns:
		A 2D numpy array of float32 values with one row per diagram line:
		1 where the uppercase symbol stands, -1 where the lowercase symbol
		stands, and 0 everywhere else.
	"""
	diagram = str(board)
	# Everything that is not the chosen piece (either case), a space or a newline becomes '0'
	diagram = re.sub(f'[^{type}{type.upper()} \n]', '0', diagram)
	# Lowercase symbol -> -1
	diagram = re.sub(f'{type}', '-1', diagram)
	# Uppercase symbol -> 1
	diagram = re.sub(f'{type.upper()}', '1', diagram)

	# Parse the rewritten diagram: one list of float32 values per line
	grid = [
		[np.float32(cell) for cell in line.split(' ')]
		for line in diagram.split('\n')
	]
	return np.array(grid)

# moves are represented as a 3D tensors (2x8x8) with one (8x8) matrix representing which square the piece was moved from, and another representing which square the piece was moved to
# pawn e4 would look like this:
# 'from_square' matrix
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 1, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# 'to_square' matrix
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 1, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
# [0, 0, 0, 0, 0, 0, 0, 0]
def move_2_rep(move, board):
	"""Encode a move as two 8x8 one-hot layers ('from' square and 'to' square).

	The move is pushed onto the board to obtain its UCI string (e.g. 'e2e4')
	and popped again straight away.

	Args:
		move: the move in the notation accepted by board.push_san.
		board: the board on which the move is to be played.

	Returns:
		A (2, 8, 8) numpy array: layer 0 marks the origin square and layer 1
		the destination square. Row 0 corresponds to rank 8, column 0 to file 'a'.
	"""
	uci = board.push_san(move).uci()
	board.pop()

	planes = np.zeros((2, 8, 8))
	# uci[0:2] is the origin square, uci[2:4] the destination square
	planes[0, 8 - int(uci[1]), letter_2_num[uci[0]]] = 1
	planes[1, 8 - int(uci[3]), letter_2_num[uci[2]]] = 1
	return planes

# this function removes all the digits represeting the move numbers e.g. the "1." and "2." in (1. e4 e5 2. Nf3 Nf6)
# It then splits the string of moves into a list of moves. The final move is dropped since it usually contains special characters like '#' for checkmate
# Input: "1. f3 e6 2. g4 Qh4#"
# Output: ['f3', 'e6', 'g4']
def create_move_list(s):
	"""Split a move string into a list of moves, dropping the final token.

	Move numbers such as "1. " and "2. " are removed, the rest is split on
	single spaces, and the last token is discarded.

	Args:
		s: the move text of one game, e.g. "1. f3 e6 2. g4 Qh4#".

	Returns:
		The list of move tokens without the last one, e.g. ['f3', 'e6', 'g4'].
	"""
	# Raw string: '\d' and '\.' are invalid escapes in a normal string literal
	# and trigger a SyntaxWarning on recent Python versions.
	return re.sub(r'\d*\. ', '', s).split(' ')[:-1]

In [None]:
#TRAINING ONLY
# Creating a dataset class that inherits from pytorch's Dataset class
# The Dataset object's role is to convert the data from the chess_games.csv file into a dataset the neural network can understand
class ChessDataset(Dataset):
	"""Dataset that samples random (position, next move) pairs from a collection of games.

	Every item is drawn at random: a random game is picked, then a random
	point inside that game. The `index` passed to __getitem__ is ignored.

	Args:
		games: the move strings of the games (a pandas Series or any
			sequence that supports len() and integer indexing).
		epoch_size: number of examples reported by __len__, i.e. how many
			samples one pass over the dataset produces. The original size
			was 40000; the default is small so a short training run is quick.
	"""

	def __init__(self, games, epoch_size=1000):
		super().__init__()
		self.games = games
		self.epoch_size = epoch_size

	def __len__(self):
		return self.epoch_size

	def __getitem__(self, index):
		games = self.games
		game_i = np.random.randint(len(games))
		# Read from the games handed to the constructor (positionally), not from a notebook global
		random_game = games.iloc[game_i] if hasattr(games, 'iloc') else games[game_i]
		moves = create_move_list(random_game)
		# NOTE: np.random.randint raises ValueError when the game has fewer than two moves
		game_state_i = np.random.randint(len(moves) - 1)
		# The move that we want the network to predict
		next_move = moves[game_state_i]
		# Replay the moves that come before it to reach the position in which it was played
		board = chess.Board()
		for move in moves[:game_state_i]:
			board.push_san(move)
		x = board_2_rep(board)
		y = move_2_rep(next_move, board)
		if game_state_i % 2 == 1:
			# Black to move: flip the signs so that positive numbers always
			# represent the side whose turn it is
			x *= -1
		return x, y  # (position, next_move)

# Build the training dataset from the game strings and wrap it in a DataLoader,
# an iterable that feeds batches of position + next_move pairings.
data_train = ChessDataset(chess_data['AN'])
data_train_loader = DataLoader(
	data_train,
	batch_size=32,   # 32 pairings per batch; generally faster than backpropagating one example at a time
	shuffle=True,    # randomize the order of the dataset
	drop_last=True,  # skip the final batch if fewer than 32 pairings are left
)

# Number of batches per epoch
print(len(data_train_loader))

# Fetch one batch and show its shapes
i = next(iter(data_train_loader))
print(i[0].shape)  # 32 position tensors (6 layers, one per piece type)
print(i[1].shape)  # 32 corresponding next_move tensors (2 layers: 'from_square' and 'to_square')

31
torch.Size([32, 6, 8, 8])
torch.Size([32, 2, 8, 8])


In [None]:
# One repeatable chunk of the ChessNet
class ChessNetLayer(nn.Module):
	"""One repeatable residual block of the ChessNet.

	Structure: conv -> batch norm -> SELU -> conv -> batch norm -> add the
	block input (skip connection) -> SELU. The 3x3 convolutions use stride 1
	and padding 1, so the output has the same shape as the input.

	Args:
		hidden_size: number of input and output channels of the block.
	"""

	def __init__(self, hidden_size=200):
		super().__init__()
		self.conv1 = nn.Conv2d(hidden_size, hidden_size, 3, 1, 1) # Convolutional layers help with pattern recognition
		self.conv2 = nn.Conv2d(hidden_size, hidden_size, 3, 1, 1)
		self.bn1 = nn.BatchNorm2d(hidden_size) # Batch normalization is like how we normalize/scale data when we feed it into the network, but instead it's between layers
		self.bn2 = nn.BatchNorm2d(hidden_size)
		self.activation1 = nn.SELU() # SELU is like RELU but without the dying RELU problem
		self.activation2 = nn.SELU()

	def forward(self, x):
		"""Apply the block to x (channels must equal hidden_size) and return a tensor of the same shape."""
		# Keep a reference to the input for the skip connection. No copy is needed
		# because nothing below modifies x in place.
		residual = x
		out = self.conv1(x)
		out = self.bn1(out)
		out = self.activation1(out)
		out = self.conv2(out)
		out = self.bn2(out)
		# Skip connection to help against vanishing gradients: it provides a more
		# direct path between the final output and the input.
		out = out + residual
		return self.activation2(out)

class ChessNet(nn.Module):
	"""Convolutional network that maps a 6-layer position tensor to 2 score layers.

	Args:
		hidden_layers: number of ChessNetLayer blocks between input and output.
		hidden_size: number of channels used inside the network.
	"""

	def __init__(self, hidden_layers=6, hidden_size=500):
		super().__init__()
		self.hidden_layers = hidden_layers
		# Takes the 6 layers of the position tensor
		self.input_layer = nn.Conv2d(6, hidden_size, 3, 1, 1)
		# [hidden_layers] instances of the hidden block, applied one after the other
		blocks = [ChessNetLayer(hidden_size) for _ in range(hidden_layers)]
		self.module_list = nn.ModuleList(blocks)
		# Produces one score layer for the 'from' square and one for the 'to' square.
		# Squares with higher numbers are the ones that the network prefers.
		self.output_layer = nn.Conv2d(hidden_size, 2, 3, 1, 1)

	def forward(self, x):
		"""Run the input convolution, every hidden block in order, then the output convolution."""
		hidden = F.relu(self.input_layer(x))
		for block in self.module_list:
			hidden = block(hidden)
		return self.output_layer(hidden)

In [None]:
#TRAINING ONLY RUN ONCE
# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of crashing on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ChessNet().to(device) # .to(device) sends the network's parameters to the chosen device
metric_from = nn.CrossEntropyLoss() # Setting the function we'll use to calculate loss.
metric_to = nn.CrossEntropyLoss() # Usually some function of the difference between the output of the network and the actual next_move

In [None]:
#TRAINING ONLY
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5) # Method of minimizing the loss function. lr = learn rate, weight decay penalizes very large weights
# Scheduler multiplies the learning rate by a factor of gamma every step_size epochs.
# NOTE: with step_size=30 and epochs=10 the learning rate is never reduced during this run.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
epochs = 10

# Send each batch to whatever device the model lives on instead of assuming 'cuda'
train_device = next(model.parameters()).device
model.train() # make sure batch norm uses batch statistics while training

for e in range(epochs):
	running_loss = 0
	for position, move in data_train_loader:
		position = position.to(train_device)
		move = move.to(train_device)
		output = model(position)

		# Treat each 8x8 layer as ONE choice among 64 squares: flatten the board
		# to 64 logits and use the index of the marked square as the target class.
		# (Passing the (N, 8, 8) slices directly makes CrossEntropyLoss take the
		# softmax over the 8 ranks of every file separately, so the scores of
		# squares on different files are never compared with each other.)
		from_logits = output[:, 0].flatten(1)           # (batch, 64) scores for the 'from' square
		to_logits = output[:, 1].flatten(1)             # (batch, 64) scores for the 'to' square
		from_target = move[:, 0].flatten(1).argmax(dim=1) # (batch,) index of the true 'from' square
		to_target = move[:, 1].flatten(1).argmax(dim=1)   # (batch,) index of the true 'to' square

		loss_from = metric_from(from_logits, from_target)
		loss_to = metric_to(to_logits, to_target)
		loss = loss_from + loss_to

		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
		running_loss += loss.item()
	print(f"Training loss: {running_loss/len(data_train_loader)}")
	scheduler.step()



In [None]:
def check_mate_single(board):
	"""Look for a legal move that checkmates immediately.

	The search runs on a copy, so the board passed in is left untouched.

	Args:
		board: the position to search.

	Returns:
		The first legal move after which the position is checkmate, or None
		when no such move exists.
	"""
	scratch = board.copy()
	for candidate in list(scratch.legal_moves):
		scratch.push(candidate)
		delivers_mate = scratch.is_checkmate()
		played = scratch.pop()  # always undo the trial move
		if delivers_mate:
			return played
	return None

# Converts numbers of the from_square and to_square matrices into probabilities that sum  to 1
def distribution_over_moves(vals):
	"""Convert raw scores into probabilities that sum to 1.

	The result is a softmax whose probabilities are cubed and renormalized,
	which sharpens the distribution towards the highest scores. That is the
	same as a softmax over 3 * vals, which is how it is computed here.

	Args:
		vals: a sequence of numeric scores.

	Returns:
		A numpy float64 array with one probability per score (empty if vals is empty).
	"""
	scores = np.asarray(vals, dtype=np.float64)
	if scores.size == 0:
		return scores
	# Subtracting the maximum leaves the result unchanged but keeps np.exp from
	# overflowing to inf (and the division from producing NaN) for large scores.
	weights = np.exp(3.0 * (scores - scores.max()))
	return weights / weights.sum()

def choose_move(board, model):
	"""Pick a move for the side to move using the network's scores.

	Steps:
	1. If a legal move gives checkmate immediately, play it.
	2. Otherwise sample the origin square among the origins of all legal
	   moves, with probabilities taken from the network's 'from' layer.
	3. Among the legal moves leaving that square, return the one whose
	   destination has the highest score in the network's 'to' layer.

	Args:
		board: the current position.
		model: the network; called with a (1, 6, 8, 8) tensor and expected to
			return a (1, 2, 8, 8) tensor of scores.

	Returns:
		The chosen legal move.

	Raises:
		ValueError: if the position has no legal moves.
	"""
	legal_moves = list(board.legal_moves)
	if not legal_moves:
		raise ValueError("choose_move called on a position with no legal moves")

	mating_move = check_mate_single(board)
	if mating_move is not None:
		return mating_move

	x = torch.Tensor(board_2_rep(board)) # Convert board into tensor
	if board.turn == chess.BLACK:
		# Black to move: flip the signs, since positive numbers indicate the player currently making a move
		x *= -1
	x = x.unsqueeze(0) # (6, 8, 8) -> (1, 6, 8, 8), the shape the network expects

	# Put the input on the same device as the network's weights
	first_param = next(model.parameters(), None)
	if first_param is not None:
		x = x.to(first_param.device)

	# Inference only: evaluation mode (batch norm uses its running statistics) and no
	# gradient tracking, so the scores can be converted to numpy. The previous training
	# mode is restored afterwards, even if the forward pass fails.
	was_training = model.training
	model.eval()
	try:
		with torch.no_grad():
			scores = model(x)[0].cpu().numpy() # (2, 8, 8): [0] = 'from' scores, [1] = 'to' scores
	finally:
		model.train(was_training)

	# Stage 1: sample the origin square. A square name such as 'e2' maps to
	# row 8 - rank and column letter_2_num[file].
	from_squares = sorted({str(legal_move)[:2] for legal_move in legal_moves})
	from_vals = [
		scores[0, 8 - int(square[1]), letter_2_num[square[0]]]
		for square in from_squares
	]
	probs = distribution_over_moves(from_vals)
	rng = np.random.default_rng()
	chosen_from = str(rng.choice(from_squares, p=probs))

	# Stage 2: only moves that start on the chosen square compete. (Giving the other
	# moves a score of 0 would let them win the argmax whenever every candidate's
	# score is negative.)
	candidates = [m for m in legal_moves if str(m)[:2] == chosen_from]
	to_vals = [
		scores[1, 8 - int(str(m)[3]), letter_2_num[str(m)[2]]]
		for m in candidates
	]
	return candidates[int(np.argmax(to_vals))]

# Smoke test: create a fresh board and print the move that choose_move picks for it
board = chess.Board()
print(choose_move(board, model))
