Import the necessary packages. Make sure to use a virtual environment and run 'pip install -r requirements.txt' first.

In [None]:
import pandas as pd
import chess
import numpy as np
import torch, torchvision
import sklearn.metrics as skmetrics
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import warnings

# Silence library warnings to keep the notebook output readable
warnings.filterwarnings(action='ignore')

# Seed NumPy's global random number generator for reproducibility.
# np.random.seed has to be *called*: assigning to it would replace the
# function with an integer and leave the generator unseeded.
np.random.seed(1)

First, import and process data.

The `X` array holds every sample. Each sample has a 12x8x8 feature space, representing an 8x8 bitboard for each of the 12 piece types (stored flattened as 12x64).

*Currently, only the first 100 rows of the dataset are loaded; one bitboard is appended per row.

In [None]:
# Load the dataset; the 'FEN' and 'Evaluation' columns are read below
df = pd.read_csv('toyChessData.csv')

# Bitboard plane index for each piece symbol:
# black pieces (lowercase) use planes 0-5, white pieces (uppercase) use planes 6-11
key = {'r': 0, 'n': 1, 'b': 2, 'q': 3,'k':4, 'p': 5,'R': 6, 'N': 7, 'B': 8, 'Q': 9,'K': 10, 'P': 11}

x = []
y = []

# Transform the first 100 rows into (bitboard, evaluation) pairs
sample_rows = df.head(100)
for fen, raw_evaluation in zip(sample_rows['FEN'], sample_rows['Evaluation']):
    board = chess.Board(fen)

    # 12 piece planes x 64 squares; a 1 marks a piece of that type on that square
    bitboard = np.zeros((12, 64), dtype=np.int8)

    for square in chess.SQUARES:
        piece = board.piece_at(square)
        if piece:
            bitboard[key[piece.symbol()], square] = 1

    x.append(bitboard)

    # str() keeps this working when pandas parses an all-numeric column as
    # integers; the name also avoids shadowing the builtin eval()
    evaluation = str(raw_evaluation).strip().strip('+')
    if '#' in evaluation:
        # Mate score such as '#+3' or '#-2': keep only the sign, scaled to +/-1000
        y.append(int(np.sign(int(evaluation[1:]))) * 1000)
    else:
        y.append(int(evaluation))

X = np.array(x)  # N x 12 x 64 array of flattened bitboards, one entry per sample
y = np.array(y, dtype=np.int64)  # int64 so the labels can be used as class indices

# Binarize the evaluations into class labels
y[y > 0] = 1  # positive evaluation: white is ahead
y[y <= 0] = 0  # zero or negative evaluation: black is ahead (or the position is level)

Next, this section visualizes the chessboard and its vectorized counterpart using the `plot_images` function. The function is taken from assignment 4 and outputs a black-and-white image of each piece's position on the board.

In [None]:
def plot_images(X, n_row, n_col, title, cmap='gray'):
    '''
    Creates n_row by n_col panel of images

    If X holds fewer than n_row x n_col images, only the available images
    are drawn and the remaining panels are left empty.

    Args:
        X : numpy array
            N x h x w numpy array
        n_row : int
            number of rows in figure
        n_col : int
            number of columns in figure
        title : str
            title of plot
        cmap : str
            colormap to use for visualizing images
    '''

    fig = plt.figure()

    # Set title as figure super title
    fig.suptitle(title)

    # Never index past the end of X when the grid has more panels than images
    n_panel = min(len(X), n_row * n_col)

    for i in range(n_panel):
        ax = fig.add_subplot(n_row, n_col, i + 1)
        ax.imshow(X[i], cmap=cmap)

        # Hide ticks and the frame on this specific axes rather than relying
        # on pyplot's implicit "current axes"
        ax.axis('off')
        ax.set_frame_on(False)

    plt.show()

In [None]:
# Unflatten the first sample: (12, 64) -> (12, 8, 8), one 8 x 8 board per piece plane
X_reshaped = X[0].reshape((X[0].shape[0], 8, 8))

# Show the 12 piece planes on a 4 x 3 grid
plot_images(X_reshaped, n_row=4, n_col=3, title='Chess board dataset')

# Flatten each plane into a single row: (12, 8, 8) -> (12, 1, 64)
X_vectorized = X_reshaped.reshape((X_reshaped.shape[0], 1, -1))

# Show the flattened planes stacked in a 12 x 1 column
plot_images(X_vectorized, n_row=12, n_col=1, title='Vectorized chess board dataset')

Neural Network set-up

In [None]:
# Hyper-parameters for the training run

# Name of the model to build; also used in the checkpoint file name
MODEL_NAME = 'neural_network'

# Number of samples in each training batch
N_BATCH = 64

# Number of full passes over the training dataset
N_EPOCH = 5

# Initial step size used by the optimizer
LEARNING_RATE = 0.1

# Factor the learning rate is multiplied by at the end of each decay period
LEARNING_RATE_DECAY = 0.5

# Number of epochs between two consecutive learning rate decays
LEARNING_RATE_DECAY_PERIOD = 25

In [None]:
class NeuralNetwork(torch.nn.Module):
    '''
    Fully connected network: five ReLU-activated hidden layers followed by a linear output layer

    Arg(s):
        n_input_feature : int
            number of input features
        n_output : int
            number of output classes
    '''

    def __init__(self, n_input_feature, n_output):
        super().__init__()

        # Hidden layers widen from 175 up to 4096 units and then narrow back to 175
        # https://pytorch.org/docs/stable/generated/torch.nn.Linear.html
        self.fully_connected_layer_1 = torch.nn.Linear(n_input_feature, 175)
        self.fully_connected_layer_2 = torch.nn.Linear(175, 1750)
        self.fully_connected_layer_3 = torch.nn.Linear(1750, 4096)
        self.fully_connected_layer_4 = torch.nn.Linear(4096, 1750)
        self.fully_connected_layer_5 = torch.nn.Linear(1750, 175)

        # Output layer maps the last hidden representation to one logit per class
        self.output = torch.nn.Linear(175, n_output)

    def forward(self, x):
        '''
        Forward pass through the neural network

        Arg(s):
            x : torch.Tensor[float32]
                tensor of N x d
        Returns:
            torch.Tensor[float32]
                tensor of N x n_output logits (no softmax is applied)
        '''

        # https://pytorch.org/docs/stable/generated/torch.nn.functional.relu.html
        relu = torch.nn.functional.relu

        hidden_layers = (
            self.fully_connected_layer_1,
            self.fully_connected_layer_2,
            self.fully_connected_layer_3,
            self.fully_connected_layer_4,
            self.fully_connected_layer_5,
        )

        # Apply each hidden layer followed by its ReLU activation, in order
        activation = x
        for layer in hidden_layers:
            activation = relu(layer(activation))

        return self.output(activation)

In [None]:
def train(model,
          dataloader,
          n_epoch,
          optimizer,
          learning_rate_decay,
          learning_rate_decay_period,
          device):
    '''
    Trains the model using optimizer and specified learning rate schedule

    Each batch is flattened to N x d before it is passed to the model; no dtype
    conversion is performed, so the boards must already have a dtype the model accepts.

    Arg(s):
        model : torch.nn.Module
            neural network or logistic regression
        dataloader : torch.utils.data.DataLoader
            # https://pytorch.org/docs/stable/data.html
            dataloader for training data, yielding (boards, labels) batches
        n_epoch : int
            number of epochs to train
        optimizer : torch.optim
            https://pytorch.org/docs/stable/optim.html
            optimizer to use for updating weights
        learning_rate_decay : float
            rate of learning rate decay
        learning_rate_decay_period : int
            period to reduce learning rate based on decay e.g. every 2 epoch
        device : str
            device to run on; 'gpu' or 'cuda' selects CUDA, anything else selects the CPU
    Returns:
        torch.nn.Module : trained network
    '''

    device = 'cuda' if device in ('gpu', 'cuda') else 'cpu'
    device = torch.device(device)

    # Move model to device
    model = model.to(device)

    # Define cross entropy loss
    # https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
    loss_func = torch.nn.CrossEntropyLoss()

    for epoch in range(n_epoch):

        # Accumulate total loss and number of processed batches for each epoch
        total_loss = 0.0
        n_batch = 0

        # Decrease learning rate when learning rate decay period is met
        # (never on the very first epoch) by scaling it directly in param_groups
        if epoch and epoch % learning_rate_decay_period == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= learning_rate_decay

        for boards, labels in dataloader:

            # Move boards and labels to device
            boards = boards.to(device)
            labels = labels.to(device)

            # Vectorize each sample: (N, ...) -> (N, d)
            boards = boards.reshape(boards.shape[0], -1)

            # Clear gradients so we don't accumulate them from previous batches
            optimizer.zero_grad()

            # Forward through the model
            outputs = model(boards)

            # Compute loss function
            loss = loss_func(outputs, labels)

            # Update parameters by backpropagation
            loss.backward()
            optimizer.step()

            # Accumulate total loss for the epoch
            total_loss += loss.item()
            n_batch += 1

        # An empty dataloader (e.g. drop_last=True with fewer samples than one
        # batch) must not crash with a division by zero; report NaN instead
        mean_loss = total_loss / n_batch if n_batch else float('nan')

        # Log average loss over the epoch
        print('Epoch={}/{}  Loss: {:.3f}'.format(epoch + 1, n_epoch, mean_loss))

    return model


In [None]:
# Preprocessing pipeline applied to each sample: a single conversion to a torch tensor
# https://pytorch.org/vision/stable/transforms.html
transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# File the trained weights are written to, named after the selected model
checkpoint_path = f'./checkpoint-{MODEL_NAME}.pth'

In [None]:
# Dataset that pairs each bitboard with its label
class ChessDataset(torch.utils.data.Dataset):
    '''
    Map-style dataset over two parallel, indexable collections

    Arg(s):
        bitboards : sequence
            indexable collection of samples
        labels : sequence
            indexable collection of labels, indexed the same way as bitboards
        transform : callable or None
            optional function applied to a sample each time it is fetched
    '''

    def __init__(self, bitboards, labels, transform=None):
        self.bitboards, self.labels = bitboards, labels
        self.transform = transform

    def __len__(self):
        # The dataset size is the number of samples
        return len(self.bitboards)

    def __getitem__(self, idx):
        board, target = self.bitboards[idx], self.labels[idx]

        # Only the sample is transformed; the label is returned untouched
        return (self.transform(board) if self.transform else board), target

# Setup the dataset and data loader.
# The bitboards are stored as int8, and ToTensor only converts uint8 arrays to
# float, so cast to float32 here; the model's linear layers need float inputs.
bitboards = X.astype(np.float32)

# CrossEntropyLoss expects class indices as 64-bit integers
labels = y.astype(np.int64)

chess_dataset = ChessDataset(bitboards, labels, transform=transforms)

# NOTE: with drop_last=True, a trailing batch smaller than N_BATCH is discarded
dataloader_train = torch.utils.data.DataLoader(
    chess_dataset,
    batch_size=N_BATCH,
    shuffle=True,
    drop_last=True,
    num_workers=2)

# Number of output classes
n_class = 2 # 0 for black, 1 for white

# Number of input features = product of every per-sample dimension.
# Computed from the shape so it works for both N x 12 x 64 and N x 12 x 8 x 8
# layouts (indexing a fixed fourth axis fails on the 3-D array built above).
n_input_feature = int(np.prod(bitboards.shape[1:]))

# Instantiate the model based on MODEL_NAME
if MODEL_NAME == 'neural_network':
    model = NeuralNetwork(n_input_feature, n_class)
else:
    raise ValueError('Unsupported model name: {}'.format(MODEL_NAME))

# Setup optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

# Train the model, using the GPU only when one is actually available
model.train()
model = train(
    model,
    dataloader_train,
    N_EPOCH,
    optimizer,
    learning_rate_decay=LEARNING_RATE_DECAY,
    learning_rate_decay_period=LEARNING_RATE_DECAY_PERIOD,
    device='cuda' if torch.cuda.is_available() else 'cpu')

# Save weights
torch.save({'state_dict' : model.state_dict()}, checkpoint_path)