In [4]:
import random

import chess
import chess.svg
import chess.engine

import torch
import torch.nn as nn

import pandas as pd
import numpy as np
from tqdm import tqdm

from utils.Game_playing import index_map
from utils.Dataloading import fen_to_board

In [2]:
def load_stockfish_data(N=2_000_000):

    """returns the boards, metadata, human moves and stockfish moves for N games"""

    file_path = "Data/StockData.csv"
    new_data = True
    epsilon = 1e-5

    df = pd.read_csv(file_path, nrows=N)
    ind_map = index_map.cpu().numpy()


    df.dropna(axis=0, inplace=True)
    df.reset_index(inplace=True)


    fens = np.array(df['FENs'])
    moves = np.zeros((len(fens), 128))
    h_moves = np.zeros((len(fens), 128))

    bitboards, meta = zip(*[fen_to_board(x) for x in tqdm(fens, total=len(fens))])

    bitboards = np.asarray(bitboards)

    meta = np.asarray(meta)

    if new_data:

        moves[df.index,df['stock_moves'].apply(lambda x: chess.Move.from_uci(x).from_square)] = 1
        moves[df.index, 64 + df['stock_moves'].apply(lambda x: chess.Move.from_uci(x).to_square)] = 1

    else:

        for i in tqdm(range(4)):

            moves[df.index,df[f'move{i}'].apply(lambda x: chess.Move.from_uci(x).from_square)] += df[f'eval{i}'] + epsilon
            moves[df.index, 64 + df[f'move{i}'].apply(lambda x: chess.Move.from_uci(x).to_square)] += df[f'eval{i}'] + epsilon


    h_moves[df.index, df["hmoves"].apply(lambda x: chess.Move.from_uci(x).from_square)] = 1
    h_moves[df.index, 64 + df["hmoves"].apply(lambda x: chess.Move.from_uci(x).to_square)] = 1

    flipped_moves = np.zeros_like(moves)
    flipped_moves[:,:64] = moves[:,ind_map]
    flipped_moves[:,64:] = moves[:,64+ind_map]
    #v_fens = np.vectorize(fen_to_board)
    moves = np.where(np.expand_dims(meta, 1), moves, flipped_moves)

    flipped_moves = np.zeros_like(h_moves)
    flipped_moves[:,:64] = h_moves[:,ind_map]
    flipped_moves[:,64:] = h_moves[:,64+ind_map]
    #v_fens = np.vectorize(fen_to_board)
    h_moves = np.where(np.expand_dims(meta, 1), h_moves, flipped_moves)

    #bitboards = np.array(list(map(fen_to_board, fens)))

    del df

    return bitboards, meta, h_moves, moves

In [79]:
bitboards, meta, human_moves, ai_moves = load_stockfish_data(500_000)

100%|██████████| 500000/500000 [02:07<00:00, 3909.69it/s]


In [85]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model architecture taken from behavioral_cloning.ipynb
class AntiCheatBC(nn.Module):

    def __init__(self, depth):

        super().__init__()
        self.conv1 = nn.Conv2d(4, 64, 3, 1, padding=1, padding_mode = 'zeros')

        self.layers = nn.ModuleList()
        self.depth = depth

        for _ in range(self.depth):
            self.layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.layers.append(nn.BatchNorm2d(64))
            self.layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.layers.append(nn.BatchNorm2d(64))

        self.linear = nn.Linear(4096, 1)
        # self.out_linear = nn.Linear(1024, 1)

    def forward(self, x):

        x = self.conv1(x)

        for i in range(self.depth):
            j = i*4
            ph = x.clone()
            ph = self.layers[j](ph)
            ph = self.layers[j+1](ph)
            ph = nn.functional.relu(ph)
            ph = self.layers[j+2](ph)
            ph = self.layers[j+3](ph)

            x = x + ph
            x = nn.functional.relu(x)


        x = torch.flatten(x, start_dim=1)

        x = self.linear(x)

        # x = self.out_linear(x)
        
        return x

In [16]:
from torch.utils.data import Dataset, DataLoader

import random

In [70]:
class ChessDataConv(torch.utils.data.Dataset):

    def __init__(self, bitboards, white_turn, moves, ai_moves):

        self.bitboards = torch.tensor(bitboards, dtype = torch.float).to(device)

        self.moves = torch.tensor(moves, dtype = torch.float, device = device).view(-1,2,8,8)

        self.ai_moves = torch.tensor(ai_moves, dtype = torch.float, device = device).view(-1,2,8,8)

    def __len__(self):

        return self.moves.size(dim=0)

    def __getitem__(self, idx):
        
        if random.random() > 0.5:
            
            return self.bitboards[idx], self.moves[idx], 0.0
        
        else:
            
            return self.bitboards[idx], self.ai_moves[idx], 1.0

In [69]:
# model architecture taken from behavioral_cloning.ipynb
class MLPv2_1(nn.Module):

    def __init__(self):

        super().__init__()
        self.conv1 = nn.Conv2d(14, 64, 3, 1, padding=1, padding_mode = 'zeros')

        self.layers = nn.ModuleList()

        self.depth = 6

        for _ in range(self.depth):
            self.layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.layers.append(nn.BatchNorm2d(64))
            self.layers.append(nn.Conv2d(64,64, 3, 1, padding=1, padding_mode = 'zeros'))
            self.layers.append(nn.BatchNorm2d(64))

        self.linear = nn.Linear(4096, 128)

    def forward(self, x):

        x = self.conv1(x)

        for i in range(self.depth):
            j = i*4
            ph = x.clone()
            ph = self.layers[j](ph)
            ph = self.layers[j+1](ph)
            ph = nn.functional.relu(ph)
            ph = self.layers[j+2](ph)
            ph = self.layers[j+3](ph)

            x = x + ph
            x = nn.functional.relu(x)


        x = torch.flatten(x, start_dim=1)

        x = self.linear(x)

        minn, ila = x[:,:64], x[:,64:]

        return minn, ila

#import the behavioral cloning model from saved models
RDv2 = torch.load("Models/RDv2.3 CB.pt", map_location= device)

In [80]:
data = ChessDataConv(bitboards, meta, human_moves, ai_moves)

In [86]:
Mk1 = AntiCheatBC(2)

optim = torch.optim.Adam(Mk1.parameters())
criterion = nn.BCEWithLogitsLoss()

In [82]:
loader = DataLoader(data, batch_size = 64, shuffle = True, generator=torch.Generator(device=device))

In [96]:
#Initial Training Loop
eps = 20
losses = []

G = len(loader)

for epoch in range(eps):

    running_loss = 0

    for bitboards, moves, target in tqdm(loader):

        optim.zero_grad()
        our_move = RDv2(bitboards)
        our_move = torch.cat((torch.tensor(our_move[0]).view(-1,1,8,8), torch.tensor(our_move[1]).view(-1,1,8,8)), dim=1)

        output = Mk1(torch.cat((our_move, moves), dim=1))

        loss = criterion(output.view(-1), target.float())

        loss.backward()

        optim.step()

        running_loss += loss.item()

    running_loss /= G

    losses.append(running_loss)
    print(f'Finished epoch {epoch+1} with loss {running_loss:.4f}')
    if epoch % 5 == 4:
        torch.save(Mk1,f"ACbc_Mk1' ep{epoch}.pt")

    if epoch > 3 and losses[-1] > losses[-2]:
        break


print('Finished Training')

  our_move = torch.cat((torch.tensor(our_move[0]).view(-1,1,8,8), torch.tensor(our_move[1]).view(-1,1,8,8)), dim=1)
100%|██████████| 7813/7813 [00:54<00:00, 142.93it/s]


Finished epoch 1 with loss 0.6961


100%|██████████| 7813/7813 [00:55<00:00, 141.54it/s]


Finished epoch 2 with loss 0.6918


100%|██████████| 7813/7813 [00:55<00:00, 140.74it/s]


Finished epoch 3 with loss 0.6893


  3%|▎         | 271/7813 [00:01<00:54, 137.84it/s]


KeyboardInterrupt: 

In [78]:
bitboards.size()

torch.Size([64, 16, 8, 8])

In [56]:
corr = 0
for datum, label in tqdm(loader):

    output = Mk1(datum)
    output = nn.functional.sigmoid(output)

    corr += torch.sum(label == torch.round(output).view(-1))
    
print(f"Accuracy: {corr / len(loader) / 64}")

100%|██████████| 7813/7813 [00:31<00:00, 251.71it/s]

Accuracy: 0.5003679990768433





In [48]:
len(loader) * 64

500032

In [43]:
corr / 64

tensor(250119.0625)