In [10]:
#python level imports
import chess
import gc
import chess.svg
import cv2
from IPython.display import display, SVG

import numpy as np
import random
from tqdm import tqdm
from importlib import reload

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import utils
reload(utils)
import utils

In [2]:
# Probe for CUDA once and reuse the answer for every decision below.
use_cuda = torch.cuda.is_available()

# Make newly created float tensors default to the GPU when one is present,
# otherwise to the CPU.
# NOTE(review): torch.set_default_tensor_type is deprecated in recent PyTorch
# releases - confirm against the installed version before relying on it.
default_tensor_cls = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
torch.set_default_tensor_type(default_tensor_cls)

print(use_cuda)

# `device` is what the later cells pass to .to(...) and map_location=...
device = torch.device('cuda' if use_cuda else 'cpu')

False


In [None]:
# Generate N games' worth of data; only the boards, metadata and moves
# (1st, 2nd and 4th returned values) are kept, the rest are discarded.
# NOTE(review): `generate_data` and `output_file` are not defined in any cell
# visible here - presumably they come from `utils` or an earlier session; confirm.
N_GAMES = 20_000
boards, meta, _, moves, _, _ = generate_data(output_file, N=N_GAMES)

In [4]:
# Behavioral Cloning Model Architecture
class MLPv2_1(nn.Module):
    """Residual convolutional network with two 64-way output heads.

    Architecture (defaults reproduce the original hard-coded network):
      * a 3x3 stem convolution ``in_channels -> channels`` (no activation),
      * ``depth`` residual blocks, each ``conv -> BN -> ReLU -> conv -> BN``
        followed by a skip connection and a ReLU,
      * one linear layer mapping the flattened feature map to 128 outputs,
        which are split into two groups of 64.

    Args:
        depth: number of residual blocks.
        channels: number of feature channels inside the trunk.
        in_channels: number of input planes.
        board_size: spatial side length of the (square) input planes. All
            convolutions use padding 1, so the spatial size is preserved and
            the linear layer sees ``channels * board_size ** 2`` features.

    The module names and their registration order (``conv1``, ``layers``,
    ``linear``) are unchanged, so existing ``state_dict`` keys stay valid, and
    ``forward`` only reads attributes the original class also set, so models
    pickled with the original definition can still be run.
    """

    def __init__(self, depth=6, channels=64, in_channels=14, board_size=8):

        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, channels, 3, 1, padding=1, padding_mode='zeros')

        self.layers = nn.ModuleList()

        self.depth = depth

        # Each residual block contributes exactly four entries to self.layers:
        # conv, batch-norm, conv, batch-norm.
        for _ in range(self.depth):
            self.layers.append(nn.Conv2d(channels, channels, 3, 1, padding=1, padding_mode='zeros'))
            self.layers.append(nn.BatchNorm2d(channels))
            self.layers.append(nn.Conv2d(channels, channels, 3, 1, padding=1, padding_mode='zeros'))
            self.layers.append(nn.BatchNorm2d(channels))

        # With the defaults this is 64 * 8 * 8 = 4096 input features.
        self.linear = nn.Linear(channels * board_size * board_size, 128)

    def forward(self, x):
        """Run the network on a batch of input planes.

        Args:
            x: tensor of shape ``(batch, in_channels, board_size, board_size)``.

        Returns:
            A tuple ``(minn, ila)`` of two ``(batch, 64)`` tensors: the first
            and the last 64 outputs of the final linear layer. They are raw
            (un-normalised) scores.
            NOTE(review): presumably one head per part of the move encoding
            used by the training targets - confirm against the dataset.
        """
        x = self.conv1(x)

        for i in range(self.depth):
            j = i * 4
            # No clone of x is needed: the convolution allocates a new output
            # tensor and never modifies its input in place.
            ph = self.layers[j](x)
            ph = self.layers[j + 1](ph)
            ph = F.relu(ph)
            ph = self.layers[j + 2](ph)
            ph = self.layers[j + 3](ph)

            # Skip connection followed by the block's output activation.
            x = F.relu(x + ph)

        x = torch.flatten(x, start_dim=1)

        x = self.linear(x)

        minn, ila = x[:, :64], x[:, 64:]

        return minn, ila

In [None]:
# Set up model, dataloader, loss criteria and optimizers
RDv2_1 = MLPv2_1().to(device)

# One cross-entropy criterion per output head of the network; the training
# loop sums the two losses.
criterion = nn.CrossEntropyLoss()
criterion2 = nn.CrossEntropyLoss()

optimizer = optim.SGD(RDv2_1.parameters(), lr=0.01, momentum=0.9)
B = 128  # mini-batch size

# The shuffling generator is created on `device` instead of a hard-coded
# 'cuda': torch.Generator(device='cuda') raises on a machine without CUDA,
# while `device` follows the same CUDA-availability check that selected the
# default tensor type.
loader = DataLoader(
    ChessDataConv(boards, meta, moves),
    batch_size=B,
    shuffle=True,
    generator=torch.Generator(device=device),
)

In [None]:
#Initial Training Loop
losses = []  # mean training loss of every finished epoch

G = len(loader)  # number of mini-batches per epoch

for epoch in range(20):

    # Put the network back into training mode at the start of every epoch:
    # the periodic baseline evaluation below switches it to eval mode, and
    # BatchNorm behaves differently in the two modes.
    RDv2_1.train()

    running_loss = 0

    for bitboards, target in tqdm(loader):

        optimizer.zero_grad()

        minn, ila = RDv2_1(bitboards)

        # The target's first 64 columns supervise the first head and the
        # remaining columns supervise the second head.
        loss = criterion(minn, target[:, :64]) + criterion2(ila, target[:, 64:])

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    running_loss /= G

    losses.append(running_loss)
    print(f'Finished epoch {epoch+1} with loss {running_loss:.4f}')

    # Every fifth epoch: checkpoint the whole model and play it against a baseline.
    if epoch % 5 == 4:
        torch.save(RDv2_1,f"RDv2.1 3M' {G*B*(epoch+1)}iters.pt")

        # Evaluate in eval mode and without autograd so that the games neither
        # use per-batch BatchNorm statistics nor update the running statistics.
        # NOTE(review): `RDv2_1_0` is not defined in any cell visible here -
        # presumably an earlier model loaded elsewhere; confirm.
        RDv2_1.eval()
        with torch.no_grad():
            baseline = test_against(
                lambda x: network_agent_prob_conv(x, RDv2_1, with_attacks=True),
                lambda x: network_agent_prob_conv(x, RDv2_1_0, with_attacks=True),
                N=50,
            )
        print(f"Baseline against RDv2.0: {baseline}")

    # Early stopping: after the first few epochs, stop as soon as the mean
    # loss goes up compared with the previous epoch.
    if epoch > 3 and losses[-1] > losses[-2]:
        break


print('Finished Training')

In [None]:
# This is the larger training loop used for the final optimization of the model; it is very similar to the training loop above.
    
loops = 3   # number of data chunks to train on
start = 0   # index of the first chunk (lets an interrupted run be resumed)

losses = []  # mean training loss of every finished epoch, across all chunks

# NOTE(review): this loads a fully pickled model object, so the class it was
# saved from must be importable/defined in this session; only load checkpoint
# files from a trusted source.
RDv2_3 = torch.load("Models/RDv2.3 CB.pt", map_location=device)

criterion = nn.CrossEntropyLoss()
criterion2 = nn.CrossEntropyLoss()

optimizer = optim.SGD(RDv2_3.parameters(), lr=0.001, momentum=0.9)
B = 1024  # mini-batch size

for i in range(start, loops + start):

    # Read the i-th chunk of one million rows from the CSV file.
    boards, meta, _, moves = load_from_csv("Data/lichess_games.csv", N=1_000_000, skip=1_000_000 * i)

    # The shuffling generator is created on `device` instead of a hard-coded
    # 'cuda', which raises on a machine without CUDA.
    loader = DataLoader(
        ChessDataConv(boards, meta, moves),
        batch_size=B,
        shuffle=True,
        generator=torch.Generator(device=device),
    )

    G = len(loader)  # number of mini-batches per epoch for this chunk

    for epoch in range(15):

        # Re-enter training mode every epoch; the evaluation at the end of the
        # previous chunk leaves the network in eval mode.
        RDv2_3.train()

        running_loss = 0

        for bitboards, target in loader:

            optimizer.zero_grad()

            minn, ila = RDv2_3(bitboards)

            # First 64 target columns supervise the first head, the rest the second.
            loss = criterion(minn, target[:, :64]) + criterion2(ila, target[:, 64:])

            loss.backward()

            optimizer.step()

            running_loss += loss.item()

        running_loss /= G

        losses.append(running_loss)
        print(f'Finished epoch {epoch+1} with loss {running_loss:.4f}')

        # Early stopping within a chunk: stop once the mean loss increases.
        if epoch > 3 and losses[-1] > losses[-2]:
            break

    torch.save(RDv2_3,f"RDv2.3 {i} loops.pt")

    # Play against the earlier model in eval mode and without autograd, so the
    # games neither use per-batch BatchNorm statistics nor update the running
    # statistics of either network.
    RDv2_3.eval()
    RDv2_1.eval()
    with torch.no_grad():
        compete = test_against(
            lambda x: network_agent_prob_conv(x, RDv2_3, with_attacks=True),
            lambda x: network_agent_prob_conv(x, RDv2_1, with_attacks=True),
            N=50,
        )
    print(f"Baseline against RDv2.1: {compete}")

    del loader, boards, meta, moves

    # Clean up GPU memory. Collect garbage first so tensors that were only
    # reachable from unreachable Python objects are freed, then release the
    # now-unused cached blocks.
    gc.collect()
    torch.cuda.empty_cache()


print('Finished Training')

# Clear cuda cache stuff

In [14]:
# run this cell to clear cuda

# Collect garbage first: tensors kept alive only by unreachable Python objects
# are freed by the collection, and only then can the caching allocator release
# the blocks they occupied.
n_collected = gc.collect()
torch.cuda.empty_cache()

# Keep the number of collected objects as the cell's displayed output.
n_collected

1395