In [3]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import chess
from generate_training_data import ChessDataset
import numpy as np

In [7]:
train_data = ChessDataset(num_examples=16384)
train_data_loader = DataLoader(train_data, batch_size=128, shuffle=True)

In [8]:
class ChessNetCNN(nn.Module):
    def __init__(self, hidden_size):
        super(ChessNetCNN, self).__init__()
        self.hidden_size = hidden_size
        self.conv1 = nn.Conv2d(in_channels=12, out_channels=hidden_size, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=hidden_size, out_channels=hidden_size, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm2d(hidden_size)
        self.activation = nn.ReLU()
        
        self.fc1 = nn.Linear(hidden_size * 8 * 8, 256)
        self.out1 = nn.Linear(256, 64)
        self.out2 = nn.Linear(256, 64)
        
    def forward(self, x):
        x = self.activation(self.bn(self.conv1(x)))
        x = self.activation(self.bn(self.conv2(x)))
        x = self.activation(self.bn(self.conv3(x)))
        x = x.view(-1, self.hidden_size * 8 * 8)
        x = self.activation(self.fc1(x))
        return self.out1(x), self.out2(x)
        

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ChessNetCNN(128).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.5)  # Learning rate scheduler

In [10]:
num_epochs = 200
for epoch in range(num_epochs):
    
    for batch, (board, move) in enumerate(train_data_loader):
        board = board.to(device)
        sources = move[:, 0].to(device)
        destinations = move[:, 1].to(device)
        
        pred_sources, pred_destinations = model(board)
        loss_from = criterion(pred_sources, sources)
        loss_to = criterion(pred_destinations, destinations)
        
        optimizer.zero_grad()
        loss = loss_from + loss_to
        loss.backward()
        optimizer.step()
        
    scheduler.step()
        
    if epoch % 10 == 0:
        print(f'Epoch {epoch + 1}/{num_epochs} Loss: {loss.item():.4f}')

Epoch 1/200 Loss: 7.3161
Epoch 11/200 Loss: 2.4195
Epoch 21/200 Loss: 0.5299
Epoch 31/200 Loss: 0.2902
Epoch 41/200 Loss: 0.2426
Epoch 51/200 Loss: 0.3026
Epoch 61/200 Loss: 0.2932
Epoch 71/200 Loss: 0.3732
Epoch 81/200 Loss: 0.1621
Epoch 91/200 Loss: 0.3036
Epoch 101/200 Loss: 0.1796
Epoch 111/200 Loss: 0.2729
Epoch 121/200 Loss: 0.1770
Epoch 131/200 Loss: 0.0984
Epoch 141/200 Loss: 0.1978
Epoch 151/200 Loss: 0.0835
Epoch 161/200 Loss: 0.1173
Epoch 171/200 Loss: 0.2038
Epoch 181/200 Loss: 0.0895
Epoch 191/200 Loss: 0.1701


In [11]:
PATH = 'chess_net_CNN___.pth'
torch.save(model.state_dict(), PATH)

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ChessNetCNN(hidden_size=128).to(device)
model.load_state_dict(torch.load('chess_net_CNN___.pth'))

<All keys matched successfully>

In [13]:
import chess.engine

engine = chess.engine.SimpleEngine.popen_uci(r"C:\Users\jaint\stockfish\stockfish-windows-x86-64-avx2")


# stockfish's evaluation for a position will be the reward for the RL algorithm

def evaluate_board(board):
    result = engine.analyse(board, chess.engine.Limit(
        time=0.1))  # gives stockfish score of the current position (scaled up by 100)
    evaluation = result["score"]
    if evaluation.is_mate():  # score() returns None if the position has forced mate - so it is handled separately
        plies = evaluation.pov(chess.WHITE).mate()
        if plies > 0:  # White is the one checkmating
            return 21 - plies  # return a large positive score that decays with the number of moves till mate
        return -21 - plies  # Black is the one checkmating
    return result["score"].relative.score() / 100

In [15]:
from chess_board import get_chess_board, square_to_uci
import random

memory = []
max_memory = 10000
epsilon = 0.2  # exploration chance
batch_size = 1
gamma = 0.99

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


def random_board(max_depth=30):
    depth = random.randint(0, max_depth)
    board = chess.Board()
    try:
        for _ in range(depth):
            board.push(random.choice(list(board.legal_moves)))
        return board
    except IndexError:
        return board


def choose_action(curr_board):
    if random.random() < epsilon:
        return random.choice(list(curr_board.legal_moves))

    tensor = torch.from_numpy(get_chess_board(board).reshape(1, 12, 8, 8).astype(np.float32)).to(device)
    move_source, move_destination = model(tensor)
    move_source = square_to_uci(torch.argmax(move_source, 1)[0].data.item())
    move_destination = square_to_uci(torch.argmax(move_destination, 1)[0].data.item())
    if move_source == move_destination:  # NULL move
        UCI = '0000'
    else:
        UCI = move_source + move_destination

    return chess.Move.from_uci(UCI)  # convert it to chess' Move class


def train():
    if len(memory) < batch_size:
        return

    batch = random.sample(memory, batch_size)
    next_states, actions, rewards, dones = zip(*batch)

    rewards = torch.tensor(rewards, dtype=torch.float32).reshape(batch_size).to(device)  # Convert rewards to a tensor
    dones = torch.tensor(dones, dtype=torch.float32).reshape(batch_size).to(device)  # Convert dones to a tensor

    q_from = torch.tensor([a.from_square for a in actions]).to(device)
    q_to = torch.tensor([a.to_square for a in actions]).to(device)

    next_from_values = torch.zeros(batch_size).to(device)
    next_to_values = torch.zeros(batch_size).to(device)

    for i, s in enumerate(next_states):
        if s:
            scores = {}
            for move in s.legal_moves:
                s.push(move)
                scores[move] = evaluate_board(s)
                s.pop()
            if s.turn == chess.WHITE:
                best_move, _ = max(scores.items(), key=lambda x: x[1])
            else:
                best_move, _ = min(scores.items(), key=lambda x: x[1])
            next_from_values[i] = best_move.from_square
            next_to_values[i] = best_move.to_square

    y_from = rewards + gamma * next_from_values * (1 - dones)
    y_to = rewards + gamma * next_to_values * (1 - dones)

    loss_f = loss_fn(q_from, y_from)
    loss_t = loss_fn(q_to, y_to)

    loss = loss_f + loss_t
    loss.requires_grad = True
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


for episode in range(2000):

    board = random_board()

    while not board.is_game_over():
        state = torch.from_numpy(get_chess_board(board).astype(np.float32))
        action = choose_action(board)

        if action in list(board.legal_moves):
            reward = evaluate_board(board)
            done = board.is_game_over()
            board.push(action)
            memory.append((board, action, reward, done))
        else:
            reward = -60
            done = 1
            memory.append((None, action, reward, done))

        if len(memory) > max_memory:
            memory.pop(0)

        if done:
            break
    train()
    if episode % 20 == 0:
        print(f"Episode: {episode}")

Episode: 0
Episode: 20
Episode: 40
Episode: 60
Episode: 80
Episode: 100
Episode: 120
Episode: 140
Episode: 160
Episode: 180
Episode: 200
Episode: 220
Episode: 240
Episode: 260
Episode: 280
Episode: 300
Episode: 320
Episode: 340
Episode: 360
Episode: 380
Episode: 400
Episode: 420
Episode: 440
Episode: 460
Episode: 480
Episode: 500
Episode: 520
Episode: 540
Episode: 560
Episode: 580
Episode: 600
Episode: 620
Episode: 640
Episode: 660
Episode: 680
Episode: 700
Episode: 720
Episode: 740
Episode: 760
Episode: 780
Episode: 800
Episode: 820
Episode: 840
Episode: 860
Episode: 880
Episode: 900
Episode: 920
Episode: 940
Episode: 960
Episode: 980
Episode: 1000
Episode: 1020
Episode: 1040
Episode: 1060
Episode: 1080
Episode: 1100
Episode: 1120
Episode: 1140
Episode: 1160
Episode: 1180
Episode: 1200
Episode: 1220
Episode: 1240
Episode: 1260
Episode: 1280
Episode: 1300
Episode: 1320
Episode: 1340
Episode: 1360
Episode: 1380
Episode: 1400
Episode: 1420
Episode: 1440
Episode: 1460
Episode: 1480
Episod

In [16]:
PATH = 'chess_net_CNN_RL___.pth'
torch.save(model.state_dict(), PATH)

In [17]:
from chess_board import square_to_uci, get_chess_board

new_board = chess.Board()
    
with torch.no_grad():
    
    while not new_board.is_game_over():
        featurized = torch.from_numpy(get_chess_board(new_board).reshape(1, 12, 8, 8).astype(np.float32))
        
        predicted_source, predicted_destination = model(featurized)
        source = square_to_uci(torch.argmax(predicted_source, 1)[0].data.item())
        destination = square_to_uci(torch.argmax(predicted_destination, 1)[0].data.item())
        
        uci = source + destination
        print(uci)
        new_board.push_uci(uci)

d2d3
c7c5
c1d2
g7g6
d2c3
g8f6
c3f6
e7f6
c2c3
f8g7
d1d2
e8g8
d2e3
b8c6
f2f4
b7b6
g2g4
f1b7


IllegalMoveError: illegal uci: 'f1b7' in r1bq1rk1/p2p1pbp/1pn2pp1/2p5/5PP1/2PPQ3/PP2P2P/RN2KBNR b KQ - 0 9