## Imports

In [1]:
import torch
import torch.nn as nn
from model import Neuro_gambit, Neuro_gambit_2, Neuro_gambit_3
from torch.utils.data import DataLoader

## ELO or Year init

In [2]:
elo_or_year = 2010  # suffix (ELO bucket or year) appended to the tensor and checkpoint file names used below

## Device init

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


## Loading Tensors

In [5]:
batch_size = 100000

# Both tensors belong to the same dataset selection, so their paths share one suffix.
tensor_suffix = str(elo_or_year)+'.pt'
X = torch.load('./large_data/X_tensor_'+tensor_suffix)
Y = torch.load('./large_data/Y_tensor_'+tensor_suffix)

print(X.shape)
print(Y.shape)

# The two loaders are consumed in lockstep with zip() during training, which
# silently truncates on a length mismatch -- fail loudly here instead.
assert len(X) == len(Y), f'X has {len(X)} rows but Y has {len(Y)}'

# shuffle must stay False: X and Y live in separate loaders, so any shuffling
# would break the row-to-row pairing between inputs and targets.
X_loader = DataLoader(X, batch_size=batch_size, shuffle=False)
Y_loader = DataLoader(Y, batch_size=batch_size, shuffle=False)

total_batches = len(X_loader)
print(total_batches)

torch.Size([238000, 833])
torch.Size([238000, 36])
3


## Model class init

In [5]:
# Architecture under training. Swap the class for Neuro_gambit or
# Neuro_gambit_3 (both imported above) to try the other variants.
model = Neuro_gambit_2()
model = model.to(device)

## Learning params init

In [6]:
# Optimisation hyper-parameters.
learning_rate = 1e-3
n_epochs = 1_000_000

# Mean squared error is applied to each model output and the results are summed.
criterion = nn.MSELoss()

# AdamW performed noticeably better than plain SGD for this model.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

## Loading saved model

In [7]:
# Restore the weights previously saved for this architecture / dataset selection.
checkpoint_path = './models/'+str(model._get_name())+'_'+str(elo_or_year)+'.pt'

# map_location remaps the stored tensors onto the active device, so a checkpoint
# written on a CUDA machine still loads on a CPU-only one.
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)  # expects the loaded dictionary, not the file path

model.eval()
model.to(device)

Neuro_gambit_2(
  (encoder): Sequential(
    (0): Linear(in_features=833, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=256, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Linear(in_features=64, out_features=36, bias=True)
  )
)

## Current Loss

In [8]:
# Fallback threshold; replaced below by the loss of the loaded model.
min_loss = 1

# Split the target columns into the five groups that are scored separately.
Y1 = Y[:, :8]
Y2 = Y[:, 8:16]
Y3 = Y[:, 16:24]
Y4 = Y[:, 24:32]
Y5 = Y[:, 32:]

Y_list = [Y1, Y2, Y3, Y4, Y5]

with torch.no_grad():
    y_preds = model(X.to(device))  # the model returns one tensor per target group

    # Total loss is the sum of the per-group losses.
    total_loss = 0
    for y_pred, y_true in zip(y_preds, Y_list):
        total_loss += criterion(y_pred, y_true.to(device))

# Keep the baseline as a plain Python float so later comparisons against
# `loss.item()` stay float-vs-float and no device tensor is kept alive.
current_loss = total_loss.item()
print('Current loss:', f'{current_loss*100:.3f}%')
min_loss = current_loss


Current loss: 5.731%


## Training

In [17]:
# The checkpoint-loading cell leaves the model in eval mode; switch back to
# training mode before optimising so mode-dependent layers behave correctly.
model.train()

for epoch in range(n_epochs):
    # X_loader and Y_loader are unshuffled, so zipping them yields matching rows.
    for batch, (X_batch, Y_batch) in enumerate(zip(X_loader, Y_loader)):
        X_batch = X_batch.to(device)
        Y_batch = Y_batch.to(device)

        # forward
        y_preds = model(X_batch)  # the model returns one tensor per target group

        # Split the batch targets into the five groups that are scored separately.
        Y_list = [
            Y_batch[:, :8],
            Y_batch[:, 8:16],
            Y_batch[:, 16:24],
            Y_batch[:, 24:32],
            Y_batch[:, 32:],
        ]

        # Total loss is the sum of the per-group losses.
        total_loss = 0
        for y_pred, y_true in zip(y_preds, Y_list):
            total_loss += criterion(y_pred, y_true)

        # backward
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # Report on every second batch; '\r' keeps the progress on a single line.
        if (batch+1) % 2 == 0:
            print(f'Epoch [{epoch+1}/{n_epochs}], Batch [{batch+1}/{total_batches}], Loss: {total_loss.item():.4f}', end='\r')

Epoch [20/1000000], Batch [4/4], Loss: 0.1596

KeyboardInterrupt: 

In [None]:
from IPython.display import clear_output
# Full-batch training that checkpoints the model whenever the loss improves.

# Split the target columns into the five groups that are scored separately.
Y1 = Y[:, :8]
Y2 = Y[:, 8:16]
Y3 = Y[:, 16:24]
Y4 = Y[:, 24:32]
Y5 = Y[:, 32:]

Y_list = [Y1, Y2, Y3, Y4, Y5]

# Move inputs and targets to the device once; doing it inside the loop
# repeated the same host-to-device copy on every epoch.
X_on_device = X.to(device)
Y_on_device = [y.to(device) for y in Y_list]

checkpoint_path = './models/'+str(model._get_name())+'_'+str(elo_or_year)+'.pt'

# min_loss may still be a tensor if it came from an older run of the
# "Current Loss" cell; normalise it to a float for the comparisons below.
min_loss = float(min_loss)

model.train()

for epoch in range(n_epochs):
    # forward
    y_preds = model(X_on_device)  # the model returns one tensor per target group

    total_loss = 0
    for y_pred, y_true in zip(y_preds, Y_on_device):
        total_loss += criterion(y_pred, y_true)
    loss_value = total_loss.item()

    if (epoch) % 5 == 0:
        print(f'Epoch [{epoch+1}/{n_epochs}], Loss: {loss_value:.4f}', end='\r')

    # Checkpoint BEFORE the optimizer step: the weights currently in the model
    # are the ones that produced `loss_value`. Saving after the step would
    # write weights whose loss has not been measured.
    if loss_value < min_loss:
        min_loss = loss_value
        clear_output()
        print('model saved, loss:', min_loss)
        torch.save(model.state_dict(), checkpoint_path)

    # backward
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

model saved, loss: 0.05326352268457413


## Saving the model

In [12]:
# Write the current weights to the checkpoint for this architecture / dataset selection.
checkpoint_path = './models/' + str(model._get_name()) + '_' + str(elo_or_year) + '.pt'
torch.save(model.state_dict(), checkpoint_path)
print('Model saved')

Model saved


## Saving the model to cpu

In [None]:
# Save a CPU copy of the weights so the checkpoint loads on machines without CUDA.
cpu = torch.device('cpu')
model.to(cpu)
try:
    torch.save(model.state_dict(), './models/'+str(model._get_name())+'_cpu_'+str(elo_or_year)+'.pt')
    print('Model saved')
finally:
    # Always move the model back, even if saving failed, so later cells
    # do not find it unexpectedly on the CPU.
    model.to(device)

In [None]:
# Playing a game
from model import get_best_move
import chess
import chess.svg
import matplotlib.pyplot as plt
from cairosvg import svg2png
import cv2
from IPython.display import clear_output

def draw_board(current_board, ai_col_chess):
    """Render the board to a PNG file and draw it on a new matplotlib figure.

    Keyword arguments:
    current_board -- chess.Board() to render
    ai_col_chess -- colour played by the AI; the board is flipped when the AI
                    plays white

    The figure is created but not shown; the caller decides when to display it.
    Adapted from https://colab.research.google.com/github/iAmEthanMai/chess-engine-model/blob/main/python_chess_engine.ipynb#scrollTo=yveIUxzjUr2b
    """
    import os  # local import: only needed to make sure the output folder exists

    os.makedirs('./boards', exist_ok=True)

    board_img = chess.svg.board(current_board, flipped=ai_col_chess==chess.WHITE)
    svg2png(bytestring=board_img,write_to='./boards/board.png')

    # OpenCV loads colour images as BGR while matplotlib expects RGB; without
    # this conversion the board is drawn with red and blue swapped.
    img = cv2.imread('./boards/board.png', 1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.axis('off')
    ax.imshow(img)


def get_algebraic_notation(move_stack):
    """Return the moves of ``move_stack`` as a list of SAN strings.

    The moves are replayed on a fresh ``chess.Board()``, because the SAN text
    of a move has to be generated from the position in which it is played.
    """
    replay = chess.Board()

    def _san_then_push(mv):
        # SAN must be computed before the move changes the position.
        text = replay.san(mv)
        replay.push(mv)
        return text

    return [_san_then_push(mv) for mv in move_stack]


# Interactive game: the human types moves in algebraic notation, the model replies.
board = chess.Board()
ai_col = 'black'
ai_col_chess = chess.BLACK if ai_col == 'black' else chess.WHITE
last = None
rejected_input = None  # last user input that could not be played, shown on the next render
while not board.is_game_over():
    # render game
    clear_output()
    alg_move_stack = get_algebraic_notation(board.move_stack)
    print(" ".join(alg_move_stack))
    print("Last AI move:", alg_move_stack[-1] if alg_move_stack != [] else '', last)
    if rejected_input is not None:
        # Printed after clear_output() so the message is not wiped immediately.
        print('Illegal move:', rejected_input)
        rejected_input = None
    draw_board(board, ai_col_chess)
    plt.show()

    # handle moves
    if board.turn == ai_col_chess:
        move_prob = get_best_move(model, board, ai_col)
        last = move_prob
        board.push_uci(move_prob['move'])
    else:
        user_input = input() # your input in algebraic
        try:
            board.push_san(user_input)
        except ValueError:
            # python-chess signals illegal, ambiguous and unparsable SAN with
            # ValueError subclasses; catching the base class handles them all.
            rejected_input = user_input

# is_game_over() is also true for draws, so read the result instead of
# assuming that the side to move has been checkmated.
result = board.result()
if result == '1-0':
    print('Winner:', 'white')
elif result == '0-1':
    print('Winner:', 'black')
else:
    print('Draw')


In [None]:
# Playing a game with itself
from model import get_best_move
import chess
import chess.svg
import matplotlib.pyplot as plt
from cairosvg import svg2png
import cv2
from IPython.display import clear_output

def draw_board(current_board, ai_col_chess):
    """Render the board to a PNG file and draw it on a new matplotlib figure.

    Keyword arguments:
    current_board -- chess.Board() to render
    ai_col_chess -- colour played by the AI; the board is flipped when the AI
                    plays white

    The figure is created but not shown; the caller decides when to display it.
    Adapted from https://colab.research.google.com/github/iAmEthanMai/chess-engine-model/blob/main/python_chess_engine.ipynb#scrollTo=yveIUxzjUr2b
    """
    import os  # local import: only needed to make sure the output folder exists

    os.makedirs('./boards', exist_ok=True)

    board_img = chess.svg.board(current_board, flipped=ai_col_chess==chess.WHITE)
    svg2png(bytestring=board_img,write_to='./boards/board.png')

    # OpenCV loads colour images as BGR while matplotlib expects RGB; without
    # this conversion the board is drawn with red and blue swapped.
    img = cv2.imread('./boards/board.png', 1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.axis('off')
    ax.imshow(img)


def get_algebraic_notation(move_stack):
    """Return the moves of ``move_stack`` as a list of SAN strings.

    The moves are replayed on a fresh ``chess.Board()``, because the SAN text
    of a move has to be generated from the position in which it is played.
    """
    replay = chess.Board()

    def _san_then_push(mv):
        # SAN must be computed before the move changes the position.
        text = replay.san(mv)
        replay.push(mv)
        return text

    return [_san_then_push(mv) for mv in move_stack]


# Self-play: the model chooses the move for whichever side is to move.
board = chess.Board()
last = None
while not board.is_game_over():
    # handle moves
    side_to_move = 'black' if board.turn == chess.BLACK else "white"
    move_prob = get_best_move(model, board, side_to_move)
    last = move_prob
    board.push_uci(move_prob['move'])

alg_move_stack = get_algebraic_notation(board.move_stack)
print(len(alg_move_stack))
print(" ".join(alg_move_stack))

# is_variant_draw() is always False for standard chess, so stalemate,
# repetition and similar draws were reported as a win. board.result()
# distinguishes the three possible outcomes.
result = board.result()
if result == '1-0':
    print('Winner:', 'white')
elif result == '0-1':
    print('Winner:', 'black')
else:
    print("Draw")