In [1]:
import chess
import chess.pgn
import matplotlib.pyplot as plt
import numpy as np
from IPython import display
import time
from sklearn import preprocessing

In [2]:
path = 'data/output.pgn'

## LAYOUT
- The first section of this notebook extracts moves from the .PGN files.

- The second section will label encode the board for each move of each game into an array of (1,64).

- The third section will extract the evaluation of the board after each move of each game into an array of (1,1). Can directly be used as a target for supervised learning or can be used to classify into 15 different labels. Every board position can be labeled as Winning(7), Losing(7) or Draw according to the evaluation we have extracted.

### Get Moves from PGN file

In [3]:
def get_moves(game):
    moves = []
    for i in game.mainline_moves():
        moves.append(i)
    return moves

In [4]:
file = open(path, encoding="utf-8")
games = []
a = chess.pgn.read_game(file)
while a:
    games.append(a)
    try:
        a = chess.pgn.read_game(file)
    except:
        print(f'could not read game number {len(games)}')

### Label Encoding

In [5]:
import numpy as np
import chess

def get_piece_type(piece):
    # Helper function to get the type of a chess piece (with color information)
    if piece is None:
        return None
    elif piece.color == chess.WHITE:
        if piece.piece_type == chess.PAWN:
            return 'P'
        elif piece.piece_type == chess.KNIGHT:
            return 'N'
        elif piece.piece_type == chess.BISHOP:
            return 'B'
        elif piece.piece_type == chess.ROOK:
            return 'R'
        elif piece.piece_type == chess.QUEEN:
            return 'Q'
        elif piece.piece_type == chess.KING:
            return 'K'
    elif piece.color == chess.BLACK:
        if piece.piece_type == chess.PAWN:
            return 'p'
        elif piece.piece_type == chess.KNIGHT:
            return 'n'
        elif piece.piece_type == chess.BISHOP:
            return 'b'
        elif piece.piece_type == chess.ROOK:
            return 'r'
        elif piece.piece_type == chess.QUEEN:
            return 'q'
        elif piece.piece_type == chess.KING:
            return 'k'

def get_board_matrix(board_state):
    # Initialize an 8x8 matrix to represent the board
    matrix = np.zeros((8, 8), dtype=str)

    # Parse the board_state string to obtain the positions of the pieces on the board
    board = chess.Board(board_state)
    for row in range(8):
        for col in range(8):
            # Get the square index corresponding to the current row and column
            square = chess.square(col, 7 - row)

            # Get the type of the piece occupying the current square (if any)
            piece_type = get_piece_type(board.piece_at(square))

            # Store the piece type in the matrix
            matrix[row][col] = piece_type or '.'

    return matrix


In [6]:
get_board_matrix(games[0].board().fen())

array([['r', 'n', 'b', 'q', 'k', 'b', 'n', 'r'],
       ['p', 'p', 'p', 'p', 'p', 'p', 'p', 'p'],
       ['.', '.', '.', '.', '.', '.', '.', '.'],
       ['.', '.', '.', '.', '.', '.', '.', '.'],
       ['.', '.', '.', '.', '.', '.', '.', '.'],
       ['.', '.', '.', '.', '.', '.', '.', '.'],
       ['P', 'P', 'P', 'P', 'P', 'P', 'P', 'P'],
       ['R', 'N', 'B', 'Q', 'K', 'B', 'N', 'R']], dtype='<U1')

In [12]:
le = preprocessing.LabelEncoder()

def label_encode_game(game):
    board = game.board()
    moves = get_moves(game)
    encoded_moves = []
    for move in moves:
        board.push(move)
        encoded_moves.append(le.fit_transform(get_board_matrix(board.fen()).flatten()))
    return encoded_moves

In [13]:
label_encoded_games = []
for game in games:
    label_encoded_games.append(label_encode_game(game))

In [155]:
(label_encoded_games[1][0])

array([12,  9,  7, 11,  8,  7,  9, 12, 10, 10, 10, 10, 10, 10, 10, 10,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  4,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  4,  4,
        4,  0,  4,  4,  4,  6,  3,  1,  5,  2,  1,  3,  6], dtype=int64)

### Evaluation Extraction

In [14]:
def get_node(game):
    comments = []
    for node in game.mainline_moves():
        comments.append(node.comment)
    return comments
        

In [15]:
def get_evaluation(game):
    evaluation = []
    for node in game.mainline():
        comment = node.comment
        comment = comment.split('/')
        evaluation.append(comment[0])
    return evaluation

In [16]:
evaluations = []
for game in games:
    evaluations.append(get_evaluation(game))

In [17]:
evaluations[1][0] 


'0.16'

In [18]:
# X is the board state
# Y is the evaluation

y = []
x =[]
for i in range(len(label_encoded_games)):
    for j in range(len(label_encoded_games[i])):
        if evaluations[i][j] == '':
            evaluations[i][j] = evaluations[i][j-1] #
        else:
            y.append(evaluations[i][j])


y = np.array(y)


In [19]:
#save y


In [23]:
#shape of y eval
print(y.shape)
#load x
x = np.load('data/x.npy')
print(x.shape)

(123633,)
(126256, 64)


In [26]:
print(y)
y = np.asarray(y, dtype=float)
y_thresholded = np.zeros(len(y))
y.shape
np.save('data/y_eval.npy', y)


[  0.24  -0.3    0.28 ... -73.24  62.65 -74.24]


In [27]:
y.shape

(123633,)

In [316]:
y_thresholded = np.where(y > 1.5, 1, y_thresholded)
y_thresholded = np.where(y < -1.5, -1, y_thresholded)
y_thresholded = np.where((y >= -1.5) & (y <= 1.5), 0, y_thresholded)

In [317]:
print(x[59])
print(y_thresholded[59], '\n\n')
print(x[60])
print(y_thresholded[60])

IndexError: index 59 is out of bounds for axis 0 with size 1

- Problems:
    - The research paper says 'Pawns are represented as 1, Bishops and
Knights as 3, Rooks as 5, Queens as 9 and the Kings as 10. These values are negated for the opponent
    - Our label encoder has no way of knowing which player is which color. The two moves and their evaluations above is the proof that our model can get confused.

- Solution:
    - Use FEN notation to encode

### FEN Encoding

In [167]:
games[0].board().fen()

'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'

In [171]:
games[0].board().fen().split(' ')[1]

'w'

In [173]:
get_board_matrix(games[0].board().fen())

array([['r', 'n', 'b', 'q', 'k', 'b', 'n', 'r'],
       ['p', 'p', 'p', 'p', 'p', 'p', 'p', 'p'],
       ['.', '.', '.', '.', '.', '.', '.', '.'],
       ['.', '.', '.', '.', '.', '.', '.', '.'],
       ['.', '.', '.', '.', '.', '.', '.', '.'],
       ['.', '.', '.', '.', '.', '.', '.', '.'],
       ['P', 'P', 'P', 'P', 'P', 'P', 'P', 'P'],
       ['R', 'N', 'B', 'Q', 'K', 'B', 'N', 'R']], dtype='<U1')

In [28]:
def check_turn(board_state):
    if board_state.split(' ')[1] == 'w':
        return 'w'
    else:
        return 'b' 

In [29]:
def label_encode_for_white(board_matrix):

    new_board_matrix = np.zeros((8, 8), dtype=int)
   
    for i in range(len(board_matrix)):
        for j in range(len(board_matrix[i])):
            if board_matrix[i][j] == 'P':
                new_board_matrix[i][j] = 1
            elif board_matrix[i][j] == 'N':
                new_board_matrix[i][j] = 3
            elif board_matrix[i][j] == 'B':
                new_board_matrix[i][j] = 3
            elif board_matrix[i][j] == 'R':
                new_board_matrix[i][j] = 5
            elif board_matrix[i][j] == 'Q':
                new_board_matrix[i][j] = 9
            elif board_matrix[i][j] == 'K':
                new_board_matrix[i][j] = 10
            elif board_matrix[i][j] == '.':
                new_board_matrix[i][j] = 0
            elif board_matrix[i][j] == 'p':
                new_board_matrix[i][j] = -1
            elif board_matrix[i][j] == 'n':
                new_board_matrix[i][j] = -3
            elif board_matrix[i][j] == 'b':
                new_board_matrix[i][j] = -3
            elif board_matrix[i][j] == 'r':
                new_board_matrix[i][j] = -5
            elif board_matrix[i][j] == 'q':
                new_board_matrix[i][j] = -9
            elif board_matrix[i][j] == 'k':
                new_board_matrix[i][j] = -10
    return new_board_matrix


def label_encode_for_black(board_matrix):
    new_board_matrix = np.zeros((8, 8), dtype=int)
    
    for i in range(len(board_matrix)):
        for j in range(len(board_matrix[i])):
            if board_matrix[i][j] == 'P':
                new_board_matrix[i][j] = -1
            elif board_matrix[i][j] == 'N':
                new_board_matrix[i][j] = -3
            elif board_matrix[i][j] == 'B':
                new_board_matrix[i][j] = -3
            elif board_matrix[i][j] == 'R':
                new_board_matrix[i][j] = -5
            elif board_matrix[i][j] == 'Q':
                new_board_matrix[i][j] = -9
            elif board_matrix[i][j] == 'K':
                new_board_matrix[i][j] = -10
            elif board_matrix[i][j] == '.':
                new_board_matrix[i][j] = 0
            elif board_matrix[i][j] == 'p':
                new_board_matrix[i][j] = 1
            elif board_matrix[i][j] == 'n':
                new_board_matrix[i][j] = 3
            elif board_matrix[i][j] == 'b':
                new_board_matrix[i][j] = 3
            elif board_matrix[i][j] == 'r':
                new_board_matrix[i][j] = 5
            elif board_matrix[i][j] == 'q':
                new_board_matrix[i][j] = 9
            elif board_matrix[i][j] == 'k':
                new_board_matrix[i][j] = 10
    return new_board_matrix


In [30]:
def label_helper(board_state):
    board_matrix = get_board_matrix(board_state)
    if check_turn(board_state) == 'w':
       board_matrix = label_encode_for_black(board_matrix)
    elif check_turn(board_state) == 'b':
       board_matrix = label_encode_for_white(board_matrix)
    return board_matrix
        

In [31]:
# new label encoding
def label_encode_game_based_on_turn(game):
    board = game.board()
    moves = get_moves(game)
    encoded_moves = []
    for move in moves:
        board.push(move)
        board_state = board.fen()
        # encoded_moves.append(le.fit_transform(get_board_matrix(board.fen()).flatten()))
        encoded_moves.append(label_helper(board_state).flatten())
    return encoded_moves

In [32]:
label_encoded_games_based_on_turn = []
for game in games:
    label_encoded_games_based_on_turn.append(label_encode_game_based_on_turn(game))

In [33]:
print(label_encoded_games_based_on_turn[0][1])
print(games[0][0][0].board().fen())
print(get_board_matrix(games[0][0][0].board().fen()))

[  5   3   3   9  10   3   0   5   1   1   1   1   1   1   1   1   0   0
   0   0   0   3   0   0   0   0   0   0   0   0   0   0   0   0   0  -1
   0   0   0   0   0   0   0   0   0   0   0   0  -1  -1  -1   0  -1  -1
  -1  -1  -5  -3  -3  -9 -10  -3  -3  -5]
rnbqkb1r/pppppppp/5n2/8/3P4/8/PPP1PPPP/RNBQKBNR w KQkq - 1 2
[['r' 'n' 'b' 'q' 'k' 'b' '.' 'r']
 ['p' 'p' 'p' 'p' 'p' 'p' 'p' 'p']
 ['.' '.' '.' '.' '.' 'n' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.']
 ['.' '.' '.' 'P' '.' '.' '.' '.']
 ['.' '.' '.' '.' '.' '.' '.' '.']
 ['P' 'P' 'P' '.' 'P' 'P' 'P' 'P']
 ['R' 'N' 'B' 'Q' 'K' 'B' 'N' 'R']]


In [37]:
x = []
y = []
for i in range(len(label_encoded_games_based_on_turn)):
    for j in range(len(label_encoded_games_based_on_turn[i])):
        if evaluations[i][j] == '':
            evaluations[i][j] = evaluations[i][j-1] #
        else:
            y.append(evaluations[i][j])

        x.append(label_encoded_games_based_on_turn[i][j])

x = np.array(x)
y = np.array(y)

In [39]:
x.shape
y.shape

(126256,)

In [44]:
#save x and y
# np.save('x_eval.npy', x)
# np.save('y_eval.npy', y)

#load x and y
x = np.load('x_eval.npy')
y = np.load('y_eval.npy')
print(y.shape)
print(x.shape)
print(y[0])


(126256,)
(126256, 64)
0.24


In [319]:
print(x[61])
print((y[61]))

[  0   0   0   0   5   0  10   0   0   0   0   0   0   1   1   1   0   0
  -3   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  -9
   5   0   0   0   0   0  -5   0  -1   0   0   0  -1   9   0   0   0  -1
  -1   0   0   0   0   0   0 -10   0   0]
-3.11


In [320]:
# Define the labels
winning_labels = [f"Winning-{i}" for i in range(1, 8)]
losing_labels = [f"Losing-{i}" for i in range(1, 8)]
draw_label = "Draw"

# Initialize the label list
labels = []

# Assign labels based on the evaluation values
for i in range(len(y)):
    if y[i] > 1.5:
        labels.append(winning_labels[min(int((y[i]-1.5)/1), 6)])
    elif y[i] < -1.5:
        labels.append(losing_labels[min(int((abs(y[i])-1.5)/1), 6)])
    else:
        labels.append(draw_label)


In [321]:
labels = np.array(labels)
# print where label == with winning-6
# print(np.where(labels == 'Winning-1'))
print(labels[41])
print(y[41])


Winning-3
4.3


In [322]:
for i in range(len(labels)):
    if labels[i].startswith('Winning'):
        labels[i] = int(labels[i].split('-')[1])
    elif labels[i].startswith('Losing'):
        labels[i] = -int(labels[i].split('-')[1])
    elif labels[i] == 'Draw':
        labels[i] = 0

In [323]:
print(x[41])
print(labels[41])
print(type(labels[0]))
labels = labels.astype(int)
labels

[  0   0   0   0   5   0  10   0   1   0   0   0   0   1   1   1   0   0
   1   9   0   0   5   0   0   0   0   1   0   0   0   0   0   0   0  -9
   0   0   0   0  -5   0   0   0  -1   0   0   0  -1  -1   0   0   3  -1
  -1  -1   0   0   0   0  -3  -5 -10   0]
3
<class 'numpy.str_'>


array([ 0,  0,  0, ...,  7, -7, -7])

- This labeling and encoding seems reasonable. In the next notebook, we will use this data to train a model.

In [324]:
# save the data
# np.save('x.npy', x)
# np.save('y.npy', labels)

## Playing 

In [6]:
#import load_model
from tensorflow.keras.models import load_model
# load model
model = load_model('chessengine_13M.h5') #

In [337]:
# predict
print(x[61])
predictions = model.predict(x[100432].reshape(1, 64))
prediction = np.argmax(predictions, axis=1)
print(prediction)

[  0   0   0   0   5   0  10   0   0   0   0   0   0   1   1   1   0   0
  -3   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0  -9
   5   0   0   0   0   0  -5   0  -1   0   0   0  -1   9   0   0   0  -1
  -1   0   0   0   0   0   0 -10   0   0]
[14]


In [7]:
import chess.polyglot

# Define opening book
with open("book.bin", "rb") as f:
    opening_book = chess.polyglot.MemoryMappedReader(f)

board = chess.Board()

# Keep track of the number of moves played
num_moves = 0

# Play opening moves
while not board.is_game_over() and num_moves < 5:
    if board.turn == chess.WHITE:
        try:
            move = opening_book.weighted_choice(board).move
            board.push(move)
            num_moves += 1
        except KeyError:
            break
    else:
        break

    display.display(board)

# Play against the model
while not board.is_game_over():
    legal_moves = [move for move in board.legal_moves]
    best_score = float('-inf')
    best_move_index = None

    for i, move in enumerate(legal_moves):
        board.push(move)
        x = np.array([label_helper(board.fen()).flatten()])
        y = model.predict(x)[0]
        score = y.max()  # get the highest score out of all classes
        if score > best_score:
            best_score = score
            best_move_index = i
        board.pop()
        print(move, score)

    best_move = legal_moves[best_move_index]
    print('best move:', best_move)

    board.push(best_move)
    display.display(board)

    # Check if the game has gone past the opening phase
    if num_moves >= 5:
        human_move = input('Your move: ')
        try:
            board.push_san(human_move)
        except ValueError:
            print('Illegal move')
    else:
        num_moves += 1

    display.display(board)


FileNotFoundError: [Errno 2] No such file or directory: 'book.bin'

In [None]:
# # play chess
# import chess
# import random
# from IPython import display
# import numpy as np

# board = chess.Board()
# display.display(board)

# # play against the model
# while not board.is_game_over():
#     legal_moves = [move for move in board.legal_moves]
#     best_score = float('-inf')
#     best_move_index = None

#     for i, move in enumerate(legal_moves):
#         board.push(move)
#         x = np.array([label_helper(board.fen()).flatten()])
#         y = model.predict(x)[0]
#         score = y.max()  # get the highest score out of all classes
#         if score > best_score:
#             best_score = score
#             best_move_index = i
#         board.pop()
#         print(move, score)
        
#     best_move = legal_moves[best_move_index]
#     print('best move:', best_move)

#     board.push(best_move)
#     display.display(board)

#     human_move = input('Your move: ')
#     try:
#         board.push_san(human_move)
#     except ValueError:
#         print('Illegal move')

#     display.display(board)
