In [9]:
import os

import chess
import chess.pgn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf


## Game extraction

The games from every PGN file in the dataset folder are extracted here.

STEPS:
* Read dataset folder
* Append every game to a single list

In [10]:
# Default location of the PGN dataset; pass another folder to getGames() to override it.
directory_path = 'C:/Users/diogo/OneDrive/Ambiente de Trabalho/Datasets/chess'

def getGames(directory_path=directory_path):
  """Read every game from every ``.pgn`` file in a directory.

  PGN is a plain-text format for saving chess games; parsing is delegated to
  ``chess.pgn.read_game`` (the ``chess.pgn`` submodule must be imported,
  ``import chess`` alone does not load it).

  Args:
    directory_path: Folder to scan (not recursive). Defaults to the value the
      module-level ``directory_path`` had when this function was defined.

  Returns:
    A list with one parsed game object per game, ordered by file name and then
    by position inside each file. Empty if the folder has no ``.pgn`` files.
  """
  games = []
  # os.listdir returns entries in arbitrary order; sorting keeps positions in
  # the returned list (e.g. games[1]) stable from one run to the next.
  for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.pgn'):
      continue
    pgn_path = os.path.join(directory_path, filename)
    with open(pgn_path) as pgn_file:
      while True:
        game = chess.pgn.read_game(pgn_file)
        if game is None:
          break  # No more games in this file
        games.append(game)

  return games

In [11]:
# Load every game from the default dataset directory, then print the second
# one (index 1) as a sanity check.
games = getGames()
print(games[1])

[Event "Troll Masters"]
[Site "Gausdal NOR"]
[Date "2001.01.06"]
[Round "2"]
[White "Carlsen,Magnus"]
[Black "Brameld,A"]
[Result "0-1"]
[WhiteElo ""]
[BlackElo "2095"]
[ECO "B05"]

1. e4 Nf6 2. e5 Nd5 3. d4 d6 4. Nf3 Bg4 5. Bc4 e6 6. O-O Nb6 7. Be2 Be7 8. h3 Bh5 9. Bf4 Nc6 10. c3 O-O 11. Nbd2 d5 12. b4 a5 13. a3 Qd7 14. Qc2 Bg6 15. Bd3 Rfc8 16. Rfb1 Bf8 17. h4 Ne7 18. g3 Qa4 19. Ne1 Qxc2 20. Bxc2 Bxc2 21. Nxc2 Na4 22. Rb3 b6 23. Kf1 c5 24. bxc5 bxc5 25. dxc5 Rxc5 26. Nb1 Rac8 27. Be3 Rc4 28. Bd4 Nc6 29. Rb5 Nxd4 30. Nxd4 Nxc3 31. Nxc3 Rxd4 32. Ne2 Ra4 33. Ke1 Rxa3 34. Rab1 Bb4+ 35. Kf1 Rd3 0-1


## Game Lines and FEN

Here I walk through the main line of every game and, for each move, save the FEN string of the board state just before the move together with the move itself.

STEPS:
* Get the game main line and save it
* For every game, move by move, create a FEN string of the board state before the move is played and save it together with the move

In [12]:
def create_datasets(games):
    """Flatten games into a list of (position, move) training pairs.

    Each game's main line is replayed on a fresh board obtained from
    ``game.board()``. For every move, the FEN of the position *before* the
    move is recorded together with the move in UCI notation, and the move is
    then played on the board.

    Args:
        games: Iterable of game objects exposing ``board()`` and
            ``mainline_moves()`` (as returned by ``getGames``).

    Returns:
        A list of ``(fen, uci_move)`` string tuples, in game order and then
        move order. Empty if ``games`` is empty.
    """
    dataset = []

    for game in games:
        board = game.board()

        # mainline_moves() already yields move objects, so they can be pushed
        # directly; no need to serialise to UCI and parse back again.
        for move in game.mainline_moves():
            # Record the position first: the label is the move played FROM it.
            dataset.append((board.fen(), move.uci()))
            board.push(move)

    return dataset

In [13]:
# Flatten all loaded games into one list of (FEN, UCI move) pairs.
data = create_datasets(games)

In [15]:
# Peek at the first five (FEN, move) pairs.
print(data[:5])

[('rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR b KQkq - 0 1', 'g8f6'), ('rnbqkb1r/pppppppp/5n2/8/3P4/5N2/PPP1PPPP/RNBQKB1R b KQkq - 2 2', 'd7d5'), ('rnbqkb1r/ppp1pppp/5n2/3p4/3P4/4PN2/PPP2PPP/RNBQKB1R b KQkq - 0 3', 'c8f5'), ('rn1qkb1r/ppp1pppp/5n2/3p1b2/2PP4/4PN2/PP3PPP/RNBQKB1R b KQkq - 0 4', 'c7c6'), ('rn1qkb1r/pp2pppp/2p2n2/3p1b2/2PP4/2N1PN2/PP3PPP/R1BQKB1R b KQkq - 1 5', 'e7e6')]


## Data Preprocessing

STEPS:

* Create Numpy arrays for each dataset
* For fen strings, convert them into 'board arrays' of integers
* For moves, convert them to a vector of two integers indicating the starting square and the ending square

In [17]:
# Convert the list of (FEN, move) pairs into a NumPy string array:
# column 0 holds the FEN, column 1 the UCI move.
data_np = np.array(data)
# Display the array (NumPy elides the middle rows) instead of dumping the
# whole raw Python list into the cell output.
data_np

array([['rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1',
        'd2d4'],
       ['rnbqkb1r/pppppppp/5n2/8/3P4/8/PPP1PPPP/RNBQKBNR w KQkq - 1 2',
        'g1f3'],
       ['rnbqkb1r/ppp1pppp/5n2/3p4/3P4/5N2/PPP1PPPP/RNBQKB1R w KQkq - 0 3',
        'e2e3'],
       ...,
       ['6Q1/5pq1/3P2k1/1p3bp1/7p/7P/2p2PP1/4R1K1 w - - 2 42', 'g8e8'],
       ['4Q3/5p2/3P2k1/1p3bp1/7p/2q4P/2p2PP1/4R1K1 w - - 4 43', 'e8g8'],
       ['6Q1/5p2/3P3k/1p3bp1/7p/2q4P/2p2PP1/4R1K1 w - - 6 44', 'g8f8']],
      dtype='<U76')

In [54]:
def fenToMatrix(fen):
  """Encode the piece placement of a FEN string as an 8x8 integer matrix.

  Square index ``i`` of the parsed board is stored at ``[i // 8, i % 8]``.
  Piece codes: pawn 1, rook 2, knight 3, bishop 4, queen 5, king 6; black
  pieces are positive, white pieces negative, empty squares 0. For the
  starting position this puts White's back rank in the first row.

  Args:
    fen: FEN string accepted by ``chess.Board``.

  Returns:
    ``np.ndarray`` of shape (8, 8) with integer piece codes.
  """
  # Build the symbol -> code table: lowercase (black) positive, uppercase
  # (white) negative, '.' for an empty square.
  codes = {'.': 0}
  for value, letter in enumerate('prnbqk', start=1):
    codes[letter] = value
    codes[letter.upper()] = -value

  position = chess.Board(fen)

  flat = []
  for square in range(64):
    occupant = position.piece_at(square)
    flat.append(codes[occupant.symbol()] if occupant else codes['.'])

  # 64 consecutive squares -> 8 rows of 8, i.e. [square // 8, square % 8]
  return np.array(flat, dtype=int).reshape(8, 8)

# Sanity check: encode the FEN of the first sample.
print(fenToMatrix(data_np[0][0]))


[[-2 -3 -4 -5 -6 -4 -3 -2]
 [-1 -1 -1 -1 -1 -1 -1 -1]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 1  1  1  1  1  1  1  1]
 [ 2  3  4  5  6  4  3  2]]


In [55]:
# Encode the FEN of every sample (the move column is ignored here) and stack
# the resulting 8x8 matrices into one array; print the first as a check.
fen_matrices = np.array([fenToMatrix(fen) for fen, move in data_np])
print(fen_matrices[0])

[[-2 -3 -4 -5 -6 -4 -3 -2]
 [-1 -1 -1 -1 -1 -1 -1 -1]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0]
 [ 1  1  1  1  1  1  1  1]
 [ 2  3  4  5  6  4  3  2]]


In [57]:
def move_to_vector(move):
  """Convert a UCI move string into ``[start_index, end_index]``.

  Squares are numbered file by file: a1=0, a2=1, ..., a8=7, b1=8, ..., h8=63,
  so ``index // 8`` is the file and ``index % 8`` the rank. Note that this is
  the transpose of the ``[rank, file]`` layout produced by ``fenToMatrix``.

  Only the first four characters are used, so a trailing promotion letter
  (e.g. the ``q`` in ``e7e8q``) is dropped.

  Args:
    move: Move in UCI notation, e.g. ``'d2d4'``.

  Returns:
    A two-element list of ints: start square index, end square index.

  Raises:
    KeyError: If either two-character square is not a valid board square.
  """
  files = 'abcdefgh'
  ranks = '12345678'

  # File-major lookup table: the rank varies fastest within each file.
  square_index = {
    file_char + rank_char: file_pos * 8 + rank_pos
    for file_pos, file_char in enumerate(files)
    for rank_pos, rank_char in enumerate(ranks)
  }

  origin, target = move[:2], move[2:4]
  return [square_index[origin], square_index[target]]



In [58]:
# Encode the move of every sample (the FEN column is ignored here) as a
# [start, end] pair of square indices; print the first as a check.
move_vector = np.array([move_to_vector(move) for fen, move in data_np])
print(move_vector[0])

[25 27]


In [None]:
def getSequenceData(data, history_length=10):
  """Build fixed-length history windows ending at each sample.

  For sample ``i`` the window holds samples ``i - history_length + 1 .. i``
  in chronological order, right-aligned: the last slot is sample ``i``
  itself. Slots for which no earlier sample exists stay zero and are flagged
  0 in the indicator array.

  NOTE(review): ``data`` is treated as one continuous sequence, so windows
  near the start of a game reach back into the preceding samples, whatever
  game they came from. Confirm this is intended.

  Args:
    data: Sequence of ``(fen, uci_move)`` pairs (e.g. ``data_np``).
    history_length: Number of slots per window.

  Returns:
    Tuple ``(board_history, move_history, data_indicator)`` of int arrays
    shaped ``(n, history_length, 8, 8)``, ``(n, history_length, 2)`` and
    ``(n, history_length, 1)``, where ``n = len(data)``.
  """
  # Use the argument, not the notebook-global data_np, so the function
  # processes whatever the caller actually passes in.
  num_samples = len(data)
  board_history = np.zeros((num_samples, history_length, 8, 8), dtype=int)
  move_history = np.zeros((num_samples, history_length, 2), dtype=int)
  data_indicator = np.zeros((num_samples, history_length, 1), dtype=int)

  # Encode every sample exactly once; the windows below are just slices of
  # these arrays, instead of re-encoding each sample for every window.
  boards = np.zeros((num_samples, 8, 8), dtype=int)
  moves = np.zeros((num_samples, 2), dtype=int)
  for i, (fen, move) in enumerate(data):
    boards[i] = fenToMatrix(fen)
    moves[i] = move_to_vector(move)

  for i in range(num_samples):
    available = min(history_length, i + 1)  # samples that exist up to and including i
    first_slot = history_length - available  # right-align; leading slots stay zero
    window = slice(i + 1 - available, i + 1)
    board_history[i, first_slot:] = boards[window]
    move_history[i, first_slot:] = moves[window]
    data_indicator[i, first_slot:] = 1  # Mark as real data

  return board_history, move_history, data_indicator
  
  


In [60]:
# Give every 8x8 board a trailing axis of size 1, so the array becomes
# (samples, 8, 8, 1).
fen_reshaped = fen_matrices.reshape((-1, 8, 8, 1))  # Added the channel dimension

In [61]:
# Split data into training and testing sets
from sklearn.model_selection import train_test_split

# 80% train / 20% test. The fixed random_state makes the shuffle, and thus
# every downstream metric, reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    fen_reshaped, move_vector, test_size=0.2, random_state=42
)






In [62]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, LSTM, Flatten, concatenate
