In [34]:
import os

import chess as chess
import chess.pgn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf


## Game extraction

# Here the games from every `.pgn` file in the dataset folder are extracted

STEPS:
* Read dataset folder
* Append every game to a single list

In [45]:
# Folder that holds the .pgn files. Set the CHESS_PGN_DIR environment variable to
# point the notebook at another location without editing this cell; when it is
# unset the original path is used.
directory_path = os.environ.get(
    'CHESS_PGN_DIR',
    'C:/Users/diogo/OneDrive/Ambiente de Trabalho/Datasets/chess',
)


def getGames(directory_path=directory_path, encoding=None):
    """Read every game from every ``.pgn`` file in a folder into a single list.

    Parameters
    ----------
    directory_path : str
        Folder to scan (sub-folders are not visited). Defaults to the
        module-level ``directory_path``.
    encoding : str or None, optional
        Text encoding handed to ``open``. ``None`` (the default) keeps the
        platform's default encoding.

    Returns
    -------
    list
        The games of all ``.pgn`` files, files taken in sorted file-name order
        and games in the order they appear inside each file. Empty when the
        folder contains no ``.pgn`` file.

    Notes
    -----
    Relies on ``chess.pgn`` having been imported (``import chess.pgn``); a plain
    ``import chess`` does not load that submodule.
    """
    games = []
    # os.listdir makes no promise about ordering; sorting keeps positions such
    # as games[1] stable between runs and machines.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.pgn'):
            continue
        pgn_path = os.path.join(directory_path, filename)
        with open(pgn_path, encoding=encoding) as pgn_file:
            while True:
                # PGN is a plain-text format for chess games; python-chess parses
                # one game per call and returns None when the file is exhausted.
                game = chess.pgn.read_game(pgn_file)
                if game is None:
                    break
                games.append(game)

    return games

In [46]:
# Load every game from the default dataset folder into one list.
games = getGames()
# Show the second loaded game (index 1) as text to sanity-check the parse.
print(games[1])

[Event "Troll Masters"]
[Site "Gausdal NOR"]
[Date "2001.01.06"]
[Round "2"]
[White "Carlsen,Magnus"]
[Black "Brameld,A"]
[Result "0-1"]
[WhiteElo ""]
[BlackElo "2095"]
[ECO "B05"]

1. e4 Nf6 2. e5 Nd5 3. d4 d6 4. Nf3 Bg4 5. Bc4 e6 6. O-O Nb6 7. Be2 Be7 8. h3 Bh5 9. Bf4 Nc6 10. c3 O-O 11. Nbd2 d5 12. b4 a5 13. a3 Qd7 14. Qc2 Bg6 15. Bd3 Rfc8 16. Rfb1 Bf8 17. h4 Ne7 18. g3 Qa4 19. Ne1 Qxc2 20. Bxc2 Bxc2 21. Nxc2 Na4 22. Rb3 b6 23. Kf1 c5 24. bxc5 bxc5 25. dxc5 Rxc5 26. Nb1 Rac8 27. Be3 Rc4 28. Bd4 Nc6 29. Rb5 Nxd4 30. Nxd4 Nxc3 31. Nxc3 Rxd4 32. Ne2 Ra4 33. Ke1 Rxa3 34. Rab1 Bb4+ 35. Kf1 Rd3 0-1


## Game Lines and FEN

# Here I extract, for every move on each game's main line, the FEN string of the board state before the move together with the move itself

STEPS:
* Walk through the main line of every game
* Move by move, save the FEN string of the current board state together with the move played from it (White's and Black's moves go to separate datasets)

In [79]:
def create_datasets(games):
    """Split the main-line moves of all games into White and Black samples.

    Every sample is a ``(fen, uci)`` tuple: the FEN string of the position
    *before* the move is played, and the move itself in UCI notation.

    Parameters
    ----------
    games : iterable
        Game objects providing ``board()`` (the starting position) and
        ``mainline_moves()`` (the moves of the main line).

    Returns
    -------
    tuple of (list, list)
        ``(white_dataset, black_dataset)``; a sample goes to the first list
        when it is White's turn in the recorded position, otherwise to the
        second.
    """
    white_dataset = []
    black_dataset = []

    for game in games:
        board = game.board()

        # The main line already yields move objects, so they are pushed as-is
        # instead of being serialised to UCI and parsed back again.
        for move in game.mainline_moves():
            # Record the position first: the FEN must describe the board the
            # move is chosen from, not the one it leads to.
            sample = (board.fen(), move.uci())

            if board.turn == chess.WHITE:
                white_dataset.append(sample)
            else:
                black_dataset.append(sample)

            # Advance the board so the next sample sees the updated position.
            board.push(move)

    return white_dataset, black_dataset

In [80]:
# Build the per-side lists of (FEN before the move, UCI move) samples from all loaded games.
white_moves, black_moves = create_datasets(games)

In [82]:
# Preview only the first five White samples rather than printing the whole list.
print(white_moves[:5])

[('rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1', 'd2d4'), ('rnbqkb1r/pppppppp/5n2/8/3P4/8/PPP1PPPP/RNBQKBNR w KQkq - 1 2', 'g1f3'), ('rnbqkb1r/ppp1pppp/5n2/3p4/3P4/5N2/PPP1PPPP/RNBQKB1R w KQkq - 0 3', 'e2e3'), ('rn1qkb1r/ppp1pppp/5n2/3p1b2/3P4/4PN2/PPP2PPP/RNBQKB1R w KQkq - 1 4', 'c2c4'), ('rn1qkb1r/pp2pppp/2p2n2/3p1b2/2PP4/4PN2/PP3PPP/RNBQKB1R w KQkq - 0 5', 'b1c3')]


In [84]:
# Preview only the first five Black samples rather than printing the whole list.
print(black_moves[:5])

[('rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR b KQkq - 0 1', 'g8f6'), ('rnbqkb1r/pppppppp/5n2/8/3P4/5N2/PPP1PPPP/RNBQKB1R b KQkq - 2 2', 'd7d5'), ('rnbqkb1r/ppp1pppp/5n2/3p4/3P4/4PN2/PPP2PPP/RNBQKB1R b KQkq - 0 3', 'c8f5'), ('rn1qkb1r/ppp1pppp/5n2/3p1b2/2PP4/4PN2/PP3PPP/RNBQKB1R b KQkq - 0 4', 'c7c6'), ('rn1qkb1r/pp2pppp/2p2n2/3p1b2/2PP4/2N1PN2/PP3PPP/R1BQKB1R b KQkq - 1 5', 'e7e6')]


## Data Preprocessing

STEPS:

* 
* 
* 