In [1]:
import pandas as pd
import time
# Wall-clock timestamp taken when this cell runs; none of the visible cells read it back.
start = time.time()

# Read only the first 1,000 rows of the games file (nrows=1_000).
df = pd.read_csv('chess_games.csv',nrows=1_000)
# 'AN' holds each game's movetext: move numbers, SAN moves and, for some games, inline { [%eval ...] } comments.
df['AN']

0      1. d4 d5 2. c4 c6 3. e3 a6 4. Nf3 e5 5. cxd5 e...
1      1. e4 e5 2. b3 Nf6 3. Bb2 Nc6 4. Nf3 d6 5. d3 ...
2      1. e4 d5 2. exd5 Qxd5 3. Nf3 Bg4 4. Be2 Nf6 5....
3      1. e3 Nf6 2. Bc4 d6 3. e4 e6 4. Nf3 Nxe4 5. Nd...
4      1. e4 c5 2. Nf3 d6 3. d4 cxd4 4. Nxd4 Nf6 5. N...
                             ...                        
995    1. d4 d5 2. Nf3 Nc6 3. e3 Nf6 4. Bd3 e6 5. Nbd...
996    1. d4 e6 2. e4 c5 3. d5 d6 4. Nc3 f5 5. dxe6 B...
997    1. e4 e6 2. d4 c5 3. c3 a6 4. Nf3 h6 5. Be2 Be...
998    1. e3 { [%eval 0.1] } 1... c5 { [%eval 0.18] }...
999    1. e4 c6 2. Nc3 Nf6 3. g3 d6 4. Bg2 g6 5. Nge2...
Name: AN, Length: 1000, dtype: object

In [2]:
import re

def moves_to_list(moves):
    """Extract the bare move tokens from a game's movetext string.

    The text is split on whitespace and a token is kept only when it
    *starts* like a move: one or more letters followed by a digit 1-9, or
    a castling token. Tokens such as ``1.``, ``1...``, ``{`` or ``[%eval``
    therefore fall out. The characters ``?``, ``!`` and ``+`` are removed
    from every kept token; anything else (e.g. ``#`` or ``=Q``) is left
    untouched.

    Args:
        moves: movetext of one game, e.g. ``"1. e4 e5 2. Nf3 Nc6"``.

    Returns:
        List of cleaned move strings in game order.
    """
    # Deletion table for the annotation / check marks.
    marks_to_drop = str.maketrans('', '', '?!+')
    return [
        token.translate(marks_to_drop)
        for token in moves.split()
        if re.match('[a-zA-Z]+[1-9]|O-O|O-O-O', token)
    ]

# Turn every game's movetext into a list of cleaned move tokens (one list per row).
df['move_list'] = df['AN'].apply(moves_to_list)
df['move_list']

0      [d4, d5, c4, c6, e3, a6, Nf3, e5, cxd5, e4, Ne...
1      [e4, e5, b3, Nf6, Bb2, Nc6, Nf3, d6, d3, g6, N...
2      [e4, d5, exd5, Qxd5, Nf3, Bg4, Be2, Nf6, Nc3, ...
3      [e3, Nf6, Bc4, d6, e4, e6, Nf3, Nxe4, Nd4, Nxf...
4      [e4, c5, Nf3, d6, d4, cxd4, Nxd4, Nf6, Nc3, a6...
                             ...                        
995    [d4, d5, Nf3, Nc6, e3, Nf6, Bd3, e6, Nbd2, Be7...
996    [d4, e6, e4, c5, d5, d6, Nc3, f5, dxe6, Bxe6, ...
997    [e4, e6, d4, c5, c3, a6, Nf3, h6, Be2, Be7, O-...
998    [e3, c5, Ne2, Nc6, Ng3, d6, Be2, e5, O-O, f5, ...
999    [e4, c6, Nc3, Nf6, g3, d6, Bg2, g6, Nge2, Bg7,...
Name: move_list, Length: 1000, dtype: object

In [3]:
import chess
import numpy as np


def save_board_state_after_move(moves):
    """Replay a game from the initial position and record the FEN after each move.

    Args:
        moves: iterable of SAN move strings in playing order.

    Returns:
        A list with one FEN string per move. If any step of the replay
        raises, the exception is printed and ``np.nan`` is returned instead,
        so the positions collected up to that point are discarded.
    """
    position = chess.Board()
    fens_so_far = []
    try:
        for san in moves:
            position.push_san(san)
            fens_so_far.append(position.fen())
    except Exception as err:
        # Best effort: report the problem and mark the whole game as missing.
        print(err)
        return np.nan
    return fens_so_far

# Replay every game; a row becomes NaN (not a list) when its replay raised.
df['FENs'] = df['move_list'].apply(save_board_state_after_move)

In [4]:
# One row per position: each game's FEN list is spread over consecutive rows that keep the game's index label.
exploded_FENs = df['FENs'].explode()

In [7]:
# Peek at the underlying array of FEN strings.
exploded_FENs.values

array(['rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR b KQkq - 0 1',
       'rnbqkbnr/ppp1pppp/8/3p4/3P4/8/PPP1PPPP/RNBQKBNR w KQkq - 0 2',
       'rnbqkbnr/ppp1pppp/8/3p4/2PP4/8/PP2PPPP/RNBQKBNR b KQkq - 0 2',
       ...,
       'r2r2k1/pp2ppbp/1np1bnp1/8/5P2/1PN3P1/P1PRN1BP/R1B3K1 w - - 1 15',
       'r2r2k1/pp2ppbp/1np1bnp1/8/5P2/BPN3P1/P1PRN1BP/R5K1 b - - 2 15',
       'r5k1/pp2ppbp/1np1bnp1/8/5P2/BPN3P1/P1PrN1BP/R5K1 w - - 0 16'],
      dtype=object)

In [43]:
from stockfish import Stockfish
from random import randint

# Default location of the Stockfish binary. Written as a raw string so the
# Windows backslashes are taken literally instead of being parsed as
# (invalid) escape sequences such as "\P" or "\S".
STOCKFISH_PATH = r"D:\Programowanie\StockFish\stockfish_15.1_win_x64_avx2\stockfish-windows-2022-x86-64-avx2.exe"


def generate_stockfish_moves(FEN, num_moves=1, stockfish_path=STOCKFISH_PATH):
    """Let Stockfish play on from ``FEN`` and record the position after each move.

    A fresh engine instance is created on every call. At each step the engine
    is asked for up to three candidate moves and one of them is chosen
    uniformly at random.

    Args:
        FEN: starting position as a FEN string.
        num_moves: maximum number of half-moves to generate.
        stockfish_path: path to the Stockfish executable; defaults to
            ``STOCKFISH_PATH``.

    Returns:
        List of FEN strings: the starting ``FEN`` followed by the position
        after each generated move. Generation stops early on checkmate, when
        the engine offers no move, or when a move cannot be applied to the
        board (the error is printed).
    """
    FENs_list = [FEN]
    stockfish = Stockfish(path=stockfish_path)
    board = chess.Board(FEN)
    for _ in range(num_moves):
        if board.is_checkmate():
            break
        # The engine is re-synchronised with the board before every query.
        stockfish.set_fen_position(board.fen())
        top_moves = stockfish.get_top_moves(3)
        if not top_moves:
            # No candidate move (e.g. a position without legal moves):
            # randint(0, -1) would raise ValueError, so stop here instead.
            break
        move = top_moves[randint(0, len(top_moves) - 1)]['Move']
        try:
            board.push_uci(move)
            FENs_list.append(board.fen())
        except Exception as e:
            print(e)
            break
    return FENs_list

In [14]:
from threading import Thread

class CustomThread(Thread):
    """Thread that runs ``target`` and keeps its return value in ``self.value``.

    ``index`` and ``num_moves`` are optional. When ``index`` is left at its
    default (``None``) the target is called as ``target(fen, num_moves)``;
    when an index is supplied it is called as
    ``target(fen, index, num_moves)``.

    Attributes are plain instance fields:
      target    -- callable executed in the thread
      fen       -- starting position passed to the target
      index     -- optional extra argument forwarded to the target
      num_moves -- number of moves passed to the target
      value     -- the target's return value, ``None`` until ``run`` finishes
                   (it also stays ``None`` if the target raised)
    """

    def __init__(self, target, fen, index=None, num_moves=1):
        super().__init__()
        self.target = target
        self.fen = fen
        self.index = index
        self.num_moves = num_moves
        self.value = None

    def run(self):
        """Call the target and store its result for retrieval after ``join()``."""
        if self.index is None:
            self.value = self.target(self.fen, self.num_moves)
        else:
            self.value = self.target(self.fen, self.index, self.num_moves)

In [45]:
from tqdm import tqdm

# Size of the generation run: N_BATCHES rounds of THREADS_PER_BATCH concurrent
# games, each at most MOVES_PER_GAME half-moves long.
N_BATCHES = 125
THREADS_PER_BATCH = 8
MOVES_PER_GAME = 68

stockfish_fens_list = []
board = chess.Board()
fen = board.fen()  # every generated game starts from the initial position

for _ in tqdm(range(N_BATCHES)):
    threads = [
        CustomThread(target=generate_stockfish_moves, fen=fen, num_moves=MOVES_PER_GAME)
        for _ in range(THREADS_PER_BATCH)
    ]
    for thread in threads:
        thread.start()

    # Wait for the whole batch, then collect one FEN list per thread in start order.
    for thread in threads:
        thread.join()
    stockfish_fens_list.extend(thread.value for thread in threads)

# Sequential alternative kept for reference: call
# generate_stockfish_moves(fen, 68) in a plain loop and append each result.

100%|██████████| 125/125 [1:15:24<00:00, 36.20s/it]


In [40]:
# Number of generated games collected so far (one FEN list per worker thread).
# NOTE(review): the loop that fills this list appends 125 * 8 = 1000 entries, so the
# "80" recorded below looks like output from an earlier, smaller run -- confirm by
# re-running the notebook top to bottom.
len(stockfish_fens_list)

80

In [47]:
# One row per generated game, the single 'FENs' cell holding that game's list of positions.
stockfish_games = pd.DataFrame({'FENs': stockfish_fens_list})

# Spread every list over its own rows; each row keeps the label of the game it came from.
stockfish_exploded_fens = stockfish_games['FENs'].explode()
stockfish_exploded_fens

0      rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...
0      rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...
0      rnbqkbnr/pp1ppppp/8/2p5/4P3/8/PPPP1PPP/RNBQKBN...
0      rnbqkbnr/pp1ppppp/8/2p5/4P3/2N5/PPPP1PPP/R1BQK...
0      rnbqkbnr/pp1p1ppp/4p3/2p5/4P3/2N5/PPPP1PPP/R1B...
                             ...                        
999    b5k1/5pb1/p7/2p3p1/2P3Q1/6P1/P1KP1PqP/1r6 w - ...
999    b5k1/5pb1/p7/2p3p1/2P3Q1/6P1/P2P1PqP/1K6 b - -...
999    b5k1/5pb1/p7/2p3p1/2P1q1Q1/6P1/P2P1P1P/1K6 w -...
999    b5k1/5pb1/p7/2p3p1/2P1q1Q1/3P2P1/P4P1P/1K6 b -...
999    b5k1/5pb1/p7/2p3p1/2P3Q1/3P2P1/P4P1P/1K2q3 w -...
Name: FENs, Length: 68288, dtype: object

In [48]:
# Flat table of engine positions: game id, FEN string and the constant class label 1.
stockfish_fens_df = pd.DataFrame(
    {
        'index': stockfish_exploded_fens.index,
        'FEN': stockfish_exploded_fens.values,
        'label': 1,
    }
)
stockfish_fens_df

Unnamed: 0,index,FEN,label
0,0,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,1
1,0,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,1
2,0,rnbqkbnr/pp1ppppp/8/2p5/4P3/8/PPPP1PPP/RNBQKBN...,1
3,0,rnbqkbnr/pp1ppppp/8/2p5/4P3/2N5/PPPP1PPP/R1BQK...,1
4,0,rnbqkbnr/pp1p1ppp/4p3/2p5/4P3/2N5/PPPP1PPP/R1B...,1
...,...,...,...
68283,999,b5k1/5pb1/p7/2p3p1/2P3Q1/6P1/P1KP1PqP/1r6 w - ...,1
68284,999,b5k1/5pb1/p7/2p3p1/2P3Q1/6P1/P2P1PqP/1K6 b - -...,1
68285,999,b5k1/5pb1/p7/2p3p1/2P1q1Q1/6P1/P2P1P1P/1K6 w -...,1
68286,999,b5k1/5pb1/p7/2p3p1/2P1q1Q1/3P2P1/P4P1P/1K6 b -...,1


In [49]:
# Flat table of positions from the human games: game id, FEN string and the constant class label 0.
human_fens_df = pd.DataFrame(
    {
        'index': exploded_FENs.index,
        'FEN': exploded_FENs.values,
        'label': 0,
    }
)
human_fens_df

Unnamed: 0,index,FEN,label
0,0,rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR ...,0
1,0,rnbqkbnr/ppp1pppp/8/3p4/3P4/8/PPP1PPPP/RNBQKBN...,0
2,0,rnbqkbnr/ppp1pppp/8/3p4/2PP4/8/PP2PPPP/RNBQKBN...,0
3,0,rnbqkbnr/pp2pppp/2p5/3p4/2PP4/8/PP2PPPP/RNBQKB...,0
4,0,rnbqkbnr/pp2pppp/2p5/3p4/2PP4/4P3/PP3PPP/RNBQK...,0
...,...,...,...
68012,999,r4rk1/pp2ppbp/1np1bnp1/8/5P2/2N3P1/PPPRN1BP/R1...,0
68013,999,r4rk1/pp2ppbp/1np1bnp1/8/5P2/1PN3P1/P1PRN1BP/R...,0
68014,999,r2r2k1/pp2ppbp/1np1bnp1/8/5P2/1PN3P1/P1PRN1BP/...,0
68015,999,r2r2k1/pp2ppbp/1np1bnp1/8/5P2/BPN3P1/P1PRN1BP/...,0


In [54]:
# Stack the human (label 0) and Stockfish (label 1) positions into one table.
preprocessed_dataset = pd.concat([human_fens_df, stockfish_fens_df])

# Game ids start at 0 in both sources. Shift the Stockfish ids past the highest
# human id so every game keeps a unique id; deriving the offset from the data
# (instead of a literal 1000) keeps this correct if the number of human games changes.
game_id_offset = human_fens_df['index'].max() + 1
preprocessed_dataset['index'] = (
    preprocessed_dataset['index'] + game_id_offset * preprocessed_dataset['label']
)
preprocessed_dataset

Unnamed: 0,index,FEN,label
0,0,rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR ...,0
1,0,rnbqkbnr/ppp1pppp/8/3p4/3P4/8/PPP1PPPP/RNBQKBN...,0
2,0,rnbqkbnr/ppp1pppp/8/3p4/2PP4/8/PP2PPPP/RNBQKBN...,0
3,0,rnbqkbnr/pp2pppp/2p5/3p4/2PP4/8/PP2PPPP/RNBQKB...,0
4,0,rnbqkbnr/pp2pppp/2p5/3p4/2PP4/4P3/PP3PPP/RNBQK...,0
...,...,...,...
68283,1999,b5k1/5pb1/p7/2p3p1/2P3Q1/6P1/P1KP1PqP/1r6 w - ...,1
68284,1999,b5k1/5pb1/p7/2p3p1/2P3Q1/6P1/P2P1PqP/1K6 b - -...,1
68285,1999,b5k1/5pb1/p7/2p3p1/2P1q1Q1/6P1/P2P1P1P/1K6 w -...,1
68286,1999,b5k1/5pb1/p7/2p3p1/2P1q1Q1/3P2P1/P4P1P/1K6 b -...,1


In [55]:
# Persist the combined dataset. The frame's row index is written as an unnamed first
# column (to_csv default); its labels repeat because the concat kept each source's own 0-based index.
preprocessed_dataset.to_csv('dataset_lstm_2.csv')