In [1]:
import zstandard as zstd

def decompress_zst(input_file, output_file):
    with open(input_file, 'rb') as compressed:
        with open(output_file, 'wb') as decompressed:
            dctx = zstd.ZstdDecompressor()
            dctx.copy_stream(compressed, decompressed)

# Decompress the PGN file
decompress_zst('lichess_db_standard_rated_2014-07.pgn.zst', 'lichess_db_standard_rated_2014-07.pgn')


In [2]:
import chess
import chess.pgn
import pandas as pd

def parse_pgn(file_path, max_games=None):
    games = []
    with open(file_path) as f:
        for idx, game in enumerate(iter(lambda: chess.pgn.read_game(f), None)):
            if max_games and idx >= max_games:
                break  
            board = game.board()
            game_moves = [(board.fen(), move.uci()) for move in game.mainline_moves()]
            games.extend(game_moves)  # Append moves in bulk for each game
    return pd.DataFrame(games, columns=['fen', 'move'])


df = parse_pgn('lichess_db_standard_rated_2014-07.pgn')


KeyboardInterrupt: 

In [2]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Encode the moves
le = LabelEncoder()
df['move'] = le.fit_transform(df['move'])

# Convert FEN to a numerical representation
def fen_to_input(fen):
    board = chess.Board(fen)
    input_vector = np.zeros((8, 8, 12))  # 12 for each piece type
    piece_map = {
        'P': 0, 'N': 1, 'B': 2, 'R': 3, 'Q': 4, 'K': 5,
        'p': 6, 'n': 7, 'b': 8, 'r': 9, 'q': 10, 'k': 11
    }
    for square, piece in board.piece_map().items():
        x, y = divmod(square, 8)
        input_vector[x, y, piece_map[piece.symbol()]] = 1
    return input_vector.flatten()

# Prepare inputs and targets
X = np.array([fen_to_input(fen) for fen in df['fen']])
y = df['move'].values


In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(len(le.classes_), activation='softmax')  # Output layer for each possible move
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [5]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2d8e3a92fd0>

In [6]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy:.4f}')


Test Accuracy: 0.0112


In [7]:
model.save('chess_model.h5')


In [9]:
def predict_move(fen):
    input_data = fen_to_input(fen)
    prediction = model.predict(np.array([input_data]))
    predicted_move = le.inverse_transform(np.argmax(prediction, axis=1))
    return predicted_move[0]

# Example of predicting a move
fen_example = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"
predicted_move = predict_move(fen_example)
print(f'Predicted Move: {predicted_move}')


Predicted Move: g1f3
