In [1]:
import chess.pgn
import pandas as pd

def extract_moves(game):
    """Flatten the main line of a parsed PGN game into one record per half-move.

    Args:
        game: A ``chess.pgn.Game``, e.g. as returned by ``chess.pgn.read_game``.

    Returns:
        A list of ``(color, move_number, piece, move_from, move_to)`` tuples in
        playing order. ``color`` is ``"White"`` or ``"Black"`` (the side that
        made the move), ``move_number`` is a 1-based half-move (ply) counter,
        ``piece`` is the name reported by ``get_piece_from_move`` and the last
        two entries are square names such as ``"e2"``.
    """
    moves = []
    # Track the position from the game's own starting point so that games with
    # a FEN/SetUp header get the right piece on each origin square; a fresh
    # chess.Board() is only correct for the standard initial position.
    board = game.board()
    for move_number, node in enumerate(game.mainline(), start=1):
        move = node.move
        # The side to move *before* the push is the side making this move.
        # Reading it from the tracked board avoids calling node.board(), which
        # has to rebuild the position for every single node.
        color = "White" if board.turn == chess.WHITE else "Black"
        piece = get_piece_from_move(board, move)
        move_from = chess.square_name(move.from_square)
        move_to = chess.square_name(move.to_square)

        moves.append((color, move_number, piece, move_from, move_to))
        board.push(move)  # Advance the tracked position past this move

    return moves

def get_piece_from_move(board, move):
    """Name the piece standing on the origin square of ``move``.

    The piece is looked up on ``board`` at ``move.from_square``. An empty
    origin square, and any piece type without an entry in the table below,
    is reported as ``"Pawn"``.
    """
    moving_piece = board.piece_at(move.from_square)
    if moving_piece is None:
        # Nothing on the origin square: fall back to the default name
        return "Pawn"

    names_by_type = {
        chess.KNIGHT: "Knight",
        chess.BISHOP: "Bishop",
        chess.ROOK: "Rook",
        chess.QUEEN: "Queen",
        chess.KING: "King",
    }
    return names_by_type.get(moving_piece.piece_type, "Pawn")

# Column buffers for the long-format table: one list per DataFrame column,
# all extended together so they always have the same length.
game_id_list, white_result_list, black_result_list = [], [], []
color_list, move_number_list = [], []
piece_list, move_from_list, move_to_list = [], [], []

# Walk the numbered PGN files; game IDs keep counting across files
game_number = 1
for file_num in range(1, 26):
    file_path = f"../data/Magnus_Carlsen{file_num}.pgn"
    with open(file_path) as f:
        # read_game returns None once the file has no further games
        game = chess.pgn.read_game(f)
        while game is not None:
            headers = game.headers
            game_id = game_number  # running counter doubles as the game ID
            game_number += 1

            # Outcome flags: 1 for the winner, 0 otherwise; any result other
            # than "1-0" / "0-1" leaves both flags at 0
            result = headers["Result"]
            white_result = int(result == "1-0")
            black_result = int(result == "0-1")

            moves = extract_moves(game)
            ply_count = len(moves)

            # Per-game values are repeated once per half-move ...
            game_id_list.extend([game_id] * ply_count)
            white_result_list.extend([white_result] * ply_count)
            black_result_list.extend([black_result] * ply_count)
            # ... while per-move values are taken field by field from the
            # (color, move_number, piece, move_from, move_to) tuples
            color_list.extend(entry[0] for entry in moves)
            move_number_list.extend(entry[1] for entry in moves)
            piece_list.extend(entry[2] for entry in moves)
            move_from_list.extend(entry[3] for entry in moves)
            move_to_list.extend(entry[4] for entry in moves)

            game = chess.pgn.read_game(f)

# Assemble the buffers into a DataFrame with one row per half-move
data = {
    "Game ID": game_id_list,
    "White Result": white_result_list,
    "Black Result": black_result_list,
    "Color": color_list,
    "Move Number": move_number_list,
    "Piece": piece_list,
    "Move From": move_from_list,
    "Move To": move_to_list,
}
df = pd.DataFrame(data)
df

Unnamed: 0,Game ID,White Result,Black Result,Color,Move Number,Piece,Move From,Move To
0,1,1,0,White,1,Pawn,d2,d4
1,1,1,0,Black,2,Knight,g8,f6
2,1,1,0,White,3,Pawn,c2,c4
3,1,1,0,Black,4,Pawn,e7,e6
4,1,1,0,White,5,Knight,g1,f3
...,...,...,...,...,...,...,...,...
58126,728,0,0,Black,42,King,g8,h8
58127,728,0,0,White,43,Knight,h6,f7
58128,728,0,0,Black,44,King,h8,g8
58129,728,0,0,White,45,Knight,f7,h6


In [None]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer

# Select the per-game move columns.
# NOTE(review): the `df` built in the first cell has long-format columns
# ("Game ID", "Color", "Move Number", ..., "Move To"); this slice needs
# wide-format columns 'White Move 1' .. 'Black Move 135', so on a fresh kernel
# it presumably raises KeyError unless `df` is (re)loaded in that shape. Confirm.
move_columns = df.loc[:, 'White Move 1':'Black Move 135']  # Adjust column names accordingly

# One list of moves per row, with the empty (NaN) cells removed
move_sequences = [row.dropna().tolist() for _, row in move_columns.iterrows()]

# Multi-hot encode: one indicator column per distinct move seen in any row
mlb = MultiLabelBinarizer()
encoded_moves = mlb.fit_transform(move_sequences)

# NOTE(review): the following cell reassigns encoded_moves / encoded_moves_df,
# so this encoding is overwritten before anything downstream reads it.
encoded_moves_df = pd.DataFrame(encoded_moves, columns=mlb.classes_)
encoded_moves_df

In [None]:
import pandas as pd

# Select the per-game move columns.
# NOTE(review): the `df` built in the first cell has long-format columns
# ("Game ID", "Color", "Move Number", ..., "Move To"); this slice needs
# wide-format columns 'White Move 1' .. 'Black Move 135', so on a fresh kernel
# it presumably raises KeyError unless `df` is (re)loaded in that shape. Confirm.
move_columns = df.loc[:, 'White Move 1':'Black Move 135']  # Adjust column names accordingly

# Collect every distinct move that appears anywhere in the table
unique_moves = set()
for _, row in move_columns.iterrows():
    unique_moves.update(row.dropna().tolist())

# Map each move to a positive integer code; 0 is left free for "no move".
# The moves are sorted first so the codes are reproducible: enumerating the
# set directly can give a different order (and therefore different features)
# on every interpreter start because string hashing is randomised.
move_to_index = {
    move: index
    for index, move in enumerate(sorted(unique_moves, key=str), start=1)
}

# Column-wise dictionary lookup; empty cells come back as NaN.
# This replaces DataFrame.applymap, which is deprecated in recent pandas.
encoded_moves = move_columns.apply(lambda column: column.map(move_to_index))

# Empty cells become code 0 and everything is stored as plain integers
encoded_moves_df = encoded_moves.fillna(0).astype(int)
encoded_moves_df

In [None]:
# Put the two result columns side by side with the encoded move features.
# encoded_moves_df is listed once: passing the same frame twice only
# duplicates every feature column (and its label) without adding information.
combined_df = pd.concat(
    [df[['White Result', 'Black Result']], encoded_moves_df],
    axis=1,
)

In [None]:
# Display the combined results + encoded-move table for inspection
combined_df

In [None]:
# train/test split

from sklearn.model_selection import train_test_split

# Targets are the two outcome flags; every remaining column is a feature.
# NOTE(review): the loading cell sets both flags to 0 for any result other than
# "1-0" / "0-1", so such rows have an all-zero target here. Confirm that
# this is intended for the 2-way softmax model trained below.
y = combined_df[['White Result', 'Black Result']]  # Target variable
X = combined_df.drop(columns=['White Result', 'Black Result'])  # Input features

# Hold out 20% of the rows; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature scaling ranges from the training split only, then
# apply those same ranges to the test split (the scaler is not refit on it).
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Define the architecture of the neural network

from keras.models import Sequential
from keras.layers import Dense

# Feed-forward classifier: two ReLU hidden layers (64 then 32 units) followed
# by a 2-unit softmax output. The input width is taken from the scaled
# training matrix, so it follows whatever feature count the cells above produce.
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
])

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, and
# accuracy reported as the tracked metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 10 epochs in mini-batches of 32 and keep the per-epoch history.
# NOTE(review): the held-out test split is also passed as validation data, so
# the validation metrics are not an independent final estimate.
history = model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))