In [1]:
!pip install python-chess pandas


Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.

Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl.metadata (776 bytes)
Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Installing collected packages: python-chess
Successfully installed python-chess-1.999


In [2]:
import chess.pgn
import pandas as pd

# File path to the PGN dataset
pgn_file = r"C:\Users\hiddensardar\Downloads\master_games.pgn"

# Initialize a list to store game data
games_data = []

# Open and parse the PGN file
with open(pgn_file) as file:
    while True:
        game = chess.pgn.read_game(file)
        if game is None:
            break
        # Extract details for each game
        game_info = {
            'Event': game.headers.get('Event'),
            'White': game.headers.get('White'),
            'Black': game.headers.get('Black'),
            'Result': game.headers.get('Result'),
            'Moves': [move.uci() for move in game.mainline_moves()]
        }
        games_data.append(game_info)

# Convert the list to a DataFrame for easier analysis
df_games = pd.DataFrame(games_data)

# Display the first few rows of the dataframe
print(df_games.head())


                  Event            White                   Black Result  \
0  January 2 Early 2024  Carlsen, Magnus  Santos Flores, Alberto    1-0   
1  January 2 Early 2024  Carlsen, Magnus     Duda, Jan-Krzysztof    1-0   
2  January 2 Early 2024  Carlsen, Magnus  Mamedyarov, Shakhriyar    1-0   
3  January 2 Early 2024  Carlsen, Magnus        Zhigalko, Sergei    1-0   
4  January 2 Early 2024  Carlsen, Magnus    Donchenko, Alexander    1-0   

                                               Moves  
0  [e2e4, e7e6, b2b3, d7d5, c1b2, d5e4, b1c3, g8f...  
1  [b2b3, d7d5, g1f3, g8f6, c1b2, c8f5, f3h4, f5d...  
2  [g1f3, d7d5, b2b3, g8f6, c1b2, g7g6, c2c4, d5c...  
3  [h2h4, e7e5, d2d4, e5d4, d1d4, b8c6, d4a4, g8f...  
4  [h2h3, g8f6, g2g4, e7e6, g4g5, f6d5, e2e4, d5b...  


In [4]:
df_games.head()

Unnamed: 0,Event,White,Black,Result,Moves
0,January 2 Early 2024,"Carlsen, Magnus","Santos Flores, Alberto",1-0,"[e2e4, e7e6, b2b3, d7d5, c1b2, d5e4, b1c3, g8f..."
1,January 2 Early 2024,"Carlsen, Magnus","Duda, Jan-Krzysztof",1-0,"[b2b3, d7d5, g1f3, g8f6, c1b2, c8f5, f3h4, f5d..."
2,January 2 Early 2024,"Carlsen, Magnus","Mamedyarov, Shakhriyar",1-0,"[g1f3, d7d5, b2b3, g8f6, c1b2, g7g6, c2c4, d5c..."
3,January 2 Early 2024,"Carlsen, Magnus","Zhigalko, Sergei",1-0,"[h2h4, e7e5, d2d4, e5d4, d1d4, b8c6, d4a4, g8f..."
4,January 2 Early 2024,"Carlsen, Magnus","Donchenko, Alexander",1-0,"[h2h3, g8f6, g2g4, e7e6, g4g5, f6d5, e2e4, d5b..."


In [5]:
# Display the total number of games
total_games = len(df_games)
print(f"Total number of games: {total_games}")

# Count the distribution of results
result_distribution = df_games['Result'].value_counts()
print("\nResult distribution:")
print(result_distribution)

# Display the first few games to inspect opening moves
opening_moves = df_games['Moves'].apply(lambda moves: moves[:5])  # First 5 moves
df_games['Opening_Moves'] = opening_moves
print("\nFirst few opening moves from the games:")
print(df_games[['White', 'Black', 'Opening_Moves']].head())


Total number of games: 25

Result distribution:
Result
1-0    25
Name: count, dtype: int64

First few opening moves from the games:
             White                   Black                   Opening_Moves
0  Carlsen, Magnus  Santos Flores, Alberto  [e2e4, e7e6, b2b3, d7d5, c1b2]
1  Carlsen, Magnus     Duda, Jan-Krzysztof  [b2b3, d7d5, g1f3, g8f6, c1b2]
2  Carlsen, Magnus  Mamedyarov, Shakhriyar  [g1f3, d7d5, b2b3, g8f6, c1b2]
3  Carlsen, Magnus        Zhigalko, Sergei  [h2h4, e7e5, d2d4, e5d4, d1d4]
4  Carlsen, Magnus    Donchenko, Alexander  [h2h3, g8f6, g2g4, e7e6, g4g5]


In [None]:
"C:\Users\hiddensardar\Downloads\stockfish-windows-x86-64-avx2\stockfish\stockfish-windows-x86-64-avx2.exe"

In [7]:
import chess.engine

# Path to your Stockfish executable
stockfish_path = r"C:\Users\hiddensardar\Downloads\stockfish-windows-x86-64-avx2\stockfish\stockfish-windows-x86-64-avx2.exe"

def get_top_moves(moves, stockfish_path, num_moves=10):
    # Create a new chess board
    board = chess.Board()
    
    # Execute moves from the game
    for move in moves:
        board.push(chess.Move.from_uci(move))
    
    # Initialize the Stockfish engine
    with chess.engine.SimpleEngine.popen_uci(stockfish_path) as engine:
        # Get the top moves
        top_moves = engine.play(board, chess.engine.Limit(time=2.0), options={"UCI_LimitStrength": "true"})
        best_move = top_moves.move
        
        # Analyze to get multiple best moves
        info = engine.analyse(board, chess.engine.Limit(time=2.0), multipv=num_moves)
        
    # Return the top moves
    return [(info[i]["pv"][0], info[i]["score"].relative.score()) for i in range(num_moves)]

# Example usage with the first game in the DataFrame
first_game_moves = df_games['Moves'].iloc[0]
top_moves = get_top_moves(first_game_moves, stockfish_path)

print("\nTop moves with their scores:")
for move, score in top_moves:
    print(f"Move: {move}, Score: {score}")



Top moves with their scores:
Move: a3d6, Score: None
Move: a3b2, Score: None
Move: d7e8, Score: None
Move: c6c5, Score: None
Move: h6h5, Score: None
Move: b7b6, Score: None
Move: f7f6, Score: None
Move: f7f5, Score: None
Move: c6b5, Score: None
Move: a3c1, Score: None


In [8]:
import chess.engine
stockfish_path = r"C:\Users\hiddensardar\Downloads\stockfish-windows-x86-64-avx2\stockfish\stockfish-windows-x86-64-avx2.exe"

def get_top_moves(moves, stockfish_path, num_moves=10):
    # Create a new chess board
    board = chess.Board()
    
    # Execute moves from the game
    for move in moves:
        board.push(chess.Move.from_uci(move))
    
    # Initialize the Stockfish engine
    with chess.engine.SimpleEngine.popen_uci(stockfish_path) as engine:
        # Analyze to get multiple best moves
        info = engine.analyse(board, chess.engine.Limit(time=2.0), multipv=num_moves)
        
        # Extract the top moves and their evaluation scores
        top_moves = []
        for i in range(num_moves):
            move_info = info[i]
            move = move_info['pv'][0]  # Principal variation (top move)
            score = move_info['score'].relative  # Score relative to the current position
            if score.is_mate():
                score_value = f"M{score.mate()}"  # Mate in X moves
            else:
                score_value = score.score()  # Standard evaluation score
            top_moves.append((move, score_value))
    
    # Return the top moves with their scores
    return top_moves

# Example usage with the first game in the DataFrame
first_game_moves = df_games['Moves'].iloc[0]
top_moves = get_top_moves(first_game_moves, stockfish_path)

print("\nTop moves with their scores:")
for move, score in top_moves:
    print(f"Move: {move}, Score: {score}")



Top moves with their scores:
Move: a3d6, Score: M-4
Move: a3b2, Score: M-4
Move: d7e8, Score: M-2
Move: a3c5, Score: M-1
Move: a3f8, Score: M-1
Move: a3e7, Score: M-1
Move: c6b5, Score: M-1
Move: a3c1, Score: M-1
Move: f7f6, Score: M-1
Move: f7f5, Score: M-1


In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Extracting the pieces moved from the moves list
def extract_piece_from_move(move):
    return move[0]  # The first character represents the piece in UCI format (e.g., 'e2e4' -> 'e')

# Prepare the dataset
df_games['Piece_Moved'] = df_games['Moves'].apply(lambda moves: [extract_piece_from_move(move) for move in moves])

# Flatten the data for training
all_moves = []
all_pieces = []
for i in range(len(df_games)):
    all_moves.extend(df_games['Moves'].iloc[i])
    all_pieces.extend(df_games['Piece_Moved'].iloc[i])

# Encode the pieces for model training
le = LabelEncoder()
encoded_pieces = le.fit_transform(all_pieces)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(np.array(all_moves).reshape(-1, 1), encoded_pieces, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
rf_piece_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_piece_model.fit(X_train, y_train)

# Evaluate the model
accuracy = rf_piece_model.score(X_test, y_test)
print(f"Piece Model Accuracy: {accuracy:.2f}")


ValueError: could not convert string to float: 'c1c6'

In [10]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Convert chess board squares to numerical values
def square_to_index(square):
    file = ord(square[0]) - ord('a')  # 'a' to 'h' -> 0 to 7
    rank = int(square[1]) - 1  # '1' to '8' -> 0 to 7
    return file + rank * 8  # Unique index from 0 to 63

# Convert UCI move (e.g., 'c1c6') to numerical features
def move_to_features(move):
    start_square = move[:2]
    end_square = move[2:4]
    return [square_to_index(start_square), square_to_index(end_square)]

# Prepare the dataset by encoding moves
all_moves = []
all_pieces = []
for i in range(len(df_games)):
    for move, piece in zip(df_games['Moves'].iloc[i], df_games['Piece_Moved'].iloc[i]):
        move_features = move_to_features(move)
        all_moves.append(move_features)
        all_pieces.append(piece)

# Convert lists to NumPy arrays
X = np.array(all_moves)  # The features are the move (start, end squares)
y = LabelEncoder().fit_transform(all_pieces)  # Encode the pieces

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
rf_piece_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_piece_model.fit(X_train, y_train)

# Evaluate the model
accuracy = rf_piece_model.score(X_test, y_test)
print(f"Piece Model Accuracy: {accuracy:.2f}")


Piece Model Accuracy: 0.81


In [11]:
# Extract end squares from moves (e.g., 'c1c6' -> 'c6')
def extract_end_square_from_move(move):
    return move[2:4]

# Prepare the dataset for the Position Model
df_games['End_Square'] = df_games['Moves'].apply(lambda moves: [extract_end_square_from_move(move) for move in moves])

# Flatten the data for training
all_end_squares = []
for i in range(len(df_games)):
    all_end_squares.extend(df_games['End_Square'].iloc[i])

# Encode the end squares
encoded_end_squares = [square_to_index(square) for square in all_end_squares]  # Convert square to numeric index

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(np.array(all_moves), encoded_end_squares, test_size=0.2, random_state=42)

# Train a Random Forest Classifier for the Position Model
rf_position_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_position_model.fit(X_train, y_train)

# Evaluate the model
position_accuracy = rf_position_model.score(X_test, y_test)
print(f"Position Model Accuracy: {position_accuracy:.2f}")


Position Model Accuracy: 0.90


In [12]:
from sklearn.ensemble import RandomForestRegressor

# Example: Simulate time data for each move (you can replace this with real time data)
import random
df_games['Move_Time'] = df_games['Moves'].apply(lambda moves: [random.uniform(0.5, 20.0) for _ in moves])

# Flatten the move times
all_move_times = []
for i in range(len(df_games)):
    all_move_times.extend(df_games['Move_Time'].iloc[i])

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(np.array(all_moves), all_move_times, test_size=0.2, random_state=42)

# Train a Random Forest Regressor for the Time Model
rf_time_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_time_model.fit(X_train, y_train)

# Evaluate the model
time_model_r2 = rf_time_model.score(X_test, y_test)
print(f"Time Model R² Score: {time_model_r2:.2f}")


Time Model R² Score: -0.28


In [13]:
# Add move number as a feature
def move_number_feature(moves):
    return list(range(1, len(moves) + 1))

df_games['Move_Number'] = df_games['Moves'].apply(move_number_feature)

# Flatten the data for training
all_move_numbers = []
for i in range(len(df_games)):
    all_move_numbers.extend(df_games['Move_Number'].iloc[i])

# Rebuild the feature set to include move number and piece moved
X = []
for i in range(len(all_moves)):
    move_features = list(all_moves[i])  # Start and end square
    move_features.append(all_move_numbers[i])  # Add move number as a feature
    X.append(move_features)

# Convert to NumPy array
X = np.array(X)

# Train a Random Forest Regressor again with the additional feature
X_train, X_test, y_train, y_test = train_test_split(X, all_move_times, test_size=0.2, random_state=42)

rf_time_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_time_model.fit(X_train, y_train)

# Evaluate the model
time_model_r2 = rf_time_model.score(X_test, y_test)
print(f"Time Model R² Score with Move Number: {time_model_r2:.2f}")


Time Model R² Score with Move Number: -0.22


In [14]:
# Define a mapping for pieces
piece_mapping = {
    'P': 1,  # Pawn
    'N': 2,  # Knight
    'B': 3,  # Bishop
    'R': 4,  # Rook
    'Q': 5,  # Queen
    'K': 6   # King
}

# Extract piece types from moves (example: if move is 'c2c3', it's a pawn move)
def extract_piece_type(move):
    return move[0].upper()  # Assuming the first letter denotes the piece, adjust if necessary

# Prepare the dataset for Time Model with new features
df_games['Piece_Type'] = df_games['Moves'].apply(lambda moves: [extract_piece_type(move) for move in moves])

# Convert piece types to numerical values
all_piece_types = []
for i in range(len(df_games)):
    all_piece_types.extend(df_games['Piece_Type'].iloc[i])

# Include piece type and game phase in the features
X = []
for i in range(len(all_moves)):
    move_features = list(all_moves[i])  # Start and end square
    move_features.append(all_move_numbers[i])  # Add move number
    move_features.append(piece_mapping.get(all_piece_types[i], 0))  # Add piece type
    game_phase = (all_move_numbers[i] // 10)  # Simplistic game phase determination
    move_features.append(game_phase)  # Add game phase
    X.append(move_features)

# Convert to NumPy array
X = np.array(X)

# Train a Random Forest Regressor again with the additional features
X_train, X_test, y_train, y_test = train_test_split(X, all_move_times, test_size=0.2, random_state=42)

rf_time_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_time_model.fit(X_train, y_train)

# Evaluate the model
time_model_r2 = rf_time_model.score(X_test, y_test)
print(f"Time Model R² Score with Piece Type and Game Phase: {time_model_r2:.2f}")


Time Model R² Score with Piece Type and Game Phase: -0.23
