# Imports


In [22]:
import pandas as pd
import numpy
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Load data

In [24]:
# Labeled positions; later cells read the 'fen' and 'label' columns from this frame.
csv_path = 'labeled_chess_positions.csv'
data = pd.read_csv(csv_path)


# Define Features
Since the algorithm I have chosen is Random Forest, I need to define features for the model to learn from.

In [5]:
!pip install python-chess

Collecting python-chess
  Downloading python_chess-1.999-py3-none-any.whl.metadata (776 bytes)
Collecting chess<2,>=1 (from python-chess)
  Downloading chess-1.11.2.tar.gz (6.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m49.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading python_chess-1.999-py3-none-any.whl (1.4 kB)
Building wheels for collected packages: chess
  Building wheel for chess (setup.py) ... [?25l[?25hdone
  Created wheel for chess: filename=chess-1.11.2-py3-none-any.whl size=147775 sha256=95cced5d0bd061c1589ff09bb01e394c74fa0483807b68c5e6b89206444f4381
  Stored in directory: /root/.cache/pip/wheels/fb/5d/5c/59a62d8a695285e59ec9c1f66add6f8a9ac4152499a2be0113
Successfully built chess
Installing collected packages: chess, python-chess
Successfully installed chess-1.11.2 python-chess-1.999


In [6]:
import chess
import chess.pgn
import chess.engine
import numpy as np

# Function for feature extraction
def extract_features(fen):
    """Turn a FEN string into a flat list of 18 integer features.

    Layout of the returned list, in order:
      * 0-4   White counts: pawn, knight, bishop, rook, queen
      * 5-9   Black counts: pawn, knight, bishop, rook, queen
      * 10-12 White material, Black material, White minus Black
              (weights: pawn 1, knight 3, bishop 3, rook 5, queen 9)
      * 13    int(board.turn)  # presumably 1 when White is to move -- confirm against python-chess
      * 14-17 castling rights: White kingside, White queenside,
              Black kingside, Black queenside (each 0 or 1)

    Kings are not counted and carry no material weight.

    Args:
        fen: Position in FEN notation, as accepted by ``chess.Board``.

    Returns:
        list[int]: The 18 features described above.
    """
    board = chess.Board(fen)

    # Piece types in the order they appear in the feature vector, with their weights.
    piece_types = (chess.PAWN, chess.KNIGHT, chess.BISHOP, chess.ROOK, chess.QUEEN)
    weights = (1, 3, 3, 5, 9)

    white_counts = [len(board.pieces(piece_type, chess.WHITE)) for piece_type in piece_types]
    black_counts = [len(board.pieces(piece_type, chess.BLACK)) for piece_type in piece_types]

    white_material = sum(weight * n for weight, n in zip(weights, white_counts))
    black_material = sum(weight * n for weight, n in zip(weights, black_counts))

    # Castling flags: both of White's rights first, then both of Black's.
    castling_flags = []
    for color in (chess.WHITE, chess.BLACK):
        castling_flags.append(int(board.has_kingside_castling_rights(color)))
        castling_flags.append(int(board.has_queenside_castling_rights(color)))

    return [
        *white_counts,
        *black_counts,
        white_material,
        black_material,
        white_material - black_material,
        int(board.turn),
        *castling_flags,
    ]

## Feature extraction
In the above function I have included the basic features needed to evaluate a chess position.


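*   Piece count (plus the material totals and material difference derived from it)
*   Castling rights
*   Checks (not very important; note that `extract_features` does not currently compute a check feature)
*   Turn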

Note: these features are very basic and will be pretty weak at evaluating a complex position. For that, I found that a neural network is a better alternative than Random Forest.






In [25]:
# One feature list per position; the lists are then stacked row-wise into the design matrix.
feature_rows = data['fen'].map(extract_features)
X = pd.DataFrame(feature_rows.to_list())

# Encode labels

Now we encode the labels in the CSV (White Winning, Black Winning, and Draw) as 0, 1, and 2 respectively.

In [27]:
# Integer class ids for the three outcome labels expected in the CSV.
label_mapping = {"White Winning" : 0, "Black Winning" : 1, "Draw" : 2}
Y = data['label'].map(label_mapping)

# Series.map yields NaN for any value that is not a key of the mapping, so a
# misspelled or missing label would otherwise only surface later, during fitting.
unmapped_labels = data.loc[Y.isna(), 'label'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Unexpected values in the 'label' column: {unmapped_labels.tolist()}"
    )

# Splitting the data


In [28]:
# Hold out 20% of the positions for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_test, Y_train, Y_test = split_parts


# Train the model

In [29]:
# 200-tree random forest with a fixed seed.
model = RandomForestClassifier(
    n_estimators=200,
    random_state=42,
)

# Fit on the training split; as the cell's last expression, the fitted estimator is displayed.
model.fit(X_train, Y_train)

# Test Model

In [30]:
# Predict on the held-out split, then report overall accuracy and per-class metrics.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(Y_test, y_pred)
# target_names are listed in the order of the encoded class ids 0, 1, 2.
test_report = classification_report(
    Y_test,
    y_pred,
    target_names=['White Winning', 'Black Winning', 'Draw'],
)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

Accuracy: 0.7599301615015277

Classification Report:
                precision    recall  f1-score   support

White Winning       0.77      0.74      0.76       747
Black Winning       0.80      0.79      0.79       754
         Draw       0.72      0.74      0.73       790

     accuracy                           0.76      2291
    macro avg       0.76      0.76      0.76      2291
 weighted avg       0.76      0.76      0.76      2291



# Save the model

In [31]:
import joblib

# Persist the fitted forest to disk; joblib.dump's return value is shown as the cell output.
model_path = 'random_forest_fen_model.pkl'
joblib.dump(model, model_path)

['random_forest_fen_model.pkl']