### Introduction

This notebook analyzes chess games using econometric methods and implements a simple chess engine to illustrate practical application.
Key goals:
- Quantify the impact of material differences and Elo ratings on game outcomes
- Build a predictive logistic regression model
- Use econometric insights to evaluate chess positions 
- Implement minimax with alpha beta pruning for decision making

Installing the relevant packages

In [43]:
# Install required packages
!pip install python-chess pandas numpy statsmodels tqdm matplotlib ipywidgets

import os

import chess
import chess.pgn
import chess.svg
import ipywidgets as widgets
import numpy as np
import pandas as pd
import statsmodels.api as sm
from IPython.display import display, clear_output, SVG
from tqdm import tqdm




Parsing PGN files. Using the Lichess database, I downloaded an archive from 2017 consisting of 11 million games

In [44]:
# Location of the Lichess PGN archive. The default is the author's local
# Windows path; set the LICHESS_PGN_PATH environment variable to run the
# notebook on another machine without editing this cell.
PGN_PATH = os.environ.get(
    "LICHESS_PGN_PATH",
    r"C:\Users\HP\Desktop\chess data\lichess_db_standard_rated_2017-03.pgn",
)

def _parse_elo(raw_value):
    """Convert a PGN Elo header value to int; missing or non-numeric values (such as "?") become 0."""
    try:
        return int(raw_value)
    except (TypeError, ValueError):
        return 0


def parse_games(pgn_path, max_games=3000):
    """Read up to `max_games` games from a PGN file.

    Parameters
    ----------
    pgn_path : str
        Path of the PGN file, opened as UTF-8 text.
    max_games : int, default 3000
        Upper bound on the number of games read; reading stops earlier at
        end of file.

    Returns
    -------
    list of dict
        One dict per game with keys:
        "white_elo" / "black_elo" - ratings as int, 0 when the header is
        absent or not a number;
        "result" - 1 if White won ("1-0"), -1 if Black won ("0-1"), 0 for
        every other result string;
        "moves" - the parsed game object returned by chess.pgn.read_game.
    """
    result_codes = {"1-0": 1, "0-1": -1}
    games = []
    with open(pgn_path, encoding="utf-8") as pgn:
        for _ in tqdm(range(max_games)):  # tqdm draws the progress bar
            game = chess.pgn.read_game(pgn)
            if game is None:  # no more games in the file
                break
            headers = game.headers
            games.append({
                # A plain int() here raised ValueError on headers like "?".
                "white_elo": _parse_elo(headers.get("WhiteElo", 0)),
                "black_elo": _parse_elo(headers.get("BlackElo", 0)),
                # Anything that is not a decisive result is coded as 0.
                "result": result_codes.get(headers["Result"], 0),
                "moves": game
            })
    return games

#All variables are defined from White's perspective. No separate indicator for player color is required
#Color is implicitly encoded through the sign of the features, +ve -> White advantage, -ve -> Black advantage

# Read up to 3000 games from the archive; each entry holds both ratings,
# the coded result (1 / -1 / 0) and the parsed game object.
games = parse_games(PGN_PATH, max_games=3000)
print("Number of games parsed:", len(games))


100%|██████████| 3000/3000 [00:16<00:00, 186.05it/s]

Number of games parsed: 3000





Extracting the features and preparing the dataset using difference in pieces on the board (material) between white and black

In [45]:
def extract_piece_diff_features(game, ply=20):
    """Material balance (White minus Black) after the opening phase of a game.

    The game's main line is replayed from its starting board until `ply`
    half-moves have been made, or the game runs out of moves. The count of
    each non-king piece type is then compared between the two sides.

    Returns a dict with keys "pawn_diff", "knight_diff", "bishop_diff",
    "rook_diff" and "queen_diff"; positive values mean White has more.
    """
    position = game.board()
    half_moves_made = 0
    for mv in game.mainline_moves():
        position.push(mv)
        half_moves_made += 1
        if half_moves_made >= ply:
            break

    def surplus(piece_type):
        # Number of White pieces of this type minus the number of Black ones.
        white_count = len(position.pieces(piece_type, chess.WHITE))
        black_count = len(position.pieces(piece_type, chess.BLACK))
        return white_count - black_count

    tracked = (
        ("pawn_diff", chess.PAWN),
        ("knight_diff", chess.KNIGHT),
        ("bishop_diff", chess.BISHOP),
        ("rook_diff", chess.ROOK),
        ("queen_diff", chess.QUEEN),
    )
    return {label: surplus(kind) for label, kind in tracked}


Create dataframe for logistic regression

In [46]:
# Build one regression row per parsed game: the coded result, the rating gap
# (White minus Black) and the material gaps returned by
# extract_piece_diff_features at its default ply.
rows = [] #empty list
for g in tqdm(games): 
    feats = extract_piece_diff_features(g["moves"])
    rows.append({
        "result": g["result"], 
        "elo_diff": g["white_elo"] - g["black_elo"],
        **feats
    })

df_piece = pd.DataFrame(rows)
# Rows coded 0 are dropped so the dependent variable is binary; .copy() makes
# the filtered frame independent before the new column is added.
df_piece = df_piece[df_piece["result"] != 0].copy() #remove draws since logistic regression only works for binary classification
df_piece["win"] = (df_piece["result"] == 1).astype(int) #binary dependent variable, 1 if white wins, 0 if black wins


100%|██████████| 3000/3000 [00:00<00:00, 3605.98it/s]


Logistic regression

In [47]:
# Regressors: the five material gaps plus the rating gap; add_constant
# appends the intercept column that appears as "const" in the fitted params.
X = df_piece[["pawn_diff","knight_diff","bishop_diff","rook_diff","queen_diff","elo_diff"]]
X = sm.add_constant(X)
y = df_piece["win"]

piece_model = sm.Logit(y, X).fit() #Logistic regression model to predict probability of white winning based on piece differences and Elo difference
print(piece_model.summary()) 

# Exponentiating a logit coefficient gives the multiplicative change in odds
# for a one-unit increase in that regressor.
odds_ratios = np.exp(piece_model.params) # Odds ratio to interpret effect sizes



Optimization terminated successfully.
         Current function value: 0.621484
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                    win   No. Observations:                 2882
Model:                          Logit   Df Residuals:                     2875
Method:                           MLE   Df Model:                            6
Date:                Mon, 29 Dec 2025   Pseudo R-squ.:                  0.1028
Time:                        18:30:58   Log-Likelihood:                -1791.1
converged:                       True   LL-Null:                       -1996.4
Covariance Type:            nonrobust   LLR p-value:                 1.456e-85
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.1139      0.041      2.773      0.006       0.033       0.194
pawn_diff       0.1449    

Odds ratio to interpret effect sizes

In [48]:
# Display the exponentiated coefficients (odds ratios) as the cell output.
np.exp(piece_model.params)

const          1.120667
pawn_diff      1.155877
knight_diff    1.660888
bishop_diff    1.624333
rook_diff      2.631051
queen_diff     3.860692
elo_diff       1.004657
dtype: float64

Interpretation of coefficients

In [49]:
#All effects below are changes in the ODDS of White winning (not the probability), holding the other regressors fixed
#Intercept: with equal material and equal ratings, White's odds of winning are about 12% higher than even (first-mover advantage)
#Each additional pawn of advantage increases the odds of winning by about 16%
#Knight and bishop advantages have similar effects, increasing the odds of winning by around 66% and 62% respectively
#One extra rook increases the odds of winning by 163%
#A queen advantage has the biggest effect, making the odds of winning about 3.9 times higher
#Each additional Elo point increases White's odds of winning by 0.47%, so the rating difference matters
#A +100 Elo advantage results in about 59% higher odds (1.004657^100 ≈ 1.59)


Hand-coded Evaluation Function

In [50]:
# Piece-square tables in centipawns, written from White's point of view with
# rank 8 as the first row and rank 1 as the last (row 6 is White's pawn
# starting rank). Built once at definition time instead of on every call.
_PAWN_TABLE = (
    (0, 0, 0, 0, 0, 0, 0, 0),
    (50, 50, 50, 50, 50, 50, 50, 50),
    (10, 10, 20, 30, 30, 20, 10, 10),
    (5, 5, 10, 25, 25, 10, 5, 5),
    (0, 0, 0, 20, 20, 0, 0, 0),
    (5, -5, -10, 0, 0, -10, -5, 5),
    (5, 10, 10, -20, -20, 10, 10, 5),
    (0, 0, 0, 0, 0, 0, 0, 0),
)

_KNIGHT_TABLE = (
    (-50, -40, -30, -30, -30, -30, -40, -50),
    (-40, -20, 0, 0, 0, 0, -20, -40),
    (-30, 0, 10, 15, 15, 10, 0, -30),
    (-30, 5, 15, 20, 20, 15, 5, -30),
    (-30, 0, 15, 20, 20, 15, 0, -30),
    (-30, 5, 10, 15, 15, 10, 5, -30),
    (-40, -20, 0, 5, 5, 0, -20, -40),
    (-50, -40, -30, -30, -30, -30, -40, -50),
)

_BISHOP_TABLE = (
    (-20, -10, -10, -10, -10, -10, -10, -20),
    (-10, 0, 0, 0, 0, 0, 0, -10),
    (-10, 0, 5, 10, 10, 5, 0, -10),
    (-10, 5, 5, 10, 10, 5, 5, -10),
    (-10, 0, 10, 10, 10, 10, 0, -10),
    (-10, 10, 10, 10, 10, 10, 10, -10),
    (-10, 5, 0, 0, 0, 0, 5, -10),
    (-20, -10, -10, -10, -10, -10, -10, -20),
)

_ROOK_TABLE = (
    (0, 0, 0, 0, 0, 0, 0, 0),
    (5, 10, 10, 10, 10, 10, 10, 5),
    (-5, 0, 0, 0, 0, 0, 0, -5),
    (-5, 0, 0, 0, 0, 0, 0, -5),
    (-5, 0, 0, 0, 0, 0, 0, -5),
    (-5, 0, 0, 0, 0, 0, 0, -5),
    (-5, 0, 0, 0, 0, 0, 0, -5),
    (0, 0, 0, 5, 5, 0, 0, 0),
)

_QUEEN_TABLE = (
    (-20, -10, -10, -5, -5, -10, -10, -20),
    (-10, 0, 0, 0, 0, 0, 0, -10),
    (-10, 0, 5, 5, 5, 5, 0, -10),
    (-5, 0, 5, 5, 5, 5, 0, -5),
    (0, 0, 5, 5, 5, 5, 0, -5),
    (-10, 5, 5, 5, 5, 5, 0, -10),
    (-10, 0, 5, 0, 0, 0, 0, -10),
    (-20, -10, -10, -5, -5, -10, -10, -20),
)

_KING_TABLE = (
    (-30, -40, -40, -50, -50, -40, -40, -30),
    (-30, -40, -40, -50, -50, -40, -40, -30),
    (-30, -40, -40, -50, -50, -40, -40, -30),
    (-30, -40, -40, -50, -50, -40, -40, -30),
    (-20, -30, -30, -40, -40, -30, -30, -20),
    (-10, -20, -20, -20, -20, -20, -20, -10),
    (20, 20, 0, 0, 0, 0, 20, 20),
    (20, 30, 10, 0, 0, 10, 30, 20),
)

# Score of a checkmated side to move; larger than any material + bonus total.
_MATE_SCORE = 100000


def hand_coded_evaluate(board, mobility_weight=10, check_penalty=50):
    """Static evaluation in centipawns from the side to move's perspective.

    The score combines material, piece-square bonuses, the number of legal
    moves of the side to move, and a penalty when that side is in check.

    Parameters
    ----------
    board : chess.Board
        Position to evaluate.
    mobility_weight : int, default 10
        Centipawns credited to the side to move per legal move.
    check_penalty : int, default 50
        Centipawns deducted from the side to move when it is in check.
        (The previous code returned a flat +/-500 here, which threw away
        the material count and had a White-relative sign; the default is
        smaller because the penalty is now added to the material score.)

    Returns
    -------
    int
        Positive values favour the player whose turn it is. A checkmated
        side to move scores -100000 and a stalemate scores 0.
    """
    # Standard chess material weights.
    values = {
        chess.PAWN: 100,
        chess.KNIGHT: 320,
        chess.BISHOP: 330,
        chess.ROOK: 500,
        chess.QUEEN: 900,
        chess.KING: 50000
    }
    tables = {
        chess.PAWN: _PAWN_TABLE,
        chess.KNIGHT: _KNIGHT_TABLE,
        chess.BISHOP: _BISHOP_TABLE,
        chess.ROOK: _ROOK_TABLE,
        chess.QUEEN: _QUEEN_TABLE,
        chess.KING: _KING_TABLE,
    }

    in_check = board.is_check()
    mobility = sum(1 for _ in board.legal_moves)

    # No legal moves: checkmate if in check, otherwise stalemate.
    if mobility == 0:
        return -_MATE_SCORE if in_check else 0

    white_score = 0
    for sq in chess.SQUARES:
        piece = board.piece_at(sq)
        if not piece:
            continue
        rank, file = divmod(sq, 8)  # rank 0 is the first rank, file 0 is the a-file
        if piece.color == chess.WHITE:
            # Tables list rank 8 first, so White reads them bottom-up.
            row = 7 - rank
        else:
            # Vertical mirror for Black: its first row of the table is rank 1.
            row = rank
        piece_score = values[piece.piece_type] + tables[piece.piece_type][row][file]
        white_score += piece_score if piece.color == chess.WHITE else -piece_score

    # Switch to the side to move's perspective before adding the terms that
    # belong to the side to move (mobility and the check penalty).
    score = white_score if board.turn == chess.WHITE else -white_score
    score += mobility_weight * mobility
    if in_check:
        score -= check_penalty
    return score


Evaluation Function based on Logistic Regression

In [51]:
def econometric_evaluate(board, params):
    """Evaluate a position with the fitted logit coefficients.

    The score is the model's linear predictor (log-odds of a White win)
    computed from the material differences on the board, then negated when
    Black is to move so that it is always from the side to move's
    perspective. The rating term is not used.

    Parameters
    ----------
    board : chess.Board
        Position to evaluate.
    params : mapping
        Must provide "const", "pawn_diff", "knight_diff", "bishop_diff",
        "rook_diff" and "queen_diff".

    Returns
    -------
    float or int
        Positive values favour the player to move. A checkmated side to
        move scores -9999; stalemate and insufficient material score 0.
    """
    if board.is_checkmate():
        # The side to move is the one that has been mated, so this is the
        # worst possible score for it regardless of colour. (Returning +9999
        # for Black to move made a mated Black look like it was winning.)
        return -9999
    if board.is_stalemate() or board.is_insufficient_material():
        return 0.0  # drawn position: material no longer matters

    piece_terms = (
        ("pawn_diff", chess.PAWN),
        ("knight_diff", chess.KNIGHT),
        ("bishop_diff", chess.BISHOP),
        ("rook_diff", chess.ROOK),
        ("queen_diff", chess.QUEEN),
    )

    score = params["const"]  # linear predictor, from White's perspective
    for name, piece_type in piece_terms:
        diff = len(board.pieces(piece_type, chess.WHITE)) - len(board.pieces(piece_type, chess.BLACK))
        score = score + params[name] * diff

    return score if board.turn == chess.WHITE else -score

Minimax with Alpha-Beta Pruning

In [52]:
def minimax(board, depth, alpha, beta, maximizing):
    """Minimax search with alpha-beta pruning.

    Parameters
    ----------
    board : chess.Board
        Position to search; moves are pushed and popped in place, so the
        board is unchanged when the call returns.
    depth : int
        Remaining search depth in half-moves.
    alpha, beta : float
        Current search window (best values already guaranteed to the
        maximizer and the minimizer respectively).
    maximizing : bool
        True when the side to move at this node is the maximizing player.

    Returns
    -------
    number
        Value of the position from the maximizing player's perspective.
    """
    if depth == 0 or board.is_game_over():
        # evaluate_board scores the position for whoever is to move. At a
        # minimizing node that is the opponent, so the sign has to be flipped
        # to keep every value in the tree relative to the maximizing player.
        # Without this, scores were wrong whenever the search stopped at a
        # minimizing node (odd depth, or a game that ended early).
        static_score = evaluate_board(board)
        return static_score if maximizing else -static_score

    if maximizing:
        best = -float("inf")
        for move in board.legal_moves:
            board.push(move)  # try the move
            score = minimax(board, depth - 1, alpha, beta, False)
            board.pop()  # take it back
            best = max(best, score)
            alpha = max(alpha, score)
            if beta <= alpha:  # the minimizer already has a better option elsewhere
                break
        return best

    best = float("inf")
    for move in board.legal_moves:
        board.push(move)
        score = minimax(board, depth - 1, alpha, beta, True)
        board.pop()
        best = min(best, score)
        beta = min(beta, score)
        if beta <= alpha:  # the maximizer already has a better option elsewhere
            break
    return best

#Engine selects the move that maximizes its own worst-case outcome
#The maximizer picks its best move assuming the opponent always answers with the reply that is worst for the maximizer

Create an interactive chess board display

Enforcing the gameplay rules

In [53]:
# =========================
# Chess interactive engine (all-in-one)
# =========================

# Function to display board
def show_board(board):
    """Render `board` as a 400-pixel SVG image in the current output area."""
    svg_markup = chess.svg.board(board=board, size=400)
    display(SVG(svg_markup))

# Current board state (module-level; rebound by the reset handler)
current_board = chess.Board()

# Engine search depth in half-moves, counting the engine's own move at the root
SEARCH_DEPTH = 4

# UI widgets: a text box for the player's move, two buttons, and an output
# area into which the board and messages are rendered
text_input = widgets.Text(
    placeholder="Enter move in UCI (e.g. e2e4)",
    description="Your move:"
)
move_button = widgets.Button(description="Play Move", button_style="primary")
reset_button = widgets.Button(description="Reset Game", button_style="warning")
output = widgets.Output()

# Dropdown to select evaluation model; its value is read by evaluate_board
# each time a position is scored
model_dropdown = widgets.Dropdown(
    options=['Hand-coded','Piece-diff Logit'],
    value='Piece-diff Logit',
    description='Evaluator:'
)

# Function to evaluate current board using selected model
def evaluate_board(board):
    """Score `board` with whichever evaluator is selected in the dropdown.

    'Hand-coded' dispatches to hand_coded_evaluate; any other selection uses
    econometric_evaluate with the fitted logit coefficients.
    """
    wants_hand_coded = model_dropdown.value == 'Hand-coded'
    if not wants_hand_coded:
        return econometric_evaluate(board, piece_model.params)
    return hand_coded_evaluate(board)

# Move handler
def _choose_engine_move(board, depth):
    """Return the legal move with the best minimax value for the side to move, or None if there is none."""
    best_move = None
    best_value = -float("inf")
    for move in board.legal_moves:
        board.push(move)
        try:
            # After the engine's candidate move the opponent is to move,
            # so the next ply is a minimizing node.
            value = minimax(board, depth - 1, -float("inf"), float("inf"), False)
        finally:
            board.pop()  # always restore the position, even if the search fails
        if best_move is None or value > best_value:
            best_value = value
            best_move = move
    return best_move


def on_move_clicked(b):
    """Button callback: play the typed move, then let the engine reply.

    Reads the move in UCI notation from the text box, rejects malformed or
    illegal input, pushes the move on `current_board`, and, unless the game
    has ended, pushes the engine's reply found by a depth-SEARCH_DEPTH
    search. All output is rendered into the `output` widget. `b` is the
    clicked button and is not used.
    """
    with output:
        clear_output(wait=True)
        move_str = text_input.value.strip().lower()

        # Parse and validate move. from_uci signals malformed input with a
        # ValueError; catching only that keeps unrelated errors visible.
        try:
            user_move = chess.Move.from_uci(move_str)
        except ValueError:
            print("❌ Invalid move format. Use e2e4")
            show_board(current_board)
            return
        if user_move not in current_board.legal_moves:
            print("❌ Illegal move.")
            show_board(current_board)
            return

        # Execute player move
        current_board.push(user_move)
        text_input.value = ""  # the move was accepted, so clear the input box
        if current_board.is_game_over():
            show_board(current_board)
            print("🏁 Game over:", current_board.result())
            return

        # Engine move using minimax
        best_move = _choose_engine_move(current_board, SEARCH_DEPTH)
        current_board.push(best_move)
        print(f"🤖 Engine plays: {best_move}")
        show_board(current_board)

        # The engine's reply can end the game too; report it instead of
        # showing an evaluation of a finished game.
        if current_board.is_game_over():
            print("🏁 Game over:", current_board.result())
            return
        print(f"Evaluation ({model_dropdown.value}): {evaluate_board(current_board):.2f}")

# Reset handler
def on_reset_clicked(b):
    """Button callback: start a new game and redraw the board.

    Rebinds the module-level `current_board` to a fresh starting position and
    renders it into the `output` widget. `b` is the clicked button and is not
    used.
    """
    global current_board
    current_board = chess.Board()
    with output:
        clear_output(wait=True)
        show_board(current_board)
        print("♟️ Game reset. New game started.")

# Connect buttons to their click handlers
move_button.on_click(on_move_clicked)
reset_button.on_click(on_reset_clicked)

# Display UI: evaluator dropdown on top, then the move input with both
# buttons in one row, then the output area holding the board
display(
    widgets.VBox([
        model_dropdown,
        widgets.HBox([text_input, move_button, reset_button]),
        output
    ])
)

# Initial board state, drawn inside the output widget so later handler
# output replaces it
with output:
    show_board(current_board)


VBox(children=(Dropdown(description='Evaluator:', index=1, options=('Hand-coded', 'Piece-diff Logit'), value='…