# Elo World Strength Benchmarking

When I saw this competition and decided to join, the first thing I thought was that it would be nice to know how strong the bot I'm making is without submitting it.
So I put together this notebook, which lets you benchmark any bot you make against different dilutions of Stockfish (Stockfish that plays a random legal move some fraction of the time).
Simply use the Benchmark class I wrote: add your player, then run some games, and you will get a decent estimate of your bot's strength relative to the dilutions.
I was inspired to make this by Dr. Tom Murphy VII, Ph.D. (suckerpinch on YouTube), who made both the video "30 Weird Chess Algorithms: Elo World" (https://www.youtube.com/watch?v=DpXy041BIlA) and the corresponding paper "Elo World, a framework for benchmarking weak chess engines" (http://tom7.org/chess/weak.pdf).


To allow more general comparisons between engines, I suggest that you use either the parameters I have in this notebook or a set of canonical ones I will post in a comment under this notebook. Then, when you see an Elo score estimated using this notebook, you'll know how it was created and can measure yourself against it.

# Installing stockfish

In [1]:
!pip install chess elote
# Download the Stockfish binary
!wget https://github.com/official-stockfish/Stockfish/releases/latest/download/stockfish-ubuntu-x86-64-avx2.tar

# Extract the tar file
!tar -xvf stockfish-ubuntu-x86-64-avx2.tar



--2025-01-11 20:31:59--  https://github.com/official-stockfish/Stockfish/releases/latest/download/stockfish-ubuntu-x86-64-avx2.tar
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/official-stockfish/Stockfish/releases/download/sf_17/stockfish-ubuntu-x86-64-avx2.tar [following]
--2025-01-11 20:31:59--  https://github.com/official-stockfish/Stockfish/releases/download/sf_17/stockfish-ubuntu-x86-64-avx2.tar
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/20976138/d2c8ec2e-637e-46b1-b925-79fd3e8503f8?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250111%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250111T193019Z&X-Amz-Expires=300&X-Amz-Signature=cc38f542532fa0db46646fbac0dbe418ec

# Define Benchmarking bot

In [2]:
import chess
import chess.engine
import random
from elote import EloCompetitor
from fractions import Fraction
# Path of the Stockfish binary extracted from the downloaded tar archive.
engine_path = "./stockfish/stockfish-ubuntu-x86-64-avx2"  # Update the path if necessary
# Launch a single Stockfish process over UCI; the example below hands it to Benchmark
# as `baseline_engine`.
engine = chess.engine.SimpleEngine.popen_uci(engine_path)

class StockFish_BOT:
    """Baseline player: Stockfish "diluted" with random moves.

    On each turn the bot plays a random legal move with probability
    ``rnd_chance`` and otherwise asks the engine for a move under ``limit``.

    Args:
        percentage_random_moves: Probability of playing a random move. Despite
            the name it is a fraction, clamped to the range [0, 1].
        time_limit: Engine thinking time per move, in seconds.
        engine: An already running ``chess.engine.SimpleEngine`` to use. When
            ``None`` the bot launches its own Stockfish process, which
            ``close()`` will shut down.
    """

    def __init__(self, percentage_random_moves=0, time_limit=1, engine=None):
        # Remember who launched the engine so close() never kills a shared one.
        self._owns_engine = engine is None
        if engine is None:
            engine = chess.engine.SimpleEngine.popen_uci("./stockfish/stockfish-ubuntu-x86-64-avx2")
        # Always store the engine on the instance, whichever branch supplied it.
        self.engine = engine
        self.rnd_chance = max(min(percentage_random_moves, 1), 0)
        self.time_limit = time_limit
        self.limit = chess.engine.Limit(time=time_limit)
        self.elo = EloCompetitor(initial_rating=400)
        # Approximate random:engine move ratio, used for display only. The 1.01
        # (instead of 1) keeps the division finite when rnd_chance == 1.
        self.dilution_ratio = Fraction(self.rnd_chance/(1.01-self.rnd_chance)).limit_denominator(100)

    def make_move(self, board):
        """Return the move to play on ``board``: random with probability ``rnd_chance``, else the engine's choice."""
        if random.random() < self.rnd_chance:
            return random.choice(list(board.legal_moves))
        # Use this bot's own engine rather than whatever global happens to be named `engine`.
        return self.engine.play(board, self.limit).move

    def close(self):
        """Quit the engine process, but only if this bot launched it itself."""
        if getattr(self, "_owns_engine", False):
            self.engine.quit()
            self._owns_engine = False

    def __repr__(self):
        # Formatting with .1f removes float noise such as 14.000000000000002.
        random_pct = f"{round(self.rnd_chance, 2) * 100:.1f}"
        return f"Stockfish Bot - Thinking Time: {self.time_limit} Seconds - Diluted at {self.dilution_ratio} - Random move chance {random_pct}%"
        

In [3]:
import chess
import random
from elote import EloCompetitor
import sys
from pathlib import Path

# Add the parent directory to sys.path
sys.path.append('../')
from agent.simple_agent import action_agent
from evaluator.utils.model_CNN import ChessModelCNN
from evaluator.utils.model_Transformer import load_existing_model

class Personal_BOT:
    """Player under test: wraps the project's CNN agent behind the ``make_move`` interface.

    Args:
        percentage_random_moves: Probability of playing a random legal move
            instead of asking the agent. Despite the name it is a fraction,
            clamped to the range [0, 1].
        time_limit: Stored on the instance; nothing in this class reads it.
    """

    def __init__(self, percentage_random_moves=0, time_limit=1):
        self.rnd_chance = max(min(percentage_random_moves, 1), 0)
        self.time_limit = time_limit
        self.elo = EloCompetitor(initial_rating=400)
        # Build the network, then let load_existing_model return the version to use.
        # NOTE(review): presumably this loads trained weights from model_path - confirm.
        self.agent_net = ChessModelCNN()
        self.agent_net = load_existing_model(model=self.agent_net, model_path="../output/CNN_1.1/model_number_1")
        self.as_matrix = True

    def make_move(self, board):
        """Return the move to play on ``board``: random with probability ``rnd_chance``, else the agent's choice."""
        if random.random() < self.rnd_chance:
            return random.choice(list(board.legal_moves))
        # NOTE(review): assumes action_agent returns a move that board.push accepts - confirm.
        return action_agent(board, self.agent_net, as_matrix=self.as_matrix)

    def __repr__(self):
        # Formatting with .1f removes float noise such as 14.000000000000002.
        random_pct = f"{round(self.rnd_chance, 2) * 100:.1f}"
        return f"Personnal Bot - Random move chance {random_pct}%"
        

In [4]:
from elote import LambdaArena
from tqdm import tqdm
import numpy as np

class Benchmark:
    """Rating ladder of diluted-Stockfish baselines to which test players can be added.

    Ratings are kept by the ``LambdaArena`` in ``self.arena``, which calls
    ``play_game`` for every matchup it is given.

    Args:
        num_baseline_players: Number of ``StockFish_BOT`` baselines to create.
        baseline_player_time_limit: Engine thinking time per move (seconds)
            for every baseline.
        baseline_engine: Engine passed to every baseline; ``None`` lets each
            baseline obtain its own.
        use_random_percentages: If true, each baseline gets a random-move
            chance drawn from ``random.random()``; otherwise the chances are
            evenly spaced over [0, 1].
    """

    def __init__(self, num_baseline_players=10, baseline_player_time_limit=0.01, baseline_engine=None, use_random_percentages=False):
        self.arena = LambdaArena(self.play_game)
        self.num_baseline_players = num_baseline_players
        self.baseline_player_time_limit = baseline_player_time_limit
        if use_random_percentages:
            random_chances = [random.random() for _ in range(self.num_baseline_players)]
        else:
            random_chances = np.linspace(0, 1, self.num_baseline_players)
        self.players = [
            StockFish_BOT(
                percentage_random_moves=chance,
                time_limit=self.baseline_player_time_limit,
                engine=baseline_engine,
            )
            for chance in tqdm(random_chances)
        ]

    def add_test_player(self, player):
        """Add ``player`` (any object with ``make_move(board)``) to the pool used by ``run_games``.

        Raises:
            TypeError: If a class is passed instead of an instance. Without
                this check the mistake only surfaces mid-tournament as
                "make_move() missing 1 required positional argument: 'board'".
        """
        if isinstance(player, type):
            raise TypeError(
                f"add_test_player expects a player instance, got the class {player.__name__}; "
                f"did you mean {player.__name__}()?"
            )
        self.players.append(player)

    def play_game(self, player1, player2):
        """Play one game and return ``outcome.winner`` of the finished board.

        White's first move is chosen at random. After that ``player2`` moves
        as Black and ``player1`` as White, alternating until the game ends
        (draws are claimed as soon as they can be). With python-chess the
        result is True if White (``player1``) won, False if Black
        (``player2``) won and None for a draw.
        NOTE(review): presumably LambdaArena reads True as "player1 beat
        player2" - confirm against the elote version in use.
        """
        board = chess.Board()
        board.push(random.choice(list(board.legal_moves)))

        while True:
            # Black replies first because White's opening move was already played above.
            for mover in (player2, player1):
                board.push(mover.make_move(board))
                outcome = board.outcome(claim_draw=True)
                if outcome is not None:
                    return outcome.winner

    def run_games(self, num_games=10):
        """Play ``num_games`` games between randomly drawn pairs of distinct players.

        Raises:
            ValueError: If games are requested while fewer than two players exist.
        """
        if num_games > 0 and len(self.players) < 2:
            raise ValueError("run_games needs at least two players")
        # random.sample draws without replacement, so nobody is paired against itself.
        matchups = [tuple(random.sample(self.players, 2)) for _ in range(num_games)]
        self.arena.tournament(matchups)

    def return_leaderboard(self):
        """Return the arena leaderboard with ratings shifted so the lowest one is 100.

        Each entry is a dict with the arena's keys; ``"rating"`` is replaced by
        the shifted value rounded to two decimals. Returns an empty list when
        the arena has no entries yet.
        """
        leaderboard = self.arena.leaderboard()
        if not leaderboard:
            return []

        lowest = min(entry["rating"] for entry in leaderboard)
        return [
            {**entry, "rating": round((entry["rating"] - lowest) + 100, 2)}
            for entry in leaderboard
        ]
        
        

# Example Use

In [5]:
# Build 50 baseline bots with random-move chances evenly spaced over [0, 1], passing the same engine to each.
benchmark = Benchmark(num_baseline_players=50, baseline_player_time_limit=0.01, baseline_engine=engine, use_random_percentages=False)

100%|██████████| 50/50 [00:00<00:00, 145433.56it/s]


In [6]:
# Show the current player pool; each entry is rendered through the bot's __repr__.
benchmark.players

[Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 0 - Random move chance 0.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 2/97 - Random move chance 2.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 4/95 - Random move chance 4.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 2/31 - Random move chance 6.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 8/91 - Random move chance 8.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 10/89 - Random move chance 10.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 4/29 - Random move chance 12.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 14/85 - Random move chance 14.000000000000002%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 16/83 - Random move chance 16.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 2/9 - Random move chance 18.0%,
 Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 20/79 - Random move chan

In [7]:
# Register the bot under test. Pass an instance (note the parentheses), not the class itself.
benchmark.add_test_player(Personal_BOT())

In [8]:
# Play 2000 games between randomly drawn pairs from the player pool.
benchmark.run_games(num_games=2000)

  4%|▍         | 78/2000 [00:32<13:24,  2.39it/s]


TypeError: Personal_BOT.make_move() missing 1 required positional argument: 'board'

In [8]:
# Ratings are shifted so that the lowest-rated player sits at 100.
benchmark.return_leaderboard()

[{'competitor': Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 2996/93 - Random move chance 98.0%,
  'rating': 100.0},
 {'competitor': Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 451/45 - Random move chance 92.0%,
  'rating': 127.8},
 {'competitor': Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 949/72 - Random move chance 94.0%,
  'rating': 144.35},
 {'competitor': Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 553/69 - Random move chance 90.0%,
  'rating': 148.84},
 {'competitor': Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 151/8 - Random move chance 96.0%,
  'rating': 152.48},
 {'competitor': Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 100 - Random move chance 100.0%,
  'rating': 184.15},
 {'competitor': Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 198/41 - Random move chance 84.0%,
  'rating': 184.82},
 {'competitor': Stockfish Bot - Thinking Time: 0.01 Seconds - Diluted at 53/8 - Random move chance 88

# You can use the run_games method at any time

In [None]:
"""

num_rounds = 20
for iteration in range(num_rounds):
    benchmark.run_games(num_games=100)
    print("-"*100)
    print(f"iteration {iteration+1} / {num_rounds}")
    for competitor in benchmark.return_leaderboard():
        print(competitor)
    print("-"*100)

"""

'\n\nnum_rounds = 20\nfor iteration in range(num_rounds):\n    benchmark.run_games(num_games=100)\n    print("-"*100)\n    print(f"iteration {iteration+1} / {num_rounds}")\n    for competitor in benchmark.return_leaderboard():\n        print(competitor)\n    print("-"*100)\n\n'

# Workspace