In [16]:
import torch
import chess.pgn
import chess.engine

In [17]:
# Elo buckets as (lower, upper) pairs: 200-point steps from 800 up to 2600,
# followed by one wider bucket covering 2600-3000.
rating_ranges = [(low, low + 200) for low in range(800, 2600, 200)] + [(2600, 3000)]

In [18]:
# Collected games per rating bucket, plus the overall Elo extremes seen.
chess_games = {rating: [] for rating in rating_ranges}
min_elo = 9999
max_elo = 0

games_per_rating = 2000
# Number of qualifying games skipped at the start of every file before
# collection begins.
start_index = 0

# Games whose "Event" header contains any of these substrings are skipped.
excluded_events = ["Correspondence", "Daily", "Classical", "Bullet", "UltraBullet"]
# Games with fewer mainline moves than this are skipped.
min_moves = 15

for rating_range in rating_ranges:
    start = start_index
    lower_bound, upper_bound = rating_range

    file = f"outputs/{lower_bound}-{upper_bound}.pgn"

    with open(file) as f:
        while len(chess_games[rating_range]) < games_per_rating:
            game = chess.pgn.read_game(f)
            if game is None:
                # End of file: keep whatever was collected for this bucket.
                break
            if any(time_control in game.headers["Event"] for time_control in excluded_events):
                continue

            # A missing or non-numeric rating (e.g. "?") cannot be bucketed,
            # so such games are skipped instead of crashing the whole load.
            try:
                white_elo = int(game.headers.get("WhiteElo", "?"))
                black_elo = int(game.headers.get("BlackElo", "?"))
            except ValueError:
                continue

            # NOTE(review): a game is kept when at least ONE player is inside
            # the bucket, so the other player's rating may lie outside it.
            # Confirm this is intended (as opposed to requiring both players).
            if not (
                lower_bound <= white_elo <= upper_bound
                or lower_bound <= black_elo <= upper_bound
            ):
                continue
            if sum(1 for _ in game.mainline_moves()) < min_moves:
                continue
            if start > 0:
                # Still inside the window of qualifying games to skip.
                start -= 1
                continue

            chess_games[rating_range].append(game)

            min_elo = min(min_elo, white_elo, black_elo)
            max_elo = max(max_elo, white_elo, black_elo)

    print(f"{rating_range} games: {len(chess_games[rating_range])} done")

print(f"Min elo: {min_elo}, Max elo: {max_elo}")

(800, 1000) games: 2000 done
(1000, 1200) games: 2000 done
(1200, 1400) games: 2000 done
(1400, 1600) games: 2000 done
(1600, 1800) games: 2000 done
(1800, 2000) games: 2000 done
(2000, 2200) games: 2000 done
(2200, 2400) games: 2000 done
(2400, 2600) games: 2000 done
(2600, 3000) games: 2000 done
Min elo: 800, Max elo: 2973


### Analyze the games

In [5]:
def pad_tensor(tensor, length, pad_value):
    """Append `pad_value` entries to `tensor` until it holds `length` items.

    The filler is built with `torch.ones` (torch's default dtype) and
    concatenated after `tensor`. `tensor` must not be longer than `length`,
    because the filler size would then be negative.
    """
    missing = length - len(tensor)
    filler = torch.ones(missing) * pad_value
    return torch.cat((tensor, filler))

def analyze_game(game, engine=None, nof_moves=10):
    """Evaluate every position of a game with a UCI engine.

    For each mainline move the position is analysed before the move (the
    best `nof_moves` lines) and again after it (a single line). Every move
    produces one feature vector laid out as

        [top-move scores (nof_moves) | W/D/L before (3) |
         score after (1)             | W/D/L after (3)]

    with all values expressed for the player who makes the move. Scores are
    divided by `mate_score` (1000).

    Args:
        game: Parsed game offering `board()`, `mainline_moves()` and
            `headers` with "WhiteElo" / "BlackElo".
        engine: An already running engine to reuse. When None, a Stockfish
            process is started from /usr/bin/stockfish and is always closed
            again before this function returns or raises.
        nof_moves: Number of principal variations requested per position.

    Returns:
        A pair `(analysis, elo)`. `analysis` stacks White's and Black's
        per-move vectors, shape (2, moves_per_side, nof_moves + 7); `elo`
        is a tensor `[white_elo, black_elo]`.
    """
    mate_score = 1_000
    limit = chess.engine.Limit(time=0.1)

    # Only an engine started here is ours to close; a caller-supplied engine
    # is left running for the caller to reuse.
    owns_engine = engine is None
    if owns_engine:
        engine = chess.engine.SimpleEngine.popen_uci("/usr/bin/stockfish")

    try:
        board = game.board()
        analysis = []

        for move in game.mainline_moves():
            # Engine evaluation before the move
            top_moves = engine.analyse(board, limit, multipv=nof_moves)
            # Scores of the top nof_moves moves, later divided by mate_score.
            # NOTE(review): centipawn scores are not clamped, so evaluations
            # beyond +/- mate_score would fall outside [-1, 1] - confirm this
            # is acceptable for the consumer.
            top_moves_tensor = torch.Tensor(
                [info["score"].relative.score(mate_score=mate_score) for info in top_moves]
            )
            # Pad the tensor if there are less than nof_moves legal moves
            top_moves_tensor = pad_tensor(top_moves_tensor, nof_moves, -mate_score) / mate_score
            # Win, draw, loss chance tensor before the move
            before_wdl = top_moves[0]["score"].relative.wdl()
            before_wdl_tensor = torch.Tensor([
                before_wdl.winning_chance(),
                before_wdl.drawing_chance(),
                before_wdl.losing_chance(),
            ])
            board.push(move)

            # Engine evaluation after the move
            after_move = engine.analyse(board, limit)
            # Now it's the opponent's turn so negate the score
            after_move_tensor = torch.Tensor(
                [after_move["score"].relative.score(mate_score=mate_score) * -1]
            ) / mate_score
            # Swap win and loss so the chances are from the perspective of
            # the player who just moved
            after_wdl = after_move["score"].relative.wdl()
            after_wdl_tensor = torch.Tensor([
                after_wdl.losing_chance(),
                after_wdl.drawing_chance(),
                after_wdl.winning_chance(),
            ])

            analysis.append(torch.cat((
                top_moves_tensor, before_wdl_tensor,
                after_move_tensor, after_wdl_tensor,
            )))

        # Pad the game if it ends on white's turn to batch white's and
        # black's analysis together.
        # NOTE(review): the filler row holds the raw -mate_score, unlike the
        # real rows which are divided by mate_score - presumably a sentinel;
        # confirm with the code that consumes these tensors.
        if len(analysis) % 2:
            analysis.append(torch.ones_like(analysis[0]) * (-mate_score))

        # Even indices are White's moves, odd indices are Black's.
        white_analysis = torch.stack(analysis[::2])
        black_analysis = torch.stack(analysis[1::2])

        white_elo = int(game.headers["WhiteElo"])
        black_elo = int(game.headers["BlackElo"])

        return torch.stack((white_analysis, black_analysis)), torch.Tensor([white_elo, black_elo])
    finally:
        if owns_engine:
            engine.close()

def analyze_games(batch_number, games, end=None):
    """Analyse a batch of games with one shared Stockfish process.

    Args:
        batch_number: Label used only in the progress messages.
        games: Sequence of games; each is passed to `analyze_game`.
        end: Optional slice end, so that only `games[:end]` is analysed.

    Returns:
        A pair of lists `(analysis, elo)` holding, per game, the two tensors
        returned by `analyze_game`.
    """
    analysis = []
    elo = []
    print(f"started batch {batch_number}")
    engine = chess.engine.SimpleEngine.popen_uci("/usr/bin/stockfish")

    try:
        for done, game in enumerate(games[:end], start=1):
            analysis_tensor, elo_tensor = analyze_game(game, engine)
            analysis.append(analysis_tensor)
            elo.append(elo_tensor)

            # Report progress after every 20th game.
            if done % 20 == 0:
                print(f"batch {batch_number}: game {done} done")
    finally:
        # Shut the engine process down even when a game fails to analyse.
        engine.close()

    print(f"batch {batch_number} done")

    return analysis, elo

In [6]:
import multiprocessing as mp

# Flatten the per-rating lists into one list of games (linear time, unlike
# repeated list concatenation through sum()).
all_games = [game for games in chess_games.values() for game in games]

cores = mp.cpu_count()
print(f"Using {cores} cores")
games_per_core = len(all_games) / cores

# One contiguous slice of games per worker. The bounds use integer arithmetic
# so the last slice always ends exactly at len(all_games); float bounds can
# round down and silently drop the final game.
total_games = len(all_games)
args = [
    (i, all_games[i * total_games // cores : (i + 1) * total_games // cores])
    for i in range(cores)
]

# The context manager tears the worker pool down even if a batch raises.
with mp.Pool(processes=cores) as pool:
    output = pool.starmap(analyze_games, args)

Using 8 cores
started batch 0
started batch 1
started batch 2
started batch 3
started batch 4
started batch 5
started batch 7
started batch 6
batch 0: game 20 done
batch 3: game 20 done
batch 1: game 20 done
batch 7: game 20 done
batch 2: game 20 done
batch 6: game 20 done
batch 4: game 20 done
batch 5: game 20 done
batch 0: game 40 done
batch 1: game 40 done
batch 3: game 40 done
batch 2: game 40 done
batch 7: game 40 done
batch 6: game 40 done
batch 4: game 40 done
batch 0: game 60 done
batch 5: game 40 done
batch 1: game 60 done
batch 3: game 60 done
batch 2: game 60 done
batch 1: game 80 done
batch 0: game 80 done
batch 6: game 60 done
batch 4: game 60 done
batch 7: game 60 done
batch 5: game 60 done
batch 3: game 80 done
batch 2: game 80 done
batch 0: game 100 done
batch 1: game 100 done
batch 4: game 80 done
batch 6: game 80 done
batch 7: game 80 done
batch 3: game 100 done
batch 1: game 120 done
batch 5: game 80 done
batch 2: game 100 done
batch 0: game 120 done
batch 4: game 10

In [14]:
# Merge the per-batch (analysis, elo) results in `output` into two flat lists.
analysis = []
elo = []
for batch_analysis, batch_elo in output:
    analysis.extend(batch_analysis)
    elo.extend(batch_elo)
print(len(analysis), len(elo))
print(start_index)

5000 5000
1500


In [15]:
# Persist the (analysis, elo) lists; the file name records the window
# start_index .. start_index + games_per_rating.
torch.save((analysis, elo), f"analysis/all_analysis_{start_index}-{start_index + games_per_rating}.pt")

### Convert the board positions to a computer-readable notation

In [21]:
def fen_to_bitboard(fen_str):
    """Encode a FEN string as a flat one-hot piece tensor.

    The piece-placement field is expanded into 12 planes of 64 squares:
    planes 0-5 hold the white P, N, B, R, Q, K and planes 6-11 the same
    black pieces. Squares are indexed in FEN order as row * 8 + col.

    Returns:
        A tensor of 1 + 12 * 64 values: the side-to-move flag (1 for "w",
        -1 otherwise) followed by the flattened planes.
    """
    # A FEN string must consist of exactly six space-separated fields.
    placement, side_to_move, _, _, _, _ = fen_str.split(" ")

    plane_of = {piece: plane for plane, piece in enumerate("PNBRQKpnbrqk")}

    planes = torch.zeros(12, 64)
    for row, rank in enumerate(placement.split("/")):
        col = 0
        for symbol in rank:
            if symbol.isdigit():
                # A digit stands for that many consecutive empty squares.
                col += int(symbol)
                continue
            planes[plane_of[symbol], row * 8 + col] = 1
            col += 1

    # Prepend whose move it is to the flattened planes.
    turn_flag = torch.tensor([1 if side_to_move == "w" else -1])
    return torch.cat((turn_flag, planes.flatten()))

def fen_to_bitboard_mirror(fen_str):
    """Encode a FEN string as one-hot planes seen from the side to move.

    With White to move the result matches the plain bitboard encoding:
    planes 0-5 are the white P, N, B, R, Q, K and planes 6-11 the black ones.
    With Black to move the placement string is reversed character by
    character (which turns the board by 180 degrees) and the plane
    assignment is swapped, so planes 0-5 always hold the side to move.

    Returns:
        A tensor of 1 + 12 * 64 values: the side-to-move flag (1 for "w",
        -1 otherwise) followed by the flattened planes.
    """
    # A FEN string must consist of exactly six space-separated fields.
    placement, side_to_move, _, _, _, _ = fen_str.split(" ")

    white_pieces, black_pieces = "PNBRQK", "pnbrqk"
    if side_to_move == "w":
        plane_order = white_pieces + black_pieces
    else:
        plane_order = black_pieces + white_pieces
        placement = placement[::-1]
    plane_of = {piece: plane for plane, piece in enumerate(plane_order)}

    planes = torch.zeros(12, 64)
    for row, rank in enumerate(placement.split("/")):
        col = 0
        for symbol in rank:
            if symbol.isdigit():
                # A digit stands for that many consecutive empty squares.
                col += int(symbol)
                continue
            planes[plane_of[symbol], row * 8 + col] = 1
            col += 1

    # Prepend whose move it is to the flattened planes.
    turn_flag = torch.tensor([1 if side_to_move == "w" else -1])
    return torch.cat((turn_flag, planes.flatten()))

def fen_to_board(fen_str):
    """Encode a FEN string as an 8x8 grid of signed piece values.

    White pieces get positive values (P=1, N=3, B=3.5, R=5, Q=9, K=20) and
    black pieces the matching negative values; every value is divided by 20.
    Empty squares stay 0.

    Returns:
        A tensor of 1 + 64 values: the side-to-move flag (1 for "w",
        -1 otherwise) followed by the flattened grid in FEN order.
    """
    # A FEN string must consist of exactly six space-separated fields.
    placement, side_to_move, _, _, _, _ = fen_str.split(" ")

    normalizer = 20
    base_values = {"P": 1, "N": 3, "B": 3.5, "R": 5, "Q": 9, "K": 20}
    signed_values = {}
    for piece, value in base_values.items():
        signed_values[piece] = value            # white piece
        signed_values[piece.lower()] = -value   # black piece

    grid = torch.zeros(8, 8)
    for row, rank in enumerate(placement.split("/")):
        col = 0
        for symbol in rank:
            if symbol.isdigit():
                # A digit stands for that many consecutive empty squares.
                col += int(symbol)
                continue
            grid[row, col] = signed_values[symbol] / normalizer
            col += 1

    turn_flag = torch.tensor([1 if side_to_move == "w" else -1])
    return torch.cat((turn_flag, grid.flatten()))

def fen_to_board_mirror(fen_str):
    """Encode a FEN string as an 8x8 value grid seen from the side to move.

    Piece values are P=1, N=3, B=3.5, R=5, Q=9, K=20, divided by 20. With
    White to move, white pieces are positive and black pieces negative.
    With Black to move the signs are swapped and the placement string is
    reversed character by character (which turns the board by 180 degrees),
    so the side to move always carries the positive values.

    Returns:
        A tensor of 1 + 64 values: the side-to-move flag (1 for "w",
        -1 otherwise) followed by the flattened grid.
    """
    # A FEN string must consist of exactly six space-separated fields.
    placement, side_to_move, _, _, _, _ = fen_str.split(" ")

    normalizer = 20
    base_values = {"P": 1, "N": 3, "B": 3.5, "R": 5, "Q": 9, "K": 20}

    if side_to_move == "w":
        white_sign = 1
    else:
        white_sign = -1
        placement = placement[::-1]

    signed_values = {}
    for piece, value in base_values.items():
        signed_values[piece] = white_sign * value
        signed_values[piece.lower()] = -white_sign * value

    grid = torch.zeros(8, 8)
    for row, rank in enumerate(placement.split("/")):
        col = 0
        for symbol in rank:
            if symbol.isdigit():
                # A digit stands for that many consecutive empty squares.
                col += int(symbol)
                continue
            grid[row, col] = signed_values[symbol] / normalizer
            col += 1

    turn_flag = torch.tensor([1 if side_to_move == "w" else -1])
    return torch.cat((turn_flag, grid.flatten()))

In [22]:
def convert_position(game, func):
    """Encode every position of a game, split by the player to move.

    Args:
        game: Parsed game offering `board()`, `mainline_moves()` and
            `headers` with "WhiteElo" / "BlackElo".
        func: Encoder called with the FEN string of each position reached
            before a move is played; must return a tensor.

    Returns:
        A pair `(positions, elo)`. `positions` stacks the encoded positions
        in which White was to move and those in which Black was to move;
        `elo` is a tensor `[white_elo, black_elo]`.
    """
    board = game.board()
    positions = []

    for move in game.mainline_moves():
        # Encode the position first, then play the move on the board.
        positions.append(func(board.fen()))
        board.push(move)

    # Pad the game if it ends on white's turn to batch white's and black's
    # positions together later
    if len(positions) % 2:
        positions.append(torch.zeros_like(positions[0]))

    # Even indices are positions with White to move, odd ones with Black.
    white_positions = torch.stack(positions[::2])
    black_positions = torch.stack(positions[1::2])

    white_elo = int(game.headers["WhiteElo"])
    black_elo = int(game.headers["BlackElo"])

    return torch.stack((white_positions, black_positions)), torch.Tensor([white_elo, black_elo])
    

def convert_positions_to_tensors(dataset, func):
    """Encode every game of a rating-bucketed dataset.

    Args:
        dataset: Mapping from rating range to a list of games.
        func: Position encoder handed on to `convert_position`.

    Returns:
        Two parallel lists `(positions, elo)` with one entry per game, in
        the iteration order of `dataset`.
    """
    all_positions, all_elo = [], []

    for rating_range, games in dataset.items():
        converted = [convert_position(game, func) for game in games]
        all_positions.extend(position_tensor for position_tensor, _ in converted)
        all_elo.extend(elo_tensor for _, elo_tensor in converted)

        # Progress message once a whole bucket has been converted.
        print(f"{rating_range} done")

    return all_positions, all_elo

In [27]:
# Available FEN encoders, keyed by the label that also appears in the name
# of the saved positions file.
position_converters = dict(
    bitboards=fen_to_bitboard,
    bitboards_mirrors=fen_to_bitboard_mirror,
    boards=fen_to_board,
    boards_mirrors=fen_to_board_mirror,
)

# Encoder selected for this run. Other options:
# "bitboards", "bitboards_mirrors", "boards_mirrors"
position_type = "boards"

In [28]:
# Encode every loaded game with the selected converter.
# Note: this rebinds `elo`, which the analysis cells above also assign.
positions, elo = convert_positions_to_tensors(chess_games, position_converters[position_type])

(800, 1000) done
(1000, 1200) done
(1200, 1400) done
(1400, 1600) done
(1600, 1800) done
(1800, 2000) done
(2000, 2200) done
(2200, 2400) done
(2400, 2600) done
(2600, 3000) done


In [26]:
# Persist the (positions, elo) lists; the file name records the encoder label
# and the window start_index .. start_index + games_per_rating.
# NOTE(review): this cell's execution count (26) precedes the cells that set
# `position_type` and build `positions` (27, 28), so the saved file may not
# match the values shown above - confirm by re-running top to bottom.
torch.save((positions, elo), f"positions/all_{position_type}_{start_index}-{start_index + games_per_rating}.pt")