In [1]:
import torch
import chess.pgn
import chess.engine

In [2]:
# Elo boundaries of the rating buckets; consecutive pairs form one (low, high) range.
_rating_edges = [400, 900, 1100, 1300, 1500, 1700, 1900, 2100, 2300, 2500, 3000]
rating_ranges = list(zip(_rating_edges[:-1], _rating_edges[1:]))

# Labels are "low-high"; the first bucket is open below and the last is open above.
rating_ranges_labels = [f"{low}-{high}" for low, high in rating_ranges]
rating_ranges_labels[0] = f"-{rating_ranges[0][1]}"
rating_ranges_labels[-1] = f"{rating_ranges[-1][0]}-"

### Load Games

In [3]:
# Collect up to `games_per_range` games per rating bucket from the pre-split PGN files.
chess_games = {rating: [] for rating in rating_ranges_labels}
min_elo = 9999
max_elo = 0
games_per_range = 200  # cap on accepted games per rating bucket

for rating_label, rating_range in zip(rating_ranges_labels, rating_ranges):
    file = "datasets/outputs/" + rating_label + ".pgn"

    lower_bound = rating_range[0]
    upper_bound = rating_range[1]

    # python-chess leaves decoding to the caller, so state the encoding
    # explicitly instead of depending on the platform default.
    with open(file, encoding="utf-8") as f:
        while len(chess_games[rating_label]) < games_per_range:
            game = chess.pgn.read_game(f)
            if game is None:
                # End of file reached before the bucket was filled.
                break

            event = game.headers.get("Event", "")
            if "UltraBullet" in event:
                continue
            # Bullet games are only accepted for buckets whose lower bound is above 2100.
            if "Bullet" in event and lower_bound <= 2100:
                continue

            # Parse each rating once. A missing tag, "?" or any other
            # non-numeric value makes the game unusable, so skip it
            # rather than aborting the whole load.
            try:
                white_elo = int(game.headers.get("WhiteElo", "?"))
                black_elo = int(game.headers.get("BlackElo", "?"))
            except ValueError:
                continue

            # Both players must fall inside the bucket (bounds inclusive).
            if not (
                lower_bound <= white_elo <= upper_bound
                and lower_bound <= black_elo <= upper_bound
            ):
                continue
            if not game.mainline_moves():
                continue

            chess_games[rating_label].append(game)

            min_elo = min(min_elo, white_elo, black_elo)
            max_elo = max(max_elo, white_elo, black_elo)

    print(f"{rating_label} games: {len(chess_games[rating_label])} done")

print(f"Min elo: {min_elo}, Max elo: {max_elo}")

-900 games: 200 done
900-1100 games: 200 done
1100-1300 games: 200 done
1300-1500 games: 200 done
1500-1700 games: 200 done
1700-1900 games: 200 done
1900-2100 games: 200 done
2100-2300 games: 200 done
2300-2500 games: 200 done
2500- games: 200 done
Min elo: 800, Max elo: 2805


### Analyze the games

In [4]:
# Centipawn magnitude passed as `mate_score` when converting engine scores to integers.
mate_score = 9999

# Start the UCI engine process shared by the analysis functions below.
stockfish_path = "/usr/bin/stockfish"
engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)

In [12]:
def fen_to_bitboard(fen_str):
    """Encode a FEN string as a flat tensor of ``1 + 12 * 64`` values.

    Element 0 is the side to move (``1`` for White, ``-1`` otherwise). The
    remaining 768 values are twelve 64-square planes in the order
    ``P N B R Q K p n b r q k``; a square holds ``1`` where that piece
    stands and ``0`` elsewhere. Squares are indexed ``row * 8 + col`` in
    the order the FEN placement field lists them (first rank group is
    row 0, first file is column 0).

    Only the first two FEN fields (piece placement and side to move) are
    used, so abbreviated FEN/EPD strings that omit the trailing counters
    are accepted as well.

    Args:
        fen_str: FEN (or EPD) string with at least the placement and
            side-to-move fields, separated by whitespace.

    Returns:
        A 1-D tensor of length 769 in torch's default floating dtype.

    Raises:
        ValueError: if fewer than two fields are present.
        KeyError: if the placement field contains an unknown piece letter.
    """
    piece_planes = {
        "P": 0, "N": 1, "B": 2, "R": 3, "Q": 4, "K": 5,
        "p": 6, "n": 7, "b": 8, "r": 9, "q": 10, "k": 11
    }

    fields = fen_str.split()
    if len(fields) < 2:
        raise ValueError(
            f"expected at least piece placement and side to move in FEN, got {fen_str!r}"
        )
    placement, side_to_move = fields[0], fields[1]

    bitboard = torch.zeros(12, 64)
    row, col = 0, 0
    for char in placement:
        if char == "/":
            # Next rank group: move down one row and restart at the first file.
            row += 1
            col = 0
        elif char.isdigit():
            # A digit is a run of that many empty squares.
            col += int(char)
        else:
            bitboard[piece_planes[char], row * 8 + col] = 1
            col += 1

    # Prepend whose move it is; create it in the board's dtype so the
    # concatenation does not depend on implicit type promotion.
    turn = torch.tensor([1 if side_to_move == "w" else -1], dtype=bitboard.dtype)
    return torch.cat((turn, bitboard.flatten()))

def get_top_moves(board, n):
    """Return the ``n`` best legal moves for the side to move.

    Each legal move is played on ``board``, the resulting position is given
    a very short engine search (0.001 s), and the move is taken back. Scores
    are integers from White's point of view, with mate scores converted
    using the notebook-level ``mate_score``.

    Args:
        board: position to analyse; it is restored after every trial move.
        n: maximum number of ``(move, score)`` pairs to return.

    Returns:
        List of ``(move, score)`` tuples, ordered best-first for the side to
        move: descending when ``board.turn`` is truthy, ascending otherwise.
    """
    scored_moves = []
    for candidate in board.legal_moves:
        board.push(candidate)
        info = engine.analyse(board, chess.engine.Limit(time=0.001))
        white_score = info["score"].white().score(mate_score=mate_score)
        scored_moves.append((candidate, white_score))
        board.pop()

    scored_moves.sort(key=lambda pair: pair[1], reverse=board.turn)
    return scored_moves[:n]

def pad_tensor(tensor, pad_len, pad_value):
    """Force a 1-D tensor to exactly ``pad_len`` elements.

    Longer (or equal-length) inputs are truncated with a slice; shorter
    inputs are rebuilt from a Python list with ``pad_value`` appended until
    the length is ``pad_len``.

    Args:
        tensor: 1-D tensor to resize.
        pad_len: target length.
        pad_value: value appended when the input is too short.

    Returns:
        A 1-D tensor of length ``pad_len``.
    """
    current_len = tensor.shape[0]
    if current_len >= pad_len:
        return tensor[:pad_len]

    values = tensor.tolist()
    values.extend([pad_value] * (pad_len - current_len))
    return torch.tensor([values]).reshape(-1)

In [19]:
def analyze_game(game, nof_moves=10, time_limit=0.1):
    """Turn one game into per-move feature tensors for White and Black.

    For every mainline move a feature vector is built from the
    ``fen_to_bitboard`` encoding of the position before the move, followed
    by the engine evaluation of that position and the engine evaluation of
    the position after the move. Evaluations are integers from White's
    point of view, with mate scores converted using the notebook-level
    ``mate_score``.

    The position after move ``k`` is the position before move ``k + 1``, so
    each position is sent to the engine only once and its evaluation is
    reused for the next move.

    Args:
        game: parsed PGN game with ``WhiteElo`` and ``BlackElo`` headers
            that are valid integers.
        nof_moves: currently unused; kept so existing callers keep working.
        time_limit: seconds of engine time per analysed position.

    Returns:
        ``(white_analysis, black_analysis, white_elo, black_elo)`` where the
        two analysis tensors stack the feature vectors of the even and odd
        plies respectively. If the game has an odd number of plies, an
        all-zero row is appended to the Black tensor so both have the same
        number of rows.

    Raises:
        RuntimeError: presumably, if the game has no mainline moves, since
            ``torch.stack`` is then called on an empty list.
    """
    def evaluate(position):
        # White-relative integer score of `position`.
        info = engine.analyse(position, chess.engine.Limit(time=time_limit))
        return info["score"].white().score(mate_score=mate_score)

    board = game.board()
    analysis = []
    eval_before = None  # evaluation of the current position, computed lazily

    for move in game.mainline_moves():
        if eval_before is None:
            eval_before = evaluate(board)

        board_position_tensor = fen_to_bitboard(board.fen())

        board.push(move)
        eval_after = evaluate(board)

        evals_tensor = torch.tensor(
            [eval_before, eval_after], dtype=board_position_tensor.dtype
        )
        analysis.append(torch.cat((board_position_tensor, evals_tensor)))

        # The position just analysed is the starting point of the next move.
        eval_before = eval_after

    # Pad the game if it ends on white's turn to bunch white's and black's analysis together later
    if len(analysis) % 2:
        analysis.append(torch.zeros_like(analysis[0]))

    white_analysis = torch.stack(analysis[::2])
    black_analysis = torch.stack(analysis[1::2])

    white_elo = int(game.headers["WhiteElo"])
    black_elo = int(game.headers["BlackElo"])

    return white_analysis, black_analysis, white_elo, black_elo

In [26]:
def analyze_games(n=None):
    """Analyse the loaded games bucket by bucket and collect the samples.

    Iterates over ``chess_games`` and runs ``analyze_game`` on the first
    ``n`` games of every rating bucket (all of them when ``n`` is ``None``).
    Progress is printed after every tenth game and after each bucket.

    Args:
        n: maximum number of games to analyse per bucket, or ``None``.

    Returns:
        List of ``((white_analysis, black_analysis), (white_elo, black_elo))``
        tuples, one per analysed game.
    """
    samples = []
    for bucket_label, bucket_games in chess_games.items():
        for game_number, game in enumerate(bucket_games[:n], start=1):
            white_features, black_features, white_elo, black_elo = analyze_game(game)

            features = (white_features, black_features)
            targets = (white_elo, black_elo)
            samples.append((features, targets))

            if game_number % 10 == 0:
                print(f"Game {game_number} done")

        print(f"{bucket_label} done")
    return samples

In [27]:
# Analyse the loaded games (n = None places no per-bucket limit) and save the result.
n = None
dataset_path = "datasets/dataset_all_games_01.pt"
dataset = analyze_games(n)

torch.save(dataset, dataset_path)

Game 10 done
Game 20 done
Game 30 done
Game 40 done
Game 50 done
Game 60 done
Game 70 done
Game 80 done
Game 90 done
Game 100 done
Game 110 done
Game 120 done
Game 130 done
Game 140 done
Game 150 done
Game 160 done
Game 170 done
Game 180 done
Game 190 done
Game 200 done
-900 done
Game 10 done
Game 20 done
Game 30 done
Game 40 done
Game 50 done
Game 60 done
Game 70 done
Game 80 done
Game 90 done
Game 100 done
Game 110 done
Game 120 done
Game 130 done
Game 140 done
Game 150 done
Game 160 done
Game 170 done
Game 180 done
Game 190 done
Game 200 done
900-1100 done
Game 10 done
Game 20 done
Game 30 done
Game 40 done
Game 50 done
Game 60 done
Game 70 done
Game 80 done
Game 90 done
Game 100 done
Game 110 done
Game 120 done
Game 130 done
Game 140 done
Game 150 done
Game 160 done
Game 170 done
Game 180 done
Game 190 done
Game 200 done
1100-1300 done
Game 10 done
Game 20 done
Game 30 done
Game 40 done
Game 50 done
Game 60 done
Game 70 done
Game 80 done
Game 90 done
Game 100 done
Game 110 done
Gam