In [15]:
import torch


# Plane index for each FEN piece letter: white pieces (uppercase) use planes
# 0-5 and black pieces (lowercase) use planes 6-11.
mapings = {
    "P": 0, "N": 1, "B": 2, "R": 3, "Q": 4, "K": 5,
    "p": 6, "n": 7, "b": 8, "r": 9, "q": 10, "k": 11
}

def fen_to_bitboard(fen_str):
    """Encode a FEN string as a flat float tensor of length 1 + 12 * 64 = 769.

    Element 0 is the side to move (1 for white, -1 otherwise). The remaining
    768 values are twelve 64-square occupancy planes, ordered by ``mapings``.
    Within a plane the square index is ``row * 8 + col``, where row 0 is the
    first rank listed in the FEN and col 0 is its first file.

    Args:
        fen_str: A FEN string. Only the piece-placement and side-to-move
            fields are read; trailing fields (castling, en passant, clocks)
            may be present or absent. A missing side-to-move field is
            treated as white to move.

    Returns:
        A 1-D ``torch.Tensor`` with 769 elements.

    Raises:
        ValueError: If ``fen_str`` contains no fields at all.
        KeyError: If the placement field contains an unknown piece letter.
    """
    fields = fen_str.split()
    if not fields:
        raise ValueError("empty FEN string")
    placement = fields[0]
    move = fields[1] if len(fields) > 1 else "w"

    bitboard = torch.zeros(12, 64)
    row, col = 0, 0
    for char in placement:
        if char == "/":
            # Rank separator: continue at the start of the next row.
            row += 1
            col = 0
        elif char.isdigit():
            # A digit encodes that many consecutive empty squares.
            col += int(char)
        else:
            bitboard[mapings[char], row * 8 + col] = 1
            col += 1

    # Prepend the side to move, using the planes' dtype so the concatenation
    # does not depend on implicit type promotion.
    side_to_move = torch.tensor([1 if move == "w" else -1], dtype=bitboard.dtype)
    return torch.cat((side_to_move, bitboard.flatten()))

In [16]:
import chess.pgn
import chess.engine

In [17]:
# Elo bucket labels: an open-ended bottom bucket, 200-point buckets from 900
# up to 2500, and an open-ended top bucket.
rating_ranges = (
    ["-900"]
    + [f"{low}-{low + 200}" for low in range(900, 2500, 200)]
    + ["2500-"]
)

In [20]:
def my_int(c):
    """Convert ``c`` with ``int()``, returning 0 when that raises ValueError.

    Only ``ValueError`` is handled (e.g. an empty or non-numeric string);
    any other exception raised by ``int()`` propagates to the caller.
    """
    try:
        value = int(c)
    except ValueError:
        value = 0
    return value

In [47]:
# Maximum number of games kept per rating bucket.
GAMES_PER_RANGE = 200

# Accepted games per bucket label, plus the overall Elo extremes seen.
chess_games = {rating: [] for rating in rating_ranges}
min_elo = 9999
max_elo = 0

for rating_range in rating_ranges:
    file = "datasets/outputs/" + rating_range + ".pgn"

    # Labels look like "900-1100", "-900" or "2500-". my_int maps a missing
    # side to 0; a missing upper bound is then replaced by 3000.
    lower_text, _dash, upper_text = rating_range.partition("-")
    lower_bound = max(0, my_int(lower_text))
    upper_bound = my_int(upper_text) or 3000

    # PGN is read in text mode, so state the encoding explicitly instead of
    # relying on the platform default.
    with open(file, encoding="utf-8") as f:
        while len(chess_games[rating_range]) < GAMES_PER_RANGE:
            game = chess.pgn.read_game(f)
            if game is None:
                break  # end of file

            event = game.headers.get("Event", "")
            if "UltraBullet" in event:
                continue
            # Bullet games are only kept for buckets starting above 2100.
            if "Bullet" in event and lower_bound <= 2100:
                continue

            # Skip games whose ratings are missing, "?" or otherwise
            # non-numeric instead of raising KeyError / ValueError.
            try:
                white_elo = int(game.headers.get("WhiteElo", "?"))
                black_elo = int(game.headers.get("BlackElo", "?"))
            except ValueError:
                continue

            # Both players must fall inside the bucket (bounds inclusive).
            if not (
                lower_bound <= white_elo <= upper_bound
                and lower_bound <= black_elo <= upper_bound
            ):
                continue

            chess_games[rating_range].append(game)

            min_elo = min(min_elo, white_elo, black_elo)
            max_elo = max(max_elo, white_elo, black_elo)

    print(f"{rating_range} games: {len(chess_games[rating_range])} done")

print(f"Min elo: {min_elo}, Max elo: {max_elo}")

-900 games: 200 done
900-1100 games: 200 done
1100-1300 games: 200 done
1300-1500 games: 200 done
1500-1700 games: 200 done
1700-1900 games: 200 done
1900-2100 games: 200 done
2100-2300 games: 200 done
2300-2500 games: 200 done
2500- games: 200 done
Min elo: 800, Max elo: 2805


In [28]:
# Centipawn value substituted for mate scores when a score is converted to an
# integer (passed as ``mate_score=`` to ``.score()`` in the analysis helpers).
mate_score = 99999

# Start a UCI engine subprocess from the system Stockfish binary.
# NOTE(review): no visible cell shuts this engine down; call engine.quit()
# once analysis is finished so the subprocess does not linger.
stockfish_path = "/usr/bin/stockfish"
engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)

In [30]:
def get_top_moves(board, n, time_limit=0.1):
    """Return the ``n`` best legal moves for the side to move.

    Each legal move is played on ``board``, the resulting position is
    evaluated with the module-level ``engine``, and the move is taken back.
    Scores are from White's point of view, with mate scores converted using
    the module-level ``mate_score``.

    Args:
        board: Position to analyse. It is mutated temporarily and restored
            before returning, even if the engine call raises.
        n: Maximum number of moves to return.
        time_limit: Engine time per candidate move, in seconds.

    Returns:
        A list of ``(move, score)`` tuples, best first for the side to move:
        descending score when ``board.turn`` is truthy (White), ascending
        otherwise.
    """
    limit = chess.engine.Limit(time=time_limit)
    scored_moves = []
    # Snapshot the legal moves because the board is mutated inside the loop.
    for move in list(board.legal_moves):
        board.push(move)
        try:
            info = engine.analyse(board, limit)
        finally:
            # Always undo the probe move so a failing engine call cannot
            # leave the caller's board in a modified state.
            board.pop()
        scored_moves.append((move, info["score"].white().score(mate_score=mate_score)))
    return sorted(scored_moves, key=lambda item: item[1], reverse=board.turn)[:n]

def pad_tensor(tensor, pad_len, pad_value):
    """Truncate or right-pad a 1-D tensor to exactly ``pad_len`` elements.

    Args:
        tensor: 1-D tensor to resize.
        pad_len: Desired length of the result.
        pad_value: Scalar appended as many times as needed.

    Returns:
        ``tensor[:pad_len]`` when the input is already long enough; otherwise
        a new tensor holding the input followed by copies of ``pad_value``.
        The padded result uses the promoted dtype of ``tensor`` and
        ``pad_value`` and stays on the input's device, so a float64 input is
        not narrowed and an integer input padded with a float becomes
        floating point.
    """
    length = tensor.shape[0]
    if length >= pad_len:
        return tensor[:pad_len]

    # Build the padding directly as a tensor instead of round-tripping through
    # Python lists, which discarded the input's dtype and device.
    dtype = torch.result_type(tensor, pad_value)
    padding = torch.full((pad_len - length,), pad_value, dtype=dtype, device=tensor.device)
    return torch.cat((tensor.to(dtype), padding))

In [62]:
def analyze_game(game, nof_moves, time_limit=0.1):
    """Turn a game into per-move feature tensors for each player.

    For every mainline move one sample is built: the encoded position before
    the move (``fen_to_bitboard``), followed by the engine evaluation before
    the move and the engine evaluation after it. Evaluations are from White's
    point of view, with mate scores converted using the module-level
    ``mate_score``.

    Each position is evaluated only once: the evaluation after one move is
    reused as the evaluation before the next, since they describe the same
    position.

    Args:
        game: Parsed game exposing ``board()``, ``mainline_moves()`` and
            ``headers`` with numeric ``WhiteElo`` / ``BlackElo`` values.
        nof_moves: Currently unused; kept so existing callers keep working.
        time_limit: Engine time per evaluated position, in seconds.

    Returns:
        ``(white_analysis, black_analysis, white_elo_range, black_elo_range)``
        where the first two are lists of 1-D sample tensors for the moves made
        by each side, and the last two are the outputs of
        ``calculate_elo_range`` for each player's rating.
    """
    def _evaluate(position):
        # White-relative score of a position as a single integer.
        info = engine.analyse(position, chess.engine.Limit(time=time_limit))
        return info["score"].white().score(mate_score=mate_score)

    board = game.board()
    white_analysis = []
    black_analysis = []
    eval_before = None

    for move in game.mainline_moves():
        if eval_before is None:
            # Only the starting position needs its own engine call.
            eval_before = _evaluate(board)

        # Record who is moving before the push flips the turn. Attributing
        # samples by side to move stays correct even when the game starts
        # from a position with Black to move.
        white_to_move = board.turn
        position_tensor = fen_to_bitboard(board.fen())

        board.push(move)
        eval_after = _evaluate(board)

        evals = torch.tensor([eval_before, eval_after], dtype=position_tensor.dtype)
        sample = torch.cat((position_tensor, evals))
        if white_to_move:
            white_analysis.append(sample)
        else:
            black_analysis.append(sample)

        eval_before = eval_after

    white_elo = int(game.headers["WhiteElo"])
    black_elo = int(game.headers["BlackElo"])

    return white_analysis, black_analysis, calculate_elo_range(white_elo), calculate_elo_range(black_elo)

def calculate_elo_range(elo):
    """Spread a rating over the ten Elo buckets as a soft label.

    The rating is modelled as a normal distribution centred on ``elo`` with a
    standard deviation of 200. Each output element is the probability mass of
    that distribution falling between a bucket's lower and upper edge.

    Args:
        elo: The player's rating.

    Returns:
        A 1-D tensor with one value per bucket, lowest bucket first. Mass
        below 400 or above 3000 is not assigned to any bucket.
    """
    import scipy.stats as st

    spread = 200
    buckets = (
        (400, 900),
        (900, 1100),
        (1100, 1300),
        (1300, 1500),
        (1500, 1700),
        (1700, 1900),
        (1900, 2100),
        (2100, 2300),
        (2300, 2500),
        (2500, 3000),
    )

    masses = []
    for low, high in buckets:
        mass_below_high = st.norm.cdf(high, loc=elo, scale=spread)
        mass_below_low = st.norm.cdf(low, loc=elo, scale=spread)
        masses.append(mass_below_high - mass_below_low)
    return torch.tensor(masses)

In [63]:
def guess_elo_range(elo_range):
    """Collapse per-bucket weights into a single rating estimate.

    Computes the weighted average of the bucket midpoints, using the entries
    of ``elo_range`` as weights and dividing by their total, so the weights
    do not need to sum to one.

    Args:
        elo_range: Ten weights, one per Elo bucket, lowest bucket first.

    Returns:
        The weighted mean of the bucket midpoints.
    """
    bucket_edges = (
        (400, 900),
        (900, 1100),
        (1100, 1300),
        (1300, 1500),
        (1500, 1700),
        (1700, 1900),
        (1900, 2100),
        (2100, 2300),
        (2300, 2500),
        (2500, 3000),
    )
    weighted_total = sum(
        weight * (low + high) / 2
        for (low, high), weight in zip(bucket_edges, elo_range)
    )
    return weighted_total / sum(elo_range)

In [64]:
# x_dataset[i] is one player's list of per-move feature tensors from one game;
# y_dataset[i] is that player's Elo bucket distribution.
x_dataset = []
y_dataset = []
for games in chess_games.values():
    # Only the first three games of each bucket are analysed.
    for game in games[:3]:
        white_analysis, black_analysis, white_elo_range, black_elo_range = analyze_game(game, 10)

        # A side that made no moves yields an empty sequence, which cannot be
        # stacked into a tensor later. Drop it together with its label so the
        # two lists stay aligned.
        for analysis, elo_range in (
            (white_analysis, white_elo_range),
            (black_analysis, black_elo_range),
        ):
            if analysis:
                x_dataset.append(analysis)
                y_dataset.append(elo_range)

        print(f"{game.headers['WhiteElo']} vs {game.headers['BlackElo']}")
    

892 vs 848
Converted: 855.051753653754 vs 814.4264989154635
820 vs 800
Converted: 790.9802347135861 vs 775.49371971151
858 vs 844
Converted: 823.2734102322725 vs 810.955659853074
938 vs 922
Converted: 901.6028382227018 vs 885.010027990772
943 vs 970
Converted: 906.8642213103817 vs 935.8104896750864
1084 vs 1042
Converted: 1063.1561676080926 vs 1015.8353795257541
1222 vs 1224
Converted: 1214.8927825538256 vs 1217.027464305744
1186 vs 1237
Converted: 1176.139758730575 vs 1230.8575886425017
1183 vs 1187
Converted: 1172.8818174691519 vs 1177.2247599986629
1340 vs 1369
Converted: 1338.055495281748 vs 1367.6571090467905
1451 vs 1478
Converted: 1450.5774132071112 vs 1477.7212473883476
1314 vs 1393
Converted: 1311.336487208608 vs 1392.0264863431503
1648 vs 1514
Converted: 1647.9879319459133 vs 1513.844269589269
1531 vs 1534
Converted: 1530.8830061909298 vs 1533.8888485755729
1652 vs 1628
Converted: 1651.9891112448408 vs 1627.9808696541227
1772 vs 1785
Converted: 1772.0191303458773 vs 1785.0250

In [82]:
# Print the dataset's nesting sizes, outermost first: number of sequences,
# length of the first sequence, length of its first element.
nested = x_dataset
print(len(nested))
for depth in range(2):
    nested = nested[0]
    print(len(nested))

60
14
771


In [96]:
from lstm_netwrok import train_model, initialize_model

# NOTE(review): the recorded run of this cell ended with
# "IndexError: too many indices for tensor of dimension 2". Every element of
# x_train is a 2-D tensor with no batch dimension; presumably the model or
# train_model indexes a 3-D batched tensor. Confirm against lstm_netwrok
# before changing the shapes here (e.g. adding .unsqueeze(0)).
feature_count = len(x_dataset[0][0])
class_count = len(y_dataset[0])
lstm, optimizer, loss_func = initialize_model(
    input_size=feature_count,
    hidden_size=128,
    num_layers=2,
    num_classes=class_count,
)

# One stacked tensor per player sequence: rows are positions, columns features.
x_train = [torch.stack(list(game)) for game in x_dataset]

train_model(lstm, optimizer, loss_func, (x_train, y_dataset), num_epochs=100)

IndexError: too many indices for tensor of dimension 2