In [1]:
from pathlib import Path
import pandas as pd

In [None]:
def get_color(player, game):
    """Return the side ("White" or "Black") that `player` had in `game`.

    The match is case-insensitive and only looks at the part of each name
    that precedes the first comma. White is checked before Black.

    Args:
        player: Name fragment to look for.
        game: Mapping-like record with string entries under "White" and "Black".

    Returns:
        "White" or "Black".

    Raises:
        Exception: If `player` is found in neither name.
    """
    needle = player.lower()
    for side in ("White", "Black"):
        # Only the text before the first comma takes part in the comparison.
        name_head = game[side].split(",")[0].lower()
        if needle in name_head:
            return side
    raise Exception("Player not in game")

In [None]:
def get_cp_loss(player):
    """Compute the per-move centipawn loss of `player` and save it as a CSV.

    Reads ``output/centipawns/{player}.csv`` and ``output/metadata/{player}.csv``
    and writes ``output/cp_loss/{player}.csv``. Row ``i`` of the centipawn
    table is paired with row ``i`` of the metadata table to decide which side
    the player had.

    Args:
        player: Player identifier used in the file names and passed to
            `get_color` to find the player's side in each game.

    Raises:
        Exception: Propagated from `get_color` when the player is in neither
            name of a metadata row.
    """
    cp = pd.read_csv(f"output/centipawns/{player}.csv", index_col=0)
    # Cap evaluations so extreme scores do not dominate the differences.
    cp = cp.clip(-1000, 1000)
    # Change in evaluation between consecutive columns of the same row.
    diffs = cp.diff(axis=1)
    md = pd.read_csv(f"output/metadata/{player}.csv", index_col=0)
    colors = [get_color(player, row) for _, row in md.iterrows()]
    all_diffs = []
    for i in range(cp.shape[0]):
        is_white = colors[i] == "White"
        # After dropping the leading NaN produced by diff(), Black's entries
        # are taken from the even positions and White's from the odd ones.
        start = 1 if is_white else 0
        # Slice the NumPy array so the step slice is always positional.
        player_diffs = diffs.iloc[i, :].dropna().to_numpy()[start::2]
        if is_white:
            # Negate into a new array: the array handed out by pandas may be
            # read-only (Copy-on-Write) or a view, so it must not be mutated.
            # NOTE(review): the sign flip presumes evaluations are stored from
            # White's point of view - confirm against the centipawn export.
            player_diffs = -player_diffs
        # Only losses are kept; improvements are floored at zero.
        all_diffs.append(player_diffs.clip(min=0))
    # Rows of different length are padded with NaN by the DataFrame constructor.
    df = pd.DataFrame(all_diffs)
    df.to_csv(f"output/cp_loss/{player}.csv")

In [2]:
# Folder with the source PGN files (presumably one file per player, judging
# by the disabled line below that derives the player list from the file stems).
pgns = Path("pgns")
# players = [pgn.stem for pgn in pgns.glob("*.pgn")]  # uncomment once all players are processed
players = [
    "carlsen",
    "erigaisi",
    "gukesh",
    "nepo",
    "niemann",
]
players

['carlsen', 'erigaisi', 'gukesh', 'nepo', 'niemann']

Calculate move-by-move centipawn loss.

In [3]:
# Run get_cp_loss for every player; the name is printed before each call so
# progress is visible while the loop runs.
for player in players:
    print(player)
    get_cp_loss(player)

carlsen


NameError: name 'get_cp_loss' is not defined

Calculate the mean centipawn (cp) loss per game.

In [9]:
# For every player, average each row of the cp-loss table (NaN entries are
# skipped by pandas' default) and store the result as a one-column CSV.
for player in players:
    source_csv = f"output/cp_loss/{player}.csv"
    target_csv = f"output/mean_cp_loss/{player}.csv"
    loss = pd.read_csv(source_csv, index_col=0)
    game_losses = loss.mean(axis="columns")
    df = game_losses.to_frame()
    df.to_csv(target_csv)

Calculate the standard deviation of the centipawn (cp) loss per game.

In [41]:
# For every player, take the standard deviation of each row of the cp-loss
# table (NaN entries are skipped by pandas' default) and store it as a
# one-column CSV.
for player in players:
    source_csv = f"output/cp_loss/{player}.csv"
    target_csv = f"output/std_cp_loss/{player}.csv"
    loss = pd.read_csv(source_csv, index_col=0)
    game_losses = loss.std(axis="columns")
    df = game_losses.to_frame()
    df.to_csv(target_csv)

Extract the player's Elo and the opponent's Elo for each game.

In [40]:
import re

# For every player, write a CSV with two columns per game: the player's own
# rating ('Elo') and the opponent's rating ('Opp Elo').
for player in players:
    md = pd.read_csv(f"output/metadata/{player}.csv", index_col=0)
    # na=False keeps the masks strictly boolean when a name is missing;
    # otherwise NaN entries break both `~mask` and boolean selection.
    mask_w = md['White'].str.contains(player, flags=re.IGNORECASE, regex=True, na=False)
    mask_b = md['Black'].str.contains(player, flags=re.IGNORECASE, regex=True, na=False)

    # player elo: BlackElo where the player is Black, else WhiteElo where the
    # player is White, else 0. Black takes precedence if both names match.
    # Building the column with where() avoids overwriting a zero-filled int
    # column with values of another dtype (e.g. missing ratings).
    md['Elo'] = md['BlackElo'].where(mask_b, md['WhiteElo'].where(mask_w, 0))

    # opp elo: the rating of the side the player is NOT on. A row matching
    # both names gets 0; a row matching neither gets BlackElo.
    md['Opp Elo'] = md['BlackElo'].where(~mask_b, md['WhiteElo'].where(~mask_w, 0))

    df = md[['Elo', 'Opp Elo']]
    df.to_csv(f"output/elo/{player}.csv")

Get player-based win-loss

In [89]:
# For every player, write a one-column CSV ('WinLoss') with the game outcome
# from the player's perspective: 1 = win, 0 = draw, -1 = loss.
for player in players:
    md = pd.read_csv(f"output/metadata/{player}.csv", index_col=0)
    # na=False keeps the masks strictly boolean when a name is missing.
    mask_w = md['White'].str.contains(player, flags=re.IGNORECASE, regex=True, na=False)
    mask_b = md['Black'].str.contains(player, flags=re.IGNORECASE, regex=True, na=False)

    # 'Result' is split on '-': the first token is taken as White's score and
    # the second as Black's (e.g. "1-0", "0-1", "1/2-1/2"). The tokens are
    # strings, so they are translated with an explicit mapping; comparing them
    # with the integers 0 and 1 never matches and left losses recorded as 0.
    score_to_outcome = {"1": 1, "0": -1, "1/2": 0}
    tokens = md['Result'].str.split('-')
    # .str[k] yields NaN for a missing token (e.g. a result without a dash)
    # and map() yields NaN for an unknown token, so odd results do not crash.
    white_outcome = tokens.str[0].map(score_to_outcome)
    black_outcome = tokens.str[1].map(score_to_outcome)

    # player winloss: Black takes precedence if both names match. Rows where
    # the player is on neither side, or the result is unrecognised, stay missing.
    # Nullable Int64 keeps the CSV values as integers even with missing rows.
    md['WinLoss'] = black_outcome.where(mask_b, white_outcome.where(mask_w)).astype('Int64')

    df = md[['WinLoss']]
    df.to_csv(f"output/winloss/{player}.csv")

Get White win-loss

In [90]:
# For every player, write a one-column CSV ('WhiteWinLoss') with the game
# outcome from White's perspective: 1 = White won, 0 = draw, -1 = White lost.
for player in players:
    md = pd.read_csv(f"output/metadata/{player}.csv", index_col=0)

    # The token before the '-' in 'Result' is taken as White's score
    # (e.g. "1-0", "0-1", "1/2-1/2"). The tokens are strings, so they are
    # translated with an explicit mapping; comparing them with the integers
    # 0 and 1 never matches and left White's losses recorded as 0.
    score_to_outcome = {"1": 1, "0": -1, "1/2": 0}
    white_token = md['Result'].str.split('-').str[0]
    # Unknown or missing results map to a missing value instead of crashing;
    # nullable Int64 keeps the CSV values as integers in that case.
    md['WhiteWinLoss'] = white_token.map(score_to_outcome).astype('Int64')

    df = md[['WhiteWinLoss']]
    df.to_csv(f"output/whitewinloss/{player}.csv")