In [1]:
import re
import pandas as pd

In [2]:
with open("data/giri.pgn", "r") as f:
    pgn = f.read()

In [3]:
chunks = pgn.split('\n\n')

In [4]:
info = chunks[::2][:-1]

In [5]:
info[0]

'\ufeff[Event "FTX Crypto Cup KO"]\n[Site "chess24.com INT"]\n[Date "2021.05.27"]\n[Round "1.24"]\n[White "Giri, Anish"]\n[Black "Radjabov, Teimour"]\n[Result "1/2-1/2"]\n[ECO "C65"]\n[WhiteElo "2780"]\n[BlackElo "2765"]\n[PlyCount "115"]\n[EventDate "2021.05.26"]\n[EventType "k.o. (rapid)"]\n[EventRounds "3"]\n[EventCountry "GER"]\n[SourceTitle "CB22_2021"]\n[SourceDate "2021.06.02"]\n[SourceVersion "1"]\n[SourceVersionDate "2021.06.02"]\n[SourceQuality "1"]'

In [6]:
name = "Giri, Anish"

In [7]:
moves = chunks[1::2]

In [8]:
def get_tag(tag, info):
    s = rf'{tag} \"(.+?)\"'
    matches = re.search(s, info)
    return matches.groups()[0] if matches else None

In [9]:
events, rounds, results, opponent_elos = [],[],[],[]

In [10]:
for game in info:
    events.append(get_tag("Event", game))
    rounds.append(get_tag("Round", game))
    
    white = get_tag("White", game)
    black = get_tag("Black", game)

    result = get_tag("Result", game)

    if white == name:
        i = 0
        opponent_elo = get_tag("BlackElo", game)
    elif black == name:
        i = 1
        opponent_elo = get_tag("WhiteElo", game)
    else:
        raise Exception("No matching player!")

    point_str = result.split('-')[i]
    if point_str == "0":
        points = 0
    elif point_str == "1":
        points = 1
    elif point_str == "1/2":
        points = 0.5
    
    results.append(points)
    
    opponent_elos.append(opponent_elo)

In [11]:
df = pd.DataFrame({
    "Event": events,
    "Round": rounds,
    "Result": results,
    "OpponentELO": opponent_elos
})

In [12]:
df.shape

(1000, 4)

In [13]:
df.head()

Unnamed: 0,Event,Round,Result,OpponentELO
0,FTX Crypto Cup KO,1.24,0.5,2765
1,FTX Crypto Cup KO,1.23,0.5,2765
2,FTX Crypto Cup KO,1.22,0.5,2765
3,FTX Crypto Cup KO,1.21,0.5,2765
4,FTX Crypto Cup KO,1.14,0.0,2765


In [14]:
df.tail()

Unnamed: 0,Event,Round,Result,OpponentELO
995,Palma de Mallorca FIDE GP,4,0.0,2801
996,Palma de Mallorca FIDE GP,7,0.5,2774
997,Palma de Mallorca FIDE GP,2,0.5,2796
998,Douglas IoM op,3,0.5,2600
999,Palma de Mallorca FIDE GP,8,0.0,2741


In [15]:
def round2float(r):
    try:
        r2 = float(r)
    except:
        r2 = None
    return r2

In [16]:
df['r2'] = df['Round'].apply(round2float)

In [17]:
df = df.sort_values(["Event", "r2"]).reset_index(drop=True)

In [18]:
df.head()

Unnamed: 0,Event,Round,Result,OpponentELO,r2
0,Abu Dhabi Super Blitz op 1st,5,1.0,2542,5.0
1,Abu Dhabi Super Blitz op 1st,8,1.0,2630,8.0
2,Abu Dhabi Super Blitz op 1st,9,0.5,2736,9.0
3,Abu Dhabi Super Blitz op 1st,10,0.0,2716,10.0
4,Airthings Masters Prelim,1,0.5,2784,1.0


In [19]:
df.tail()

Unnamed: 0,Event,Round,Result,OpponentELO,r2
995,chess.com Speed 5m+1spm 2018,2.4,0.5,2767,2.4
996,chess.com Speed 5m+1spm 2018,2.5,0.0,2767,2.5
997,chess.com Speed 5m+1spm 2018,2.6,0.0,2767,2.6
998,chess.com Speed 5m+1spm 2018,2.7,1.0,2767,2.7
999,chess.com Speed 5m+1spm 2018,2.8,0.0,2767,2.8


In [20]:
df = df[~df["Event"].str.contains('Bullet')] # no bullet

In [21]:
df['OpponentELO'] = df['OpponentELO'].astype('int')

In [22]:
df['previous'] = df['Result'].shift()

In [23]:
df.head()

Unnamed: 0,Event,Round,Result,OpponentELO,r2,previous
0,Abu Dhabi Super Blitz op 1st,5,1.0,2542,5.0,
1,Abu Dhabi Super Blitz op 1st,8,1.0,2630,8.0,1.0
2,Abu Dhabi Super Blitz op 1st,9,0.5,2736,9.0,1.0
3,Abu Dhabi Super Blitz op 1st,10,0.0,2716,10.0,0.5
4,Airthings Masters Prelim,1,0.5,2784,1.0,0.0


In [24]:
events = df['Event'].unique()

In [25]:
idxs = []
for e in events:
    idxs.append(df[df.Event == e].iloc[0].name)

In [26]:
df = df.drop(index=idxs)

In [27]:
df.shape

(913, 6)

In [28]:
df.to_csv("tmp.csv", index=False)

In [29]:
def perf_rating(p, rc):
    dp = (800 * p) - 400
    pr = rc + dp
    return pr

In [30]:
win = df[df['previous'] == 1]
loss = df[df['previous'] == 0]
draw = df[df['previous'] == 0.5]

In [31]:
def get_stats(df):
    p = df.Result.mean()
    rc = df.OpponentELO.mean()
    pr = perf_rating(p, rc)
    return p, pr

In [32]:
# score_df = pd.DataFrame(columns = ['Win', 'Loss', 'Draw'])
# perf_df = pd.DataFrame(columns = ['Win', 'Loss', 'Draw'])

In [33]:
perf_df = pd.read_csv("perf.csv", index_col=0)
score_df = pd.read_csv("score.csv", index_col=0)

In [34]:
p, pr = get_stats(win)

In [35]:
score_df.loc[name, 'Win'] = p

In [36]:
perf_df.loc[name, 'Win'] = pr

In [37]:
p, pr = get_stats(draw)

In [38]:
score_df.loc[name, 'Draw'] = p

In [39]:
perf_df.loc[name, 'Draw'] = pr

In [40]:
p, pr = get_stats(loss)

In [41]:
score_df.loc[name, 'Loss'] = p

In [42]:
perf_df.loc[name, 'Loss'] = pr

In [65]:
score_df = score_df.round(2)

In [62]:
perf_df = perf_df.round().astype("int")

In [66]:
score_df.to_csv("score.csv")

In [63]:
perf_df.to_csv("perf.csv")

In [67]:
score_df

Unnamed: 0,Win,Loss,Draw
"Carlsen, Magnus",0.66,0.73,0.6
"Caruana, Fabiano",0.58,0.47,0.56
"Giri, Anish",0.55,0.55,0.52
"Nepomniachtchi, Ian",0.58,0.54,0.55


In [69]:
perf_df

Unnamed: 0,Win,Loss,Draw
"Carlsen, Magnus",2854,2924,2826
"Caruana, Fabiano",2785,2727,2801
"Giri, Anish",2745,2769,2754
"Nepomniachtchi, Ian",2788,2776,2786
