In [49]:
import pandas as pd
import numpy as np

# Account name; it selects which per-user directory the games file is read from.
user = 'DiciDicee'

# Load that user's games from the tab-separated export under the home directory.
df = pd.read_csv(f"~/.chess-stats/game-synthesis/{user}/games.tsv", sep='\t')

# Display frames of up to 100 rows in full; longer frames are truncated to 50 rows.
pd.set_option('display.min_rows', 50)
pd.set_option('display.max_rows', 100)

In [46]:
# Keep games dated "2024-01" or later, restricted to the columns needed for the
# per-opening breakdown, then discard every Bullet game.
opening_outcomes = (
    df.loc[
        df["date"] >= "2024-01",
        ["date", "color", "result", "opening_family", "time_control"],
    ]
    .loc[lambda games: games["time_control"] != "Bullet"]
)

In [47]:
# One row per (color, opening_family, result) with the number of games.
opening_outcomes_count = (
    opening_outcomes
    .groupby(['color', 'opening_family', 'result'])
    .size()
    .reset_index(name='count')
)

# Total games per (color, opening_family), repeated on every row of that group.
opening_family_totals = (
    opening_outcomes_count
    .groupby(['color', 'opening_family'])['count']
    .transform('sum')
)

# Share of each result within its opening, plus the opening's total game count.
opening_outcomes_count = opening_outcomes_count.assign(
    percentage=opening_outcomes_count['count'] / opening_family_totals * 100,
    games_count=opening_family_totals,
)

def convert_score_to_result(df, score_col = 'result', color_col = 'color'):
    """Translate raw game scores into the outcome from the player's side.

    The score is read from ``score_col`` and the side played from ``color_col``:

    * ``'Win'``  - White with ``'1-0'`` or Black with ``'0-1'``
    * ``'Loss'`` - White with ``'0-1'`` or Black with ``'1-0'``
    * ``'Draw'`` - ``'1/2-1/2'`` for either color
    * ``'Unknown'`` - anything else

    Args:
        df: Frame containing ``score_col`` and ``color_col``.
        score_col: Name of the column holding the raw score string.
        color_col: Name of the column holding ``'White'`` / ``'Black'``.

    Returns:
        A copy of ``df`` whose ``'result'`` column holds the outcome label.
        The input frame is left untouched, so calling this again on the same
        raw frame gives the same answer instead of turning every label into
        ``'Unknown'``.
    """
    score = df[score_col]
    played_white = df[color_col] == 'White'
    played_black = df[color_col] == 'Black'
    white_won = score == '1-0'
    black_won = score == '0-1'

    conditions = [
        # Win: the player's own color scored the point.
        (played_white & white_won) | (played_black & black_won),
        # Loss: the opposite color scored the point.
        (played_white & black_won) | (played_black & white_won),
        # Draw: same score string regardless of color.
        score == '1/2-1/2',
    ]
    choices = ['Win', 'Loss', 'Draw']

    # Work on a copy: the output column is always 'result', which by default is
    # also the input column, so writing in place would destroy the raw scores.
    converted = df.copy()
    converted['result'] = np.select(conditions, choices, default='Unknown')
    return converted


# Replace raw scores with Win/Loss/Draw labels, then order the rows in
# descending order of games played, opening family and result.
opening_outcomes_count = (
    convert_score_to_result(opening_outcomes_count)
    .sort_values(
        by=['games_count', 'opening_family', 'result'],
        ascending=False,
    )
)

In [48]:
def identify_weak_openings(df):
    """Return the rows of openings that are lost more often than they are won.

    ``df`` is expected to hold one row per (``color``, ``opening_family``,
    ``result``) with a ``percentage`` column, where ``result`` is one of the
    labels ``'Win'`` / ``'Loss'`` / (others).

    An opening that has no ``'Win'`` row (or no ``'Loss'`` row) is treated as
    having a 0% rate for that outcome. Without this, an opening that was never
    won would compare as NaN and be silently left out, although it is exactly
    the kind of opening this filter is meant to surface.

    Args:
        df: Per-opening outcome frame with columns ``color``,
            ``opening_family``, ``result`` and ``percentage``.

    Returns:
        A new frame with the same columns as ``df``, containing every row
        (all results) of each (color, opening_family) whose win percentage is
        strictly below its loss percentage. ``df`` itself is not modified.
    """
    group_keys = ['color', 'opening_family']

    def _outcome_rate(outcome):
        # Keep the percentage only on rows of the requested outcome (0 on the
        # others), then spread the group's total onto every row of the group.
        shares = df[group_keys].assign(
            share=df['percentage'].where(df['result'] == outcome, 0.0)
        )
        return shares.groupby(group_keys)['share'].transform('sum')

    win_rate = _outcome_rate('Win')
    loss_rate = _outcome_rate('Loss')

    return df[win_rate < loss_rate]

# Rows of the openings whose win percentage is below their loss percentage.
weak_openings = identify_weak_openings(opening_outcomes_count)

In [38]:
def normalized_chess_com_openings(path='/Users/davidcourtinot/repos/personal/chess-stats/chess-com-openings.tsv'):
    """Load the chess.com openings list and tag it with provenance columns.

    Args:
        path: Location of the tab-separated openings file. Defaults to the
            original local path so existing calls keep working; pass another
            path to run the notebook on a different machine.

    Returns:
        The file's contents as a DataFrame with three added columns:
        ``source`` (``'chess.com scraping'``), ``trustworthiness`` (``3``) and
        an empty ``eco`` column, matching the ``eco`` column that the Kaggle
        and chess-canvas loaders produce.
    """
    # NOTE(review): the file is presumably already using the `opening_name` /
    # `opening_moves` column names, since nothing is renamed here - confirm.
    openings = pd.read_csv(path, sep='\t')
    openings['source'] = 'chess.com scraping'
    openings['trustworthiness'] = 3
    openings['eco'] = None
    return openings

def normalized_kaggle_openings(path='/Users/davidcourtinot/repos/personal/chess-stats/kaggle-openings.csv'):
    """Load the Kaggle openings dataset and reduce it to the shared schema.

    Statistics and per-move columns are dropped, and the remaining columns are
    renamed to ``opening_name``, ``eco`` and ``opening_moves``.

    Args:
        path: Location of the comma-separated Kaggle file. Defaults to the
            original local path so existing calls keep working.

    Returns:
        DataFrame with the renamed columns plus ``source`` (``'Kaggle'``) and
        ``trustworthiness`` (``2``).

    Raises:
        KeyError: If any of the columns listed for removal is missing from the
            file (pandas' default behavior for ``drop``).
    """
    unused_columns = [
        '#', 'Num Games', 'Opponent Win %', 'Draw %', 'Player Win %', 'Colour', 'Avg Player',
        'Last Played', 'Perf Rating', 'moves_list', 'move1w', 'move1b', 'move2w', 'move2b', 'move3w', 'move3b',
        'move4w', 'move4b', 'White_win%', 'Black_win%', 'White_odds', 'White_Wins', 'Black_Wins'
    ]

    openings = pd.read_csv(path, sep=',')
    openings = openings.drop(columns=unused_columns)
    openings = openings.rename(columns={
        'Opening': 'opening_name',
        "ECO": "eco",
        'Moves': 'opening_moves'
    })
    # Repair move lists whose start reads '1.00E+0...' instead of '1.e...'
    # (presumably a spreadsheet turned '1.e4' into scientific notation - confirm).
    # regex=False is required: as a regular expression the '+' is a quantifier,
    # so the literal text would never match on pandas versions where str.replace
    # defaults to regex matching.
    openings['opening_moves'] = openings['opening_moves'].str.replace('1.00E+0', '1.e', regex=False)
    openings['source'] = 'Kaggle'
    openings['trustworthiness'] = 2
    return openings

def normalized_chess_canvas_openings(path='/Users/davidcourtinot/repos/personal/chess-stats/chess-canvas-openings.csv'):
    """Load the chess-canvas openings list and map it onto the shared schema.

    Columns ``name`` / ``ECO`` / ``moves`` are renamed to ``opening_name`` /
    ``eco`` / ``opening_moves`` (columns that are absent are simply not
    renamed), and the move text is cleaned up.

    Args:
        path: Location of the comma-separated chess-canvas file. Defaults to
            the original local path so existing calls keep working.

    Returns:
        DataFrame with the renamed columns plus ``source``
        (``'Github chess-canvas'``) and ``trustworthiness`` (``1``).
    """
    openings = pd.read_csv(path, sep=',')
    openings = openings.rename(columns={
        'name': 'opening_name',
        "ECO": "eco",
        'moves': 'opening_moves'
    })

    moves = openings['opening_moves']
    # Repair move lists whose start reads '1.00E+0...' instead of '1.e...'
    # (presumably a spreadsheet turned '1.e4' into scientific notation - confirm).
    # regex=False is required: as a regular expression the '+' is a quantifier,
    # so the literal text would never match on pandas versions where str.replace
    # defaults to regex matching.
    moves = moves.str.replace('1.00E+0', '1.e', regex=False)
    # Surrounding whitespace would otherwise stop identical move lists from
    # different sources from comparing equal.
    openings['opening_moves'] = moves.str.strip()

    openings['source'] = 'Github chess-canvas'
    openings['trustworthiness'] = 1
    return openings

# Load the three opening catalogues. Each loader tags its rows with a `source`
# label and a `trustworthiness` score (chess.com 3, Kaggle 2, chess-canvas 1).
chess_com_openings = normalized_chess_com_openings()
kaggle_openings = normalized_kaggle_openings()
chess_canvas_openings = normalized_chess_canvas_openings()

In [42]:
def reconcile_openings(openings_dfs):
    """Collapse several opening catalogues into one row per move sequence.

    The frames are stacked, ordered by ``opening_moves`` and then by ascending
    ``trustworthiness``, and grouped on ``opening_moves``. Taking the last
    entry of each group means the most trusted source comes last and wins;
    pandas' ``last`` takes the last non-null value of each column, so a value
    missing in the most trusted row falls back to a less trusted one.

    Args:
        openings_dfs: Sequence of DataFrames that each carry ``opening_moves``
            and ``trustworthiness`` columns.

    Returns:
        DataFrame with ``opening_moves`` as a regular column and one row per
        distinct move sequence.
    """
    return (
        pd.concat(openings_dfs)
        .sort_values(by=['opening_moves', 'trustworthiness'])
        .groupby('opening_moves')
        .last()
        .reset_index()
    )

# Merge the three catalogues into one row per `opening_moves`; where sources
# overlap, the one with the highest `trustworthiness` takes precedence.
reconciled_openings = reconcile_openings([chess_com_openings, kaggle_openings, chess_canvas_openings])