In [1]:
import pandas as pd
# Opt in to pandas' future "no silent downcasting" behaviour.
# NOTE(review): presumably enabled to avoid the downcasting FutureWarning from
# the `.fillna(0)` calls in calculate_player_stats — confirm before removing.
pd.set_option('future.no_silent_downcasting', True)

In [2]:
# Relative path to the match table this notebook reads.
matches_csv_path = '../../preprocessing/data/matches.csv'
matches = pd.read_csv(matches_csv_path)

In [3]:
def calculate_player_stats(matches_df: pd.DataFrame, round_order: dict[str, int]) -> pd.DataFrame:
    """Annotate every match with both players' running in-tournament differentials.

    Matches are ordered by ``Date`` and then by ``Round_Num`` (the ``Round``
    label mapped through ``round_order``). Walking that order, each match is
    tagged with the cumulative set and game differentials that its winner and
    its loser had accumulated in the same tournament *before* the match; the
    match's own score is folded into the running totals only afterwards.

    Only the games of sets that were actually played count: for each match the
    first ``Wsets + Lsets`` entries of ``W1..W5`` / ``L1..L5`` are summed, with
    missing values treated as 0. A missing ``Wsets`` / ``Lsets`` counts as 0.

    Args:
        matches_df: One row per match. Reads the columns ``Date``, ``Round``,
            ``tournament_id``, ``winner_id``, ``loser_id``, ``Wsets``,
            ``Lsets``, ``W1``..``W5`` and ``L1``..``L5``. The frame is left
            unmodified.
        round_order: Maps ``Round`` labels to a sortable number. Labels absent
            from the mapping become NaN in ``Round_Num`` and sort last within
            their date.

    Returns:
        A new DataFrame sorted by ``Date`` then ``Round_Num`` that contains the
        input columns plus ``Round_Num`` and four float64 columns:
        ``Winner_Set_Diff_Tournament``, ``Winner_Game_Diff_Tournament``,
        ``Loser_Set_Diff_Tournament`` and ``Loser_Game_Diff_Tournament``.
        The columns are present even when the input has no rows.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    winner_game_cols = ['W1', 'W2', 'W3', 'W4', 'W5']
    loser_game_cols = ['L1', 'L2', 'L3', 'L4', 'L5']

    # assign() works on a copy, so the caller's frame never gains Round_Num.
    ordered = matches_df.assign(
        Round_Num=matches_df['Round'].map(round_order).to_numpy()
    ).sort_values(by=['Date', 'Round_Num'])

    # Fill missing set scores once for the whole frame instead of once per row.
    winner_games_matrix = ordered[winner_game_cols].fillna(0).to_numpy()
    loser_games_matrix = ordered[loser_game_cols].fillna(0).to_numpy()

    # (tournament_id, player_id) -> running differentials within that tournament.
    running = {}
    # Pre-match values are collected positionally, so the result does not rely
    # on the frame's index labels being unique.
    before = {
        'Winner_Set_Diff_Tournament': [],
        'Winner_Game_Diff_Tournament': [],
        'Loser_Set_Diff_Tournament': [],
        'Loser_Game_Diff_Tournament': [],
    }

    match_rows = zip(
        ordered['tournament_id'],
        ordered['winner_id'],
        ordered['loser_id'],
        ordered['Wsets'],
        ordered['Lsets'],
        winner_games_matrix,
        loser_games_matrix,
    )
    for tournament_id, winner_id, loser_id, wsets, lsets, winner_games_row, loser_games_row in match_rows:
        winner_totals = running.setdefault((tournament_id, winner_id), {'sets': 0, 'games': 0})
        loser_totals = running.setdefault((tournament_id, loser_id), {'sets': 0, 'games': 0})

        # Record the state *before* this match so the feature never leaks the
        # outcome of the match it is attached to.
        before['Winner_Set_Diff_Tournament'].append(winner_totals['sets'])
        before['Winner_Game_Diff_Tournament'].append(winner_totals['games'])
        before['Loser_Set_Diff_Tournament'].append(loser_totals['sets'])
        before['Loser_Game_Diff_Tournament'].append(loser_totals['games'])

        winner_sets = int(wsets) if pd.notna(wsets) else 0
        loser_sets = int(lsets) if pd.notna(lsets) else 0
        sets_played = winner_sets + loser_sets

        set_diff = winner_sets - loser_sets
        game_diff = sum(winner_games_row[:sets_played]) - sum(loser_games_row[:sets_played])

        winner_totals['sets'] += set_diff
        winner_totals['games'] += game_diff
        loser_totals['sets'] -= set_diff
        loser_totals['games'] -= game_diff

    # Attach all four columns at once; float64 keeps the dtype stable (also
    # for an empty frame) and arrays are assigned by position, not by label.
    return ordered.assign(**{
        column: pd.Series(values, dtype='float64').to_numpy()
        for column, values in before.items()
    })


# Round labels listed from earliest to latest stage; each label is mapped to
# its 1-based position so rounds can be sorted numerically. Labels that are
# not listed here produce NaN when a column is mapped through this dict.
round_order = {
    label: position
    for position, label in enumerate(
        [
            '1st Round',
            '2nd Round',
            '3rd Round',
            '4th Round',
            'Quarterfinals',
            'Semifinals',
            'The Final',
        ],
        start=1,
    )
}


In [4]:
# Rebind `matches` to the sorted frame returned by calculate_player_stats,
# which carries Round_Num and the pre-match tournament differentials.
matches = calculate_player_stats(
    matches_df=matches,
    round_order=round_order,
)

In [5]:
# Columns written to the CSV: the match key, the round number and the four
# pre-match tournament differentials.
output_columns = [
    "match_id",
    "Round_Num",
    'Winner_Set_Diff_Tournament',
    'Winner_Game_Diff_Tournament',
    'Loser_Set_Diff_Tournament',
    'Loser_Game_Diff_Tournament',
]
matches[output_columns].to_csv(
    "../data/player_current_tournament_record.csv",
    index=False,
)