In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import re
import os
# import math
from tqdm.notebook import tqdm

In [None]:
# Directory (relative to the notebook's working directory) that holds the
# preprocessed CSV files. The trailing slash matters: the file name is
# appended to this string by plain concatenation when the data is loaded.
dir_preprocessed = 'data_preprocessed/'

In [None]:
# Load the preprocessed match table; the first CSV column is used as the index.
df_matches = pd.read_csv(f'{dir_preprocessed}matches_male.csv', index_col=0)

In [None]:
# Preview the first five rows to check that the columns loaded as expected.
df_matches.head(n=5)

In [None]:
def expected_score(rating1, rating2):
    """Return the Elo expected score of player 1 against player 2.

    This is the base-10 logistic curve with a 400-point scale: equal ratings
    give 0.5, and a 400-point lead gives 1 / 1.1 (about 0.91).

    Parameters
    ----------
    rating1 : number
        Rating of the player whose expected score is returned.
    rating2 : number
        Rating of the opponent.

    Returns
    -------
    float
        Expected score of player 1, strictly between 0 and 1.
    """
    rating_gap = rating2 - rating1
    odds_against = 10 ** (rating_gap / 400)
    return 1 / (1 + odds_against)

# Sanity check of expected_score: print the expected score for rating leads of
# 0, 20, ..., 380 points over an opponent rated 0. The first line should show
# a probability of 0.5 and the values should rise as the lead grows.
for rating1 in range(0, 400, 20):
    print(f'Delta rating: {rating1:3}.  Probability: {expected_score(rating1, 0)}')

In [None]:
def new_ratings(rating_winner, rating_loser, K = 90, result = None, use_games = False):
    """Apply one Elo update for a finished match.

    Parameters
    ----------
    rating_winner, rating_loser : number
        Pre-match ratings of the winner and the loser.
    K : number
        Update factor: the rating change is K times the gap between the
        actual and the expected score of the winner.
    result : hashable, optional
        Game score such as '3/1'. Only consulted when ``use_games`` is truthy.
    use_games : bool
        If falsy, every win counts as an actual score of 1. If truthy, the
        actual score is looked up from the game score.

    Returns
    -------
    tuple
        ``(expected score of the winner, new winner rating, new loser rating)``.
        The update is zero-sum: what the winner gains, the loser loses.
    """
    exp_score = expected_score(rating_winner, rating_loser)

    if use_games:
        # Closer matches earn the winner a smaller actual score.
        score_by_result = {
            '3/0': 1.00,
            '3/1': 0.90,
            '3/2': 0.80,
            '2/0': 0.90,
            '2/1': 0.80,
        }
        # Any other result (e.g. a walkover) is treated as uninformative:
        # falling back to the expected score makes both rating changes zero.
        actual_score = score_by_result.get(result, exp_score)
    else:
        actual_score = 1

    surprise_winner = actual_score - exp_score
    surprise_loser = exp_score - actual_score
    return (
        exp_score,
        rating_winner + K*surprise_winner,
        rating_loser + K*surprise_loser,
    )

In [None]:
def create_elo_ratings(df_input, use_games = False, K = 90):
    """Replay all matches in row order and build Elo ratings for every player.

    Every player starts at 1500 the first time they appear. Rows are processed
    in the order they occur in ``df_input``, so the frame is assumed to be in
    chronological order (not checked here).

    Parameters
    ----------
    df_input : pandas.DataFrame
        Needs the columns ``winner_name``, ``loser_name`` and
        ``results_processed``. It is not modified.
    use_games : bool
        Passed on to ``new_ratings``.
    K : number
        Update factor passed on to ``new_ratings``.

    Returns
    -------
    tuple
        ``(df, player_ratings)`` where ``df`` is a copy of ``df_input`` with
        the added columns ``winner_rating`` and ``loser_rating`` (pre-match
        ratings) and ``predicted_score`` (expected score of the winner), and
        ``player_ratings`` maps each player to their rating history: the
        starting rating followed by one entry per match played.
    """
    df = df_input.copy()

    starting_rating = 1500
    player_ratings = {}
    pre_match_winner = []
    pre_match_loser = []
    winner_expectations = []

    match_stream = zip(
        df.winner_name.to_list(),
        df.loser_name.to_list(),
        df.results_processed.to_list(),
    )

    for winner, loser, result in match_stream:
        # Register unseen players on first sight, winner before loser.
        history_winner = player_ratings.setdefault(winner, [starting_rating])
        rating_winner = history_winner[-1]

        history_loser = player_ratings.setdefault(loser, [starting_rating])
        rating_loser = history_loser[-1]

        pre_match_winner.append(rating_winner)
        pre_match_loser.append(rating_loser)

        exp, rating_winner_new, rating_loser_new = new_ratings(
            rating_winner,
            rating_loser,
            K = K,
            result = result,
            use_games = use_games,
        )

        winner_expectations.append(exp)
        history_winner.append(rating_winner_new)
        history_loser.append(rating_loser_new)

    df['winner_rating'] = pre_match_winner
    df['loser_rating'] = pre_match_loser
    df['predicted_score'] = winner_expectations

    return df, player_ratings

In [None]:
def create_current_ratings(player_ratings):
    """Summarise each player's latest rating and number of matches played.

    Parameters
    ----------
    player_ratings : dict
        Maps player -> rating history list in the format produced by
        ``create_elo_ratings``: the first entry is the starting rating and one
        further entry is appended for every match the player took part in.

    Returns
    -------
    pandas.DataFrame
        Indexed by player, with the columns ``rating`` (last entry of the
        history) and ``nMatches``, sorted by ``rating`` in descending order.
    """
    current_ratings = pd.Series(
        {player: history[-1] for player, history in player_ratings.items()},
        name='rating',
    )

    # The first history entry is the starting rating, not a match result, so
    # the number of matches is one less than the history length.
    n_matches = pd.Series(
        {player: len(history) - 1 for player, history in player_ratings.items()},
        name='nMatches',
    )

    df = pd.concat([current_ratings, n_matches], axis=1)
    return df.sort_values(by='rating', ascending=False)

In [None]:
def create_callabration_frame(df_matches, df_ratings, N=2, top_n=50, max_index=10000):
    """Build a calibration table: predicted vs. observed score of the favourite.

    Each match is viewed from the side of the higher-rated player (the one
    whose expected score exceeds 0.5). Predictions are binned and, per bin,
    the number of matches and the share the favourite actually won are
    reported.

    Parameters
    ----------
    df_matches : pandas.DataFrame
        Needs the columns ``predicted_score`` (expected score of the winner),
        ``winner_rating`` and ``loser_rating``. It is not modified.
    df_ratings : pandas.DataFrame
        Ratings table whose first column holds the ratings. It is assumed to
        be sorted in descending order, as ``create_current_ratings`` returns
        it -- TODO confirm for other callers.
    N : number
        Bin resolution: predictions are rounded to multiples of ``0.1 / N``
        (0.05 for the default ``N=2``).
    top_n : int
        Row position in ``df_ratings`` that supplies the rating cut-off. Only
        matches where at least one player's pre-match rating is strictly above
        that cut-off are kept.
    max_index : number or None
        Only rows whose index label is below this value are kept; ``None``
        disables the limit.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``predicted_score_better_player`` with the column pairs
        ``('true_score_better_player', 'count')`` and
        ``('true_score_better_player', 'mean')``.

    Raises
    ------
    IndexError
        If ``df_ratings`` has ``top_n`` rows or fewer.
    """
    cut_off = df_ratings.iloc[top_n, 0]

    df = df_matches.copy()

    def _favourite_bin(x):
        # Bin the winner's expected score, then mirror it onto the favourite's
        # side when the winner was the lower-rated player.
        binned = round(N*x, 1)/N
        return binned if x > 0.5 else 1 - binned

    df['predicted_score_better_player'] = df.predicted_score.apply(_favourite_bin)

    # 1 when the favourite won (the winner's expected score was above 0.5),
    # otherwise 0. Exact ties at 0.5 therefore count as 0.
    df['true_score_better_player'] = (df.predicted_score > 0.5).astype(int)

    keep = (df.winner_rating > cut_off) | (df.loser_rating > cut_off)
    if max_index is not None:
        keep = keep & (df.index < max_index)

    return (
        df[keep]
        .groupby('predicted_score_better_player')
        .agg({'true_score_better_player': ['count', 'mean']})
    )

In [None]:
def print_player_stats(df, player):
    """Print one line for every match in ``df`` that ``player`` took part in.

    Each line shows winner, loser, the processed result, both pre-match
    ratings, both seeds and the winner's predicted score.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``winner_name``, ``loser_name``, ``winner_rating``,
        ``loser_rating``, ``winner_seed``, ``loser_seed``, ``predicted_score``
        and ``results_processed``.
    player : str
        Name to look for in ``winner_name`` or ``loser_name``.
    """
    took_part = (df.winner_name == player) | (df.loser_name == player)

    for _, match in df[took_part].iterrows():
        w, l = match['winner_name'], match['loser_name']
        wr, lr = match['winner_rating'], match['loser_rating']
        ws, ls = match['winner_seed'], match['loser_seed']
        exp = match['predicted_score']
        result = match['results_processed']
        print(f'{w:20} beat {l:20} {result:8} {wr:.0f} vs {lr:.0f}   {ws:3} vs {ls:3}   {exp:.2f}')

# No-games analysis (match outcome only)

In [None]:
# Rate every match on the outcome alone (each win scores 1), with K = 40.
df_nogames, player_ratings_nogames = create_elo_ratings(
    df_matches,
    use_games=False,
    K=40,
)

In [None]:
# Latest rating per player for the outcome-only model; show the top 50 rows.
df_ratings_nogames = create_current_ratings(player_ratings=player_ratings_nogames)
df_ratings_nogames.head(n=50)

In [None]:
# Calibration table for the outcome-only model: binned prediction for the
# higher-rated player against how often that player actually won.
df_callab_nogames = create_callabration_frame(
    df_matches=df_nogames,
    df_ratings=df_ratings_nogames,
)
df_callab_nogames

In [None]:
# Match-by-match history of one player under the outcome-only model.
print_player_stats(df_nogames, player='Ramy Ashour')

# With-games analysis (game score weights the result)

In [None]:
# Rate every match with the game score taken into account, with K = 90.
df_games, player_ratings_games = create_elo_ratings(
    df_matches,
    use_games=True,
    K=90,
)

In [None]:
# Latest rating per player for the game-score model; show the top 50 rows.
df_ratings_games = create_current_ratings(player_ratings=player_ratings_games)
df_ratings_games.head(n=50)

In [None]:
# Calibration table for the game-score model: binned prediction for the
# higher-rated player against how often that player actually won.
df_callab_games = create_callabration_frame(
    df_matches=df_games,
    df_ratings=df_ratings_games,
)
df_callab_games

In [None]:
# Match-by-match history of one player under the game-score model.
print_player_stats(df_games, player='Ramy Ashour')

# Highest-ever Elo rating

In [None]:
# Show the match in which the losing player carried the highest pre-match rating.
# The rating columns are only added to the frames returned by create_elo_ratings;
# nothing in the cells shown here adds a loser_rating column to df_matches, so
# looking it up there fails on a fresh kernel. The game-score frame is used here;
# swap in df_nogames to answer the same question for the outcome-only model.
df_games.loc[df_games.loser_rating.idxmax()]