In [1]:
import pandas as pd
import numpy as np
import altair as alt
from scipy.optimize import minimize
import json

from true_skill_through_time import *

alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

This notebook does essentially the same thing as the `true_skill_wc3` notebook, except it does it via the `TrueSkillThroughTimeApplied` class.

The class is to be used on more sports/esports, outside of Warcraft3.

In [2]:
games_raw = pd.read_csv('data/warcraft3.csv').query('(competitor_1_score > -0.0001) & (competitor_2_score > -0.0001)').iloc[-10000:]
games_raw['timestamp'] = pd.to_datetime(games_raw['date'])

#### there are 133779 rows in warcraft3.csv, 130000 after filtering

In [14]:
len(games_raw)

10000

In [15]:
game_composition = []
times = []
np.random.seed(0)

for _, row in games_raw.iterrows():
    c1, c2, c1s, c2s, t = row['competitor_1'], row['competitor_2'], row['competitor_1_score'], row['competitor_2_score'], row['timestamp']

    assert c1s == int(c1s)
    assert c2s == int(c2s)
    
    comp = [(c1, c2)] * int(c1s) + [(c2, c1)] * int(c2s)
    comp = np.random.permutation(comp).tolist() # Game order matters and we don't actually know it, so we randomize over it.

    for cp_g in comp:
        game_composition.append(cp_g)
        times.append(t)

games = pd.DataFrame(game_composition, columns=['winner', 'loser']).assign(timestamp = times)
games.head()

Unnamed: 0,winner,loser,timestamp
0,WoodyWood,Singroso,2023-08-20
1,WoodyWood,Ag3nt,2023-08-20
2,ZeLt,Ag3nt,2023-08-20
3,D3r_schosch,Ag3nt,2023-08-20
4,Rav3n,Rici,2023-08-20


In [16]:
games.shape

(18370, 3)

In [13]:
total_matches_df = games.winner.value_counts().add(games.loser.value_counts(), fill_value=0).sort_values()

In [14]:
total_matches_df.describe()

count    1150.000000
mean       31.530435
std        96.343274
min         1.000000
25%         2.000000
50%         6.000000
75%        19.000000
max      1324.000000
Name: count, dtype: float64

In [15]:
players_ge_40_matches_df = total_matches_df[total_matches_df>=40.0]

In [16]:
players_ge_40_matches_df = players_ge_40_matches_df.to_frame().reset_index()

In [17]:
players_ge_40_matches_df = players_ge_40_matches_df.rename(columns={'index': 'fighter'})

In [18]:
players_ge_40_matches_lst = players_ge_40_matches_df.fighter.unique().tolist()

In [19]:
with open("data/players_ge_40_matches_lst_warcraft3.json", "w") as f:
    json.dump(players_ge_40_matches_lst, f, indent=4)

In [17]:
#games.to_csv("data/warcraft3_games_last10k.csv", index=False)

In [3]:
games = pd.read_csv("data/warcraft3_games_last10k.csv")

In [4]:
games.timestamp = pd.to_datetime(games.timestamp)

In [5]:
self = TrueSkillThroughTimeApplied(games)

In [6]:
print(games.head().to_string())

        winner     loser  timestamp  time_0_to_999_int
0    WoodyWood  Singroso 2023-08-20                  0
1    WoodyWood     Ag3nt 2023-08-20                  0
2         ZeLt     Ag3nt 2023-08-20                  0
3  D3r_schosch     Ag3nt 2023-08-20                  0
4        Rav3n      Rici 2023-08-20                  0


In [7]:
self.learn_optimal_parameters()


gamma: 0.0200, sigma: 0.0200, beta: 0.0200, 
NLE: 11903.7462

gamma: 0.0200, sigma: 0.0200, beta: 0.0200, 
NLE: 11903.7461

gamma: 0.0200, sigma: 0.0200, beta: 0.0200, 
NLE: 11903.7462

gamma: 0.0200, sigma: 0.0200, beta: 0.0200, 
NLE: 11903.7462

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 12004.7565

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 12004.7565

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 12004.7565

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 12004.7565

gamma: 0.0464, sigma: 0.0464, beta: 0.0464, 
NLE: 11004.7041

gamma: 0.0464, sigma: 0.0464, beta: 0.0464, 
NLE: 11004.7041

gamma: 0.0464, sigma: 0.0464, beta: 0.0464, 
NLE: 11004.7041

gamma: 0.0464, sigma: 0.0464, beta: 0.0464, 
NLE: 11004.7041

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 10925.2353

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 10925.2353

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 10925.2353

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 10925.2353

gamma: 

In [8]:
optimal_gamma =  0.0902
optimal_sigma = 3.8727
optimal_beta = 2.6058

self.set_optimal_parameters(gamma = optimal_gamma, sigma = optimal_sigma, beta = optimal_beta)

In [2]:
top_players_elo = ['Happy', 'Lyn', 'Fortitude', 'Eer0', 'Sok', 'ColorFul', 'Kaho', 'FoCuS', 'Moon',  'LabyRinth', 'Life', 'LawLiet', 'Blade', 'Fly100%', 'Starbuck', 'Soin', 'Infi', 'Sini',  'Dise', 'Chaemiko', 'EleGaNt', 'Leon', 'Lin_Guagua', 'WFZ', 'XiaoKK', 'XlorD', 'ReMinD', 'PaTo', 'HawK']

In [10]:
skill_curves = self.set_skill_curves()

In [11]:
self.plot_player_skills(players = top_players_elo[:5])

In [12]:
self.plot_calibration()

  bucket_means = df.groupby('win_prob_bucket').agg(


In [39]:
wins = games.groupby('winner').size().reset_index(name='wins')
losses = games.groupby('loser').size().reset_index(name='losses')
result = pd.merge(wins, losses, left_on='winner', right_on='loser', how='outer').fillna(0)
result = result.rename(columns={'winner': 'player'})

In [40]:
result_topplayer = result[result.player.isin(players_ge_40_matches_lst)]

In [41]:
result_topplayer['win_rate'] = result_topplayer.apply(lambda row: row.wins/(row.wins+row.losses), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_topplayer['win_rate'] = result_topplayer.apply(lambda row: row.wins/(row.wins+row.losses), axis=1)


In [42]:
result_topplayer.win_rate.describe()

count    181.000000
mean       0.485931
std        0.125252
min        0.162791
25%        0.407407
50%        0.499368
75%        0.576000
max        0.776119
Name: win_rate, dtype: float64

### use -20000 to -10000 games as train set and last 10000 games as test set

In [2]:
games_raw = pd.read_csv('data/warcraft3.csv').query('(competitor_1_score > -0.0001) & (competitor_2_score > -0.0001)').iloc[-20000:-10000]
games_raw['timestamp'] = pd.to_datetime(games_raw['date'])
games_raw_oos = pd.read_csv('data/warcraft3.csv').query('(competitor_1_score > -0.0001) & (competitor_2_score > -0.0001)').iloc[-10000:]
games_raw_oos['timestamp'] = pd.to_datetime(games_raw_oos['date'])

In [3]:
game_composition = []
times = []
np.random.seed(0)

for _, row in games_raw.iterrows():
    c1, c2, c1s, c2s, t = row['competitor_1'], row['competitor_2'], row['competitor_1_score'], row['competitor_2_score'], row['timestamp']

    assert c1s == int(c1s)
    assert c2s == int(c2s)
    
    comp = [(c1, c2)] * int(c1s) + [(c2, c1)] * int(c2s)
    comp = np.random.permutation(comp).tolist() # Game order matters and we don't actually know it, so we randomize over it.

    for cp_g in comp:
        game_composition.append(cp_g)
        times.append(t)

games = pd.DataFrame(game_composition, columns=['winner', 'loser']).assign(timestamp = times)
games.head()

Unnamed: 0,winner,loser,timestamp
0,Eightyfour,Inspired,2022-07-10
1,Inspired,Eightyfour,2022-07-10
2,Inspired,Eightyfour,2022-07-10
3,Sasuke,BlaST9Z,2022-07-10
4,BlaST9Z,Sasuke,2022-07-10


In [4]:
game_composition_oos = []
times_oos = []
np.random.seed(0)

for _, row in games_raw_oos.iterrows():
    c1, c2, c1s, c2s, t = row['competitor_1'], row['competitor_2'], row['competitor_1_score'], row['competitor_2_score'], row['timestamp']

    assert c1s == int(c1s)
    assert c2s == int(c2s)
    
    comp = [(c1, c2)] * int(c1s) + [(c2, c1)] * int(c2s)
    comp = np.random.permutation(comp).tolist() # Game order matters and we don't actually know it, so we randomize over it.

    for cp_g in comp:
        game_composition_oos.append(cp_g)
        times_oos.append(t)

games_oos = pd.DataFrame(game_composition_oos, columns=['winner', 'loser']).assign(timestamp = times_oos)
games_oos.head()

Unnamed: 0,winner,loser,timestamp
0,WoodyWood,Singroso,2023-08-20
1,WoodyWood,Ag3nt,2023-08-20
2,ZeLt,Ag3nt,2023-08-20
3,D3r_schosch,Ag3nt,2023-08-20
4,Rav3n,Rici,2023-08-20


In [5]:
self = TrueSkillThroughTimeApplied(games)

In [6]:
self.learn_optimal_parameters()


gamma: 0.0200, sigma: 0.0200, beta: 0.0200, 
NLE: 10668.8545

gamma: 0.0200, sigma: 0.0200, beta: 0.0200, 
NLE: 10668.8545

gamma: 0.0200, sigma: 0.0200, beta: 0.0200, 
NLE: 10668.8545

gamma: 0.0200, sigma: 0.0200, beta: 0.0200, 
NLE: 10668.8545

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 11544.6210

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 11544.6210

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 11544.6210

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 11544.6210

gamma: 0.0401, sigma: 0.0401, beta: 0.0401, 
NLE: 10404.0320

gamma: 0.0401, sigma: 0.0401, beta: 0.0401, 
NLE: 10404.0319

gamma: 0.0401, sigma: 0.0401, beta: 0.0401, 
NLE: 10404.0320

gamma: 0.0401, sigma: 0.0401, beta: 0.0401, 
NLE: 10404.0320

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 10294.2822

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 10294.2822

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 10294.2822

gamma: 0.1000, sigma: 0.1000, beta: 0.1000, 
NLE: 10294.2822

gamma: 

In [7]:
optimal_gamma = 0.1000
optimal_sigma = 2.5164
optimal_beta = 1.7097

self.set_optimal_parameters(gamma = optimal_gamma, sigma = optimal_sigma, beta = optimal_beta)

In [8]:
skill_curves = self.set_skill_curves()

In [9]:
self.plot_calibration()

  bucket_means = df.groupby('win_prob_bucket').agg(


In [10]:
self.plot_calibration_oos(oos_data=games_oos)

  bucket_means = df.groupby('win_prob_bucket').agg(


### as in the oos_calibration plot, we still have a close to diagonal line for pred_win_prob vs avg_outcome. However, the model tends to underestimate the underdog player and overestimate the top dog since it only uses the historical data to calculate win_prob