In [1]:
import numpy as np
import pandas as pd
from scipy import stats

import chess_soup

In [2]:
# Build the working frame from whatever chess_soup.get_full_player_list() returns.
player_records = chess_soup.get_full_player_list()
df = pd.DataFrame(player_records)

Done


In [3]:
#Limit to players active in current rating list
# "Active" here means a strictly positive value in the 'games' column.
is_active = df['games'] > 0
df = df[is_active]

In [4]:
#Top 10 ages for average rank in top 100
# Collect (age, mean rank) pairs for every age from 10 to 89 that has at least one row.
age_rank_list = []

for age in range(10, 90):
    mean = df.loc[df['age'] == age, 'rank'].mean()
    # An age with no rows produces NaN. Test for it by value: `mean is not np.nan`
    # compares object identity, so a NaN that is not the np.nan singleton (e.g. a
    # numpy float64 NaN) would be appended and make the sort below unreliable.
    if pd.notna(mean):
        age_rank_list.append((age, mean))

# Lowest mean rank first.
age_rank_list.sort(key=lambda item: item[1])

for position, (age, mean_rank) in enumerate(age_rank_list[:10], start=1):
    print(f'{position}) Age {age} - Average rank : {mean_rank:.1f}')

1) Age 32 - Average rank : 45.1
2) Age 31 - Average rank : 46.1
3) Age 34 - Average rank : 46.3
4) Age 35 - Average rank : 46.7
5) Age 23 - Average rank : 46.7
6) Age 33 - Average rank : 47.6
7) Age 22 - Average rank : 47.9
8) Age 21 - Average rank : 48.5
9) Age 70 - Average rank : 49.0
10) Age 30 - Average rank : 49.6


In [5]:
#Sort by name and date
# Reassign instead of sorting in place so re-running the cell does not depend on
# hidden mutation; the shift-based "previous_*" columns built afterwards rely on
# each player's rows being adjacent and ordered by rating_list.
df = df.sort_values(by=['name', 'rating_list'])

In [6]:
def get_month_diff(rating_list_1, rating_list_2):
    """Approximate number of months elapsed from rating_list_1 to rating_list_2.

    The two arguments are subtracted (second minus first); the result's ``days``
    attribute is divided by 30 and rounded with the built-in ``round``.

    Returns:
        The rounded month count, or ``np.nan`` if any step raises an exception
        (for example when one of the values cannot be subtracted).
    """
    days_per_month = 30
    try:
        elapsed_days = (rating_list_2 - rating_list_1).days
        months = round(elapsed_days / days_per_month)
    except Exception:
        # Best effort: anything that cannot be turned into a month count is NaN.
        months = np.nan
    return months

In [7]:
#Add data from previous rating list to dataframe for comparison
# Each "previous_*" column is its source column shifted down by one row, so it
# holds the value from the row directly above in the current ordering.
for source_col, lagged_col in [
    ('name', 'previous_name'),
    ('rating', 'previous_rating'),
    ('rank', 'previous_rank'),
    ('rating_list', 'previous_list'),
]:
    df[lagged_col] = df[source_col].shift(1)

In [8]:
#Remove all rows where the previous rating is for a different player
# The rows themselves stay; only their "previous_*" comparison values are blanked
# out wherever the row above belongs to another name.
previous_is_other_player = df['name'] != df['previous_name']
comparison_cols = ['previous_rating', 'previous_list', 'previous_rank']
df.loc[previous_is_other_player, comparison_cols] = np.nan

In [9]:
#Previous name is now useless
df = df.drop(columns='previous_name')

In [10]:
#Time in month from previous rating list to assign weight to rank changes
def _months_since_previous_list(row):
    """Run get_month_diff on one row's previous_list and rating_list values."""
    return get_month_diff(row['previous_list'], row['rating_list'])

df['month_diff'] = df.apply(_months_since_previous_list, axis=1)

In [11]:
#Rating difference from previous list
# Column arithmetic gives the same values as the former row-by-row apply without
# calling a Python function once per row. Positive means the rating went up.
df['rating_diff'] = df['rating'] - df['previous_rating']

In [12]:
#Rank difference from previous list
# Computed as previous minus current, so the value is positive when the rank
# number decreased. Column arithmetic replaces the former row-by-row apply.
df['rank_diff'] = df['previous_rank'] - df['rank']

In [13]:
#Rating difference multiplied by months since last rating
# Column arithmetic replaces the former row-by-row apply (same values).
# NOTE(review): multiplying by the gap makes longer absences count more; this is
# not a per-month rate (that would divide) - confirm this weighting is intended.
df['weighted_rating_diff'] = df['rating_diff'] * df['month_diff']

In [14]:
#Rank difference multiplied by months since last rating
# Column arithmetic replaces the former row-by-row apply (same values).
df['weighted_rank_diff'] = df['rank_diff'] * df['month_diff']

In [15]:
#Drop all rows with no comparison values
# NOTE(review): with no `subset`, this removes a row if *any* of its columns is
# missing, not only the comparison columns - confirm that is the intent.
df = df.dropna(axis=0, how='any')

In [16]:
#Remove outliers (more than 3 standard deviations)
# `stats` is scipy.stats, imported with the other dependencies in the first cell.
OUTLIER_Z_THRESHOLD = 3
rating_diff_zscores = np.abs(stats.zscore(df['weighted_rating_diff']))
df = df[rating_diff_zscores < OUTLIER_Z_THRESHOLD]
# Only weighted_rating_diff is screened; weighted_rank_diff is left unfiltered.

In [17]:
#Show the mean difference in rank and rating between rating lists for each age group
# Pick the two columns before aggregating: averaging every column first fails on
# pandas >= 2.0 when the frame holds non-numeric columns (such as 'name'), and
# wastes work on columns that are discarded anyway.
df.groupby(['age'])[['weighted_rank_diff', 'weighted_rating_diff']].mean()

Unnamed: 0_level_0,weighted_rank_diff,weighted_rating_diff
age,Unnamed: 1_level_1,Unnamed: 2_level_1
15,31.2,31.2
16,13.388889,16.388889
17,6.238095,10.095238
18,8.571429,14.696429
19,7.994413,10.52514
20,6.631111,9.551111
21,5.475285,10.665399
22,6.075843,10.176966
23,-0.599068,3.083916
24,1.792812,3.209302
