In [21]:
import pandas as pd
from multielo import MultiElo, Player, Tracker
import altair as alt

In [22]:
df_competition = pd.read_excel("Data/DL_competitions.xlsx")
df_competitors = pd.read_csv("Data/800-metres_men_withoutPB.csv")
df_competitors.dropna(how='all', inplace=True)
df_competitors = df_competitors[df_competitors["Position in the competition"].str.contains("f") == False]
df_competitors = df_competitors[df_competitors["Position in the competition"].str.contains("h") == False]

In [23]:
#Create a flag where the date need to be cleaned
df_competition['two_dates'] = df_competition.Date.str[2] == '–'

#Sub dataframe with a copy of the date to be cleaned
df_duplicate = df_competition[df_competition['two_dates'] == True]

#Cleaning date in the 1st dataframe by keeping the first date
df_competition.loc[df_competition.two_dates==True, 'Date'] = df_competition.loc[df_competition.two_dates==True, 'Date'].str[3:]

#Cleaning date in the 2nd dataframe by keeping the second date
df_duplicate.Date = df_duplicate.Date.str[0:2] + ' ' + df_duplicate.Date.str[6:]

#Agrregating results
df_competition_clean = df_competition.append([df_duplicate],ignore_index=True)

#Merging competion and results based on the date and the place of the competion
df = pd.merge(df_competition_clean, df_competitors, how='inner', left_on=['Date', 'Venue'], right_on=['Date','Place of the competition'])

#Pivoting table to have Position in the competition as columns and some cleaning
df_pivoted = df.pivot_table(index=['Date', 'Name', 'Country'], columns='Position in the competition', values='Competitor', aggfunc=lambda x: ' '.join(x))
df_pivoted.reset_index(level=['Date', 'Name', 'Country'], inplace=True)
df_pivoted.rename(columns = {'Date':'date'}, inplace = True)
df_pivoted = df_pivoted[df_pivoted['2'].notna()] #to be sure there is at least two finishers in the competion else the elo score can not be computed
df_pivoted.date = pd.to_datetime(df_pivoted.date)

#preparing dataframe tracker to use multielo package (keeping columns of interest and replacing NA by None)
#Careful here I only pick the first 10th finishers of a race 
df_tracker = df_pivoted[['date','1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11']]
df_tracker = df_tracker.where(pd.notnull(df_pivoted), None)

#USing Multielo package to get rankings (maybe the score function need to be optimised -> see documentation)
tracker = Tracker(elo_rater=MultiElo(score_function_base=1))
tracker.process_data(df_tracker)
tracker.get_current_ratings()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


Unnamed: 0,rank,player_id,n_games,rating
0,1,Nijel AMOS,28,1303.179209
1,2,David RUDISHA,21,1203.666081
2,3,Emmanuel Kipkurui KORIR,20,1185.504963
3,4,Donavan BRAZIER,8,1166.518108
4,5,Ferguson Cheruiyot ROTICH,43,1158.920022
...,...,...,...,...
204,205,Richard KIPLAGAT,7,928.480919
205,206,Timothy KITUM,12,926.314266
206,207,Andreas KRAMER,12,915.888468
207,208,Andrew OSAGIE,16,915.571752


In [24]:
#Plotting the result

alt.renderers.enable('notebook')

scales = alt.selection_interval(bind='scales')

df_historical_rating = tracker.get_history_df()

highlight = alt.selection(type='single', on='mouseover', 
                          fields=['player_id'], nearest=True, empty="none")

base = alt.Chart(df_historical_rating
).encode(
    x=alt.X("date:T", axis=alt.Axis(format='%Y %B')),
    y=alt.Y("rating:Q", scale=alt.Scale(zero=False)),
    #color="player_id:N",
    tooltip = ["player_id","rating"]
)

points = base.mark_circle(point=True).encode(
    opacity=alt.value(0)
).add_selection(
    highlight
).properties(
    width=1350,
    height=500
)

lines = base.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(3)),
    color=alt.condition(highlight, 'player_id', alt.value("lightgray"), legend=None)
)

alt.layer(points, lines).configure_axis(
    grid=False
).configure_view(
    strokeWidth=0
).add_selection(
    scales
)

<vega.vegalite.VegaLite at 0x7f9ef9da9ee0>

