In [None]:
import pandas as pd
# Read the Elo CSV into a DataFrame. The path is relative, so the file must be
# reachable from the kernel's working directory.
filename = 'nba_elo_latest.csv'
df = pd.read_csv(filename)
# Leave the preview as the cell's last expression so the notebook renders it as
# a rich table instead of the plain-text dump that print() produces.
df.head()

In [None]:
# Number the rows 1..N in file order so every game has an identifier.
df['game_id'] = range(1, len(df) + 1)

# Keep only the columns used by the Elo calculation. The explicit .copy() makes
# `df` an independent frame; without it `df` is a column slice of the loaded
# table and the column assignments in the following cell raise pandas'
# SettingWithCopyWarning.
game_columns = ['game_id', 'home', 'away', 'elo1_pre_home', 'elo2_pre_away',
                'home_score', 'away_score']
df = df[game_columns].copy()
df

In [None]:
import numpy as np
# Model parameters used by the formulas below.
HOME_ADVANTAGE = 100   # rating points added to the home side
ELO_SCALE = 400        # divisor inside the logistic win-probability formula
BASE_K = 40            # K factor before the margin-of-victory scaling

home_elo = df['elo1_pre_home']
away_elo = df['elo2_pre_away']
score_gap = df['home_score'] - df['away_score']
# Home team's rating lead over the away team, home advantage included.
home_edge = home_elo - away_elo + HOME_ADVANTAGE

# Game result from the home team's point of view: 1 = home win, 0 = otherwise.
df['win'] = np.where(df['home_score'] > df['away_score'], 1, 0)
# Absolute points difference between the two teams.
df['diff'] = score_gap.abs()
# Rating difference seen from the winner's side (home advantage included):
# the home edge when the home team won, its negation when the away team won.
df['d'] = np.where(df['win'] == 1, home_edge, -home_edge)
# Margin-of-victory multiplier.
df['mov'] = (df['diff'] + 3) ** 0.8 / (7.5 + (0.006 * df['d']))
# K factor scaled by the margin of victory.
df['K'] = df['mov'] * BASE_K
# Pre-game win probabilities; the two always sum to 1.
df['elo_prob_home'] = 1 / (1 + 10 ** (-home_edge / ELO_SCALE))
df['elo_prob_away'] = 1 - df['elo_prob_home']

# Post-game ratings: each side moves by K * (actual result - expected result).
# The away side's actual result is the complement of the home `win` flag.
home_result = df['win']
away_result = 1 - df['win']
df['elo1_post_home'] = home_elo + df['K'] * (home_result - df['elo_prob_home'])
df['elo2_post_away'] = away_elo + df['K'] * (away_result - df['elo_prob_away'])
df

In [None]:
# Drop the intermediate calculation columns; only the per-game inputs and
# results are carried forward.
result_columns = ['game_id', 'home', 'away', 'elo1_pre_home', 'elo2_pre_away',
                  'home_score', 'away_score', 'elo_prob_home', 'elo_prob_away',
                  'elo1_post_home', 'elo2_post_away']
df = df[result_columns]

# Grouping on every column and resetting the index rebuilds the table ordered
# by the keys (game_id first). NOTE: with groupby's default dropna=True, rows
# that have a missing value in any key are left out of the result (presumably
# games without a score yet -- confirm against the CSV).
# The table is built once and shared by both views below; rename() returns a
# new frame each time, so `home` and `away` stay independent of each other.
games = df.groupby(result_columns).sum().reset_index()

# Home view: every game described from the home team's point of view.
home = (
    games
    .rename(columns={'home': 'team', 'away': 'opp',
                     'elo1_pre_home': 'elo_pre_team', 'elo2_pre_away': 'elo_pre_opp',
                     'elo_prob_home': 'elo_prob_team', 'elo_prob_away': 'elo_prob_opp',
                     'elo1_post_home': 'elo_post_team', 'elo2_post_away': 'elo_post_opp',
                     'home_score': 'pts_for', 'away_score': 'pts_agn'})
    .assign(
        # 1 when the team outscored its opponent, 0 otherwise.
        win=lambda t: np.where(t['pts_for'] > t['pts_agn'], 1, 0),
        # Flag: the team in question played at home.
        home=1,
    )
)

# Away view: the same games described from the away team's point of view.
away = (
    games
    .rename(columns={'home': 'opp', 'away': 'team',
                     'elo1_pre_home': 'elo_pre_opp', 'elo2_pre_away': 'elo_pre_team',
                     'elo_prob_home': 'elo_prob_opp', 'elo_prob_away': 'elo_prob_team',
                     'elo1_post_home': 'elo_post_opp', 'elo2_post_away': 'elo_post_team',
                     'home_score': 'pts_agn', 'away_score': 'pts_for'})
    .assign(
        # 1 when the team outscored its opponent, 0 otherwise.
        win=lambda t: np.where(t['pts_for'] > t['pts_agn'], 1, 0),
        # Flag: the team in question played away.
        home=0,
    )
)
away

In [None]:
# Stack the home-view and away-view rows into a single table with a fresh
# 0..N-1 index, giving one row per team per game.
final = pd.concat([home, away], ignore_index=True)

# elo_win marks the side the ratings favoured before the game: 1 when the
# team's win probability is greater than the opponent's, otherwise 0.
team_favoured = final['elo_prob_team'] > final['elo_prob_opp']
final['elo_win'] = np.where(team_favoured, 1, 0)

In [None]:
# Golden State Warriors rows only, in ascending game_id order.
gsw = final.loc[final['team'] == 'GSW'].sort_values(by=['game_id'])

# Rating timeline: the rating GSW carried into its first game, followed by its
# rating after every game. The starting value is read from the first row's
# pre-game rating instead of being typed in by hand, so it follows the data if
# the CSV changes. head(1) gives an empty list when there are no GSW rows, in
# which case the whole timeline is empty.
gsw_elo = gsw['elo_pre_team'].head(1).tolist() + gsw['elo_post_team'].tolist()

# Ratings rounded to the nearest whole number for plotting.
gsw_elo_roundedup = [round(rating) for rating in gsw_elo]

# 'gsw_game_id' renumbers GSW's games from 0 (0 = before the first game);
# 'ELO_Rating' holds the rounded rating at each of those points.
gsw_dict = {
    'gsw_game_id': list(range(len(gsw_elo))),
    'ELO_Rating': gsw_elo_roundedup,
}

gsw_df = pd.DataFrame(data=gsw_dict)
gsw_df

In [None]:
# Line chart of how GSW's Elo rating moves from game to game.
# The point at gsw_game_id 0 is the rating held before any game was played.
import plotly.express as px

# Title and axis captions, applied to the figure in one call below.
layout_options = dict(
    title='Variation of ELO Rating of the Golden State Warriors over the Course of the Season',
    xaxis_title='Games',
    yaxis_title='ELO Rating',
)

fig = px.line(gsw_df, x="gsw_game_id", y="ELO_Rating", markers=True)
fig.update_traces(textposition="bottom right")
fig.update_layout(**layout_options)
fig.show()