In [30]:
# Season identifier: used below as the key into c.NUM_GAMES and to build the
# ../data/years/<SEASON>/clean/ input file paths.
SEASON = '2008-09'

In [31]:
import pandas as pd
import myconstants as c
from scipy.stats import zscore

# A player must appear in at least 60% of the season's games to qualify.
NUM_GAMES = c.NUM_GAMES[SEASON]
MIN_GAMES = 0.60 * NUM_GAMES

# Season inputs: per-game player stats and team standings.
p_df = pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_Player_PerGame.csv')
t_df = pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_Team_Standings.csv')

# Advanced stats: the identity/usage columns are dropped before the join so
# that only PlayerID plus the advanced metrics are merged into the per-game
# table.
p_advanced_df = (
    pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_Player_Advanced.csv')
    .drop(columns=['Player', 'Tms', 'Pos', 'Age', 'G', 'MP'])
)
p_df = pd.merge(p_df, p_advanced_df, on='PlayerID')

# Give every team row its code, looked up by team name in c.TEAM_CODE
# (a missing name raises, exactly as a plain dictionary lookup would).
t_df['TeamCode'] = t_df['Team'].map(lambda team_name: c.TEAM_CODE[team_name])

# average team wins per player
def average_wins(tms, standings=None):
    """Return the mean overall win total of the teams a player appeared for.

    Parameters
    ----------
    tms : str
        Comma-separated team codes, e.g. ``'CLE'`` or ``'DEN,DET'``.
        Whitespace around each code is ignored.
    standings : pandas.DataFrame, optional
        Table with ``TeamCode`` and ``OverallW`` columns. When omitted, the
        notebook-level ``t_df`` is used (looked up at call time).

    Returns
    -------
    float
        Unweighted mean of ``OverallW`` over the listed teams. If a code
        matches several rows, the first match is used.

    Raises
    ------
    ValueError
        If ``tms`` contains no team codes.
    KeyError
        If a code is not present in ``standings['TeamCode']``.
    """
    if standings is None:
        standings = t_df

    # Tolerate 'CLE, MIA' style spacing and stray separators.
    codes = [code.strip() for code in tms.split(',') if code.strip()]
    if not codes:
        raise ValueError(f'no team codes found in {tms!r}')

    wins = []
    for code in codes:
        # Single .loc call (row mask + column) instead of chained indexing.
        matches = standings.loc[standings['TeamCode'] == code, 'OverallW']
        if matches.empty:
            # Name the offending code rather than failing with a bare IndexError.
            raise KeyError(f'unknown team code {code!r}')
        wins.append(matches.iloc[0])
    return sum(wins) / len(wins)
# Average season win total of the team(s) each player appeared for.
p_df['TmsWins'] = p_df['Tms'].apply(average_wins)

# Players who haven't played enough games don't qualify.
# The explicit .copy() makes the filtered frame an independent object, so the
# column assignments below are never treated as writes to a slice of the
# unfiltered frame (the situation pandas reports as SettingWithCopyWarning).
p_df = p_df.loc[p_df['G'] >= MIN_GAMES].copy()

# Ranking algorithm: each component is a z-score computed over the qualified
# players only; the combined box-score total is weighted by 1.5.
p_df['zTotals'] = 1.5*zscore(p_df['PTS'] + p_df['TRB'] + p_df['AST'] + p_df['STL'] + p_df['BLK'])
p_df['zWS'] = zscore(p_df['WS'])
p_df['zVORP'] = zscore(p_df['VORP'])
p_df['zBPM'] = zscore(p_df['BPM'])
p_df['zTmsWins'] = zscore(p_df['TmsWins'])
p_df['RawScore'] = p_df['zTotals'] + p_df['zWS'] + p_df['zVORP'] + p_df['zBPM'] + p_df['zTmsWins']

# Shift the raw score by a constant so that more PlayerScores are positive,
# then raise scores above 1 to c.PLAYER_SCORE_EXPONENT to make higher
# PlayerScores more significant. Scores <= 1 are left unchanged.
p_df['PlayerScore'] = p_df['RawScore'] + c.PLAYER_SCORE_ADDITION
p_df['PlayerScore'] = p_df['PlayerScore'].apply(lambda x: pow(x, c.PLAYER_SCORE_EXPONENT) if x > 1 else x)
p_df = p_df.sort_values('PlayerScore', ascending=False)

# Stylistic choices: move [PlayerScore] to the right of [Pos], drop PlayerID,
# and index rows starting from 1 so the index reads as the player's rank.
col = p_df.pop('PlayerScore')
p_df.insert(p_df.columns.get_loc('Pos') + 1, col.name, col)
p_df = p_df.drop('PlayerID', axis=1)
p_df = p_df.reset_index(drop=True)
p_df.index = p_df.index + 1

p_df.head(50)

Unnamed: 0,Player,Tms,Pos,PlayerScore,Age,G,GS,MP,FG,FGA,...,DBPM,BPM,VORP,TmsWins,zTotals,zWS,zVORP,zBPM,zTmsWins,RawScore
1,LeBron James,CLE,SF,159.438612,24,81,81,37.7,9.7,19.9,...,3.7,13.2,11.8,66.0,4.648706,5.153965,6.633461,5.154413,1.812728,23.403273
2,Chris Paul,NOH,PG,124.083683,23,78,78,38.5,8.1,16.1,...,3.8,11.0,9.9,49.0,4.010158,4.520919,5.464233,4.323453,0.558912,18.877675
3,Dwyane Wade,MIA,SG,114.882577,27,79,79,38.6,10.8,22.0,...,2.4,10.6,9.6,43.0,4.682314,3.381435,5.279618,4.172369,0.116389,17.632125
4,Kobe Bryant,LAL,SG,85.070695,30,82,82,36.1,9.8,20.9,...,0.3,5.9,5.9,65.0,3.455629,2.748389,3.002701,2.397136,1.738974,13.342829
5,Dwight Howard,ORL,C,77.104768,23,79,79,35.7,7.1,12.4,...,2.4,4.5,4.7,59.0,3.59006,3.096564,2.264241,1.868343,1.296451,12.11566
6,Brandon Roy,POR,SG,77.021451,24,78,78,37.2,8.1,16.9,...,0.1,6.2,6.0,54.0,2.59863,3.001607,3.064239,2.510449,0.927681,12.102607
7,Pau Gasol,LAL,C,74.778366,28,81,81,37.0,7.3,12.9,...,0.5,4.5,5.0,65.0,2.565023,3.128217,2.448856,1.868343,1.738974,11.749412
8,Tim Duncan,SAS,C,65.466769,32,75,75,33.7,7.4,14.8,...,2.0,5.4,4.7,54.0,2.917904,1.925429,2.264241,2.208281,0.927681,10.243537
9,Dirk Nowitzki,DAL,PF,61.470157,30,81,81,37.7,9.6,20.0,...,-0.3,3.4,4.2,50.0,3.354806,2.178647,1.95655,1.452863,0.632666,9.575532
10,Paul Pierce,BOS,SF,56.625707,31,81,81,37.5,6.7,14.6,...,0.7,3.1,3.9,62.0,2.128121,1.988733,1.771935,1.33955,1.517712,8.746052
