In [35]:
# Season to rank (e.g. '2021-22'). The next cell uses this string both as a key
# into the per-season constants in `myconstants` and inside the input CSV paths.
SEASON = '2021-22'
# League to rank. The next cell has dedicated branches for 'NBA' and 'ABA';
# any other value falls through to its sentinel defaults.
LEAGUE = 'NBA'

In [36]:
import pandas as pd
import myconstants as c
from scipy.stats import zscore

# Pick the season's game counts for the selected league. For NBA and ABA the
# values live in `myconstants` as <LEAGUE>_NUM_GAMES / <LEAGUE>_MIN_GAMES,
# each indexed by the season string.
if LEAGUE in ('NBA', 'ABA'):
    NUM_GAMES = getattr(c, f'{LEAGUE}_NUM_GAMES')[SEASON]
    MIN_GAMES = getattr(c, f'{LEAGUE}_MIN_GAMES')[SEASON]
else:
    # Any other league gets 10000 for both values; with MIN_GAMES that high the
    # `G >= MIN_GAMES` filter later in this cell keeps only players with at
    # least 10000 games.
    NUM_GAMES = MIN_GAMES = 10000

# Load the cleaned CSV exports for the selected season and league. The paths are
# relative (they start at '../data') and are built from SEASON and LEAGUE.
# Player per-game table (judging by the file name).
p_df = pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_{LEAGUE}_Player_PerGame.csv')
# Team standings table; the code below reads its 'Team' and 'OverallW' columns.
t_df = pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_{LEAGUE}_Team_Standings.csv')
# Player advanced-stats table (judging by the file name); merged into p_df below on 'PlayerID'.
p_advanced_df = pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_{LEAGUE}_Player_Advanced.csv')


# Merge the advanced stats into the per-game table, keyed on PlayerID. The
# descriptive columns listed here are removed from the advanced table first,
# presumably because the per-game table already carries them and the merge
# would otherwise produce suffixed duplicates.
p_advanced_df = p_advanced_df.drop(['Player', 'Tms', 'Pos', 'Age', 'G', 'MP'], axis='columns')
p_df = pd.merge(p_df, p_advanced_df, on='PlayerID')

# Give every standings row its team code by looking the team name up in c.TEAM_CODE.
t_df['TeamCode'] = t_df['Team'].apply(lambda team_name: c.TEAM_CODE[team_name])

# average team wins per player
def average_wins(tms):
    """Return the mean 'OverallW' value of the teams listed in ``tms``.

    ``tms`` is a comma-separated string of team codes. Each code is matched
    against the 'TeamCode' column of the module-level ``t_df`` and the first
    matching row's 'OverallW' is used. Every listed team counts equally in
    the mean.

    Raises IndexError when a code has no matching row in ``t_df``.
    """
    wins_per_team = [
        t_df.loc[t_df['TeamCode'] == code, 'OverallW'].values[0]
        for code in tms.split(',')
    ]
    return sum(wins_per_team) / len(wins_per_team)
# Attach to every player the win total of the team(s) in 'Tms', averaged by average_wins.
p_df['TmsWins'] = p_df['Tms'].apply(average_wins)

# Players who haven't played enough games don't qualify. The explicit .copy()
# detaches the filtered rows from the unfiltered frame, so the column
# assignments below work on an independent DataFrame rather than on a slice
# (the pattern pandas reports as SettingWithCopyWarning).
p_df = p_df.loc[p_df['G'] >= MIN_GAMES].copy()

# Ranking algorithm. Every component is z-scored over the qualifying players
# only, so each score is relative to the filtered pool.
# NOTE: zscore is called with its defaults; scipy's default nan_policy is
# 'propagate', so a single NaN in a component column turns that whole z-score
# column into NaN.
# The summed PTS + TRB + AST + STL + BLK component carries a 1.5x weight.
p_df['zTotals'] = 1.5 * zscore(p_df['PTS'] + p_df['TRB'] + p_df['AST'] + p_df['STL'] + p_df['BLK'])
p_df['zWS'] = zscore(p_df['WS'])
p_df['zVORP'] = zscore(p_df['VORP'])
p_df['zBPM'] = zscore(p_df['BPM'])
p_df['zTmsWins'] = zscore(p_df['TmsWins'])
p_df['RawScore'] = p_df['zTotals'] + p_df['zWS'] + p_df['zVORP'] + p_df['zBPM'] + p_df['zTmsWins']

# Shift the raw score by a constant so that more PlayerScores are positive.
p_df['PlayerScore'] = p_df['RawScore'] + c.PLAYER_SCORE_ADDITION
# Make higher PlayerScores more significant: shifted scores above 1 are raised
# to PLAYER_SCORE_EXPONENT, scores at or below 1 are left unchanged.
p_df['PlayerScore'] = p_df['PlayerScore'].apply(
    lambda score: pow(score, c.PLAYER_SCORE_EXPONENT) if score > 1 else score
)
p_df = p_df.sort_values('PlayerScore', ascending=False)

# Presentation (stylistic choice): move PlayerScore to the right of Pos,
# drop PlayerID, and number the rows from 1 so the index reads as the rank.
col = p_df.pop('PlayerScore')
p_df.insert(p_df.columns.get_loc('Pos') + 1, col.name, col)
p_df = p_df.drop('PlayerID', axis=1)
p_df = p_df.reset_index(drop=True)
p_df.index = p_df.index + 1

p_df.head(50)

Unnamed: 0,Player,Tms,Pos,PlayerScore,Age,G,GS,MP,FG,FGA,...,DBPM,BPM,VORP,TmsWins,zTotals,zWS,zVORP,zBPM,zTmsWins,RawScore
1,Nikola Jokić,DEN,C,141.761915,26,74,74,33.5,10.3,17.7,...,4.5,13.7,9.8,48.0,4.989229,4.428109,6.167271,5.030314,0.572813,21.187736
2,Giannis Antetokounmpo,MIL,PF,115.819603,27,67,67,32.9,10.3,18.6,...,3.5,11.2,7.4,51.0,4.765468,3.542592,4.487294,4.116602,0.848498,17.760453
3,Joel Embiid,PHI,C,102.775053,27,68,68,33.8,9.8,19.6,...,2.0,9.2,6.5,51.0,4.653587,3.196085,3.857303,3.385631,0.848498,15.941104
4,Luka Dončić,DAL,PG,85.300674,22,65,65,35.4,9.9,21.6,...,1.8,8.2,5.9,52.0,4.477775,1.502052,3.437309,3.020146,0.940393,13.377674
5,Kevin Durant,BRK,PF,72.677977,33,55,55,37.2,10.5,20.3,...,0.7,7.2,4.8,44.0,4.078201,1.810058,2.667319,2.654661,0.205232,11.415471
6,Jayson Tatum,BOS,SF,69.752381,23,76,76,35.9,9.3,20.6,...,0.6,4.9,4.8,51.0,3.342985,2.272067,2.667319,1.814045,0.848498,10.944914
7,Trae Young,ATL,PG,68.718627,23,76,76,34.9,9.4,20.3,...,-2.0,5.2,4.8,43.0,3.646661,2.42607,2.667319,1.923691,0.113337,10.777078
8,LeBron James,LAL,SF,68.285978,37,56,56,37.2,11.4,21.8,...,0.8,7.7,5.1,33.0,4.333928,1.463551,2.877316,2.837404,-0.805613,10.706585
9,Karl-Anthony Towns,MIN,C,66.315536,26,74,74,33.5,8.7,16.4,...,0.5,5.0,4.4,46.0,3.215121,2.541572,2.387323,1.850594,0.389022,10.383632
10,Chris Paul,PHO,PG,65.680054,36,65,65,32.9,5.6,11.3,...,2.3,5.4,4.0,64.0,1.936484,2.195065,2.107327,1.996788,2.043134,10.278798
