In [1]:
# Season to rank (e.g. '1973-74'). Used below as the key into the per-league
# game-count tables in myconstants and as part of the input CSV paths.
SEASON = '1973-74'
# League to rank. 'NBA' and 'ABA' have dedicated game-count tables; any other
# value falls back to placeholder game counts in the next cell.
LEAGUE = 'ABA'

In [2]:
import pandas as pd
import myconstants as c
from scipy.stats import zscore

# Resolve the season length and the games-played qualification threshold.
# myconstants exposes one pair of tables per league (<LEAGUE>_NUM_GAMES and
# <LEAGUE>_MIN_GAMES), each keyed by season string.
if LEAGUE in ('NBA', 'ABA'):
    NUM_GAMES = getattr(c, f'{LEAGUE}_NUM_GAMES')[SEASON]
    MIN_GAMES = getattr(c, f'{LEAGUE}_MIN_GAMES')[SEASON]
else:
    # Unrecognised league: use a threshold so high that no player can meet it.
    NUM_GAMES = 10000
    MIN_GAMES = 10000

# Load the cleaned per-game, standings and advanced-stat tables for the
# selected season and league.
p_df = pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_{LEAGUE}_Player_PerGame.csv')
t_df = pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_{LEAGUE}_Team_Standings.csv')
p_advanced_df = pd.read_csv(f'../data/years/{SEASON}/clean/{SEASON}_{LEAGUE}_Player_Advanced.csv')


# Combine per-game and advanced stats into one row per player.
# The descriptive columns are removed from the advanced table first
# (presumably they duplicate the per-game table -- verify against the CSVs),
# then the two tables are inner-joined on PlayerID.
p_advanced_df = p_advanced_df.drop(['Player', 'Tms', 'Pos', 'Age', 'G', 'MP'], axis=1)
p_df = pd.merge(p_df, p_advanced_df, on='PlayerID')

# Translate each full team name into its code via myconstants.TEAM_CODE
# (a name missing from the table raises KeyError).
t_df['TeamCode'] = t_df['Team'].apply(lambda team_name: c.TEAM_CODE[team_name])

# average team wins per player
def average_wins(tms, standings=None):
    """Return the mean overall win total of the teams a player appeared for.

    Parameters
    ----------
    tms : str
        Comma-separated team codes, e.g. 'NYN' or 'VIR,SAS'. Whitespace
        around each code is ignored.
    standings : pandas.DataFrame, optional
        Table with 'TeamCode' and 'OverallW' columns. When omitted, the
        notebook-level ``t_df`` is used, so existing single-argument calls
        (including ``Series.apply(average_wins)``) behave as before.

    Returns
    -------
    float
        Unweighted mean of 'OverallW' over the listed teams. If a code
        matches several rows, the first matching row is used.

    Raises
    ------
    KeyError
        If a team code has no row in ``standings``.
    """
    if standings is None:
        standings = t_df

    wins = []
    for code in tms.split(','):
        code = code.strip()
        matches = standings.loc[standings['TeamCode'] == code, 'OverallW']
        if matches.empty:
            # Fail with the offending code instead of an opaque
            # "index 0 is out of bounds" from indexing an empty array.
            raise KeyError(f'no standings row for team code {code!r} (from Tms={tms!r})')
        wins.append(matches.iloc[0])
    return sum(wins) / len(wins)
# Build the ranking in one pipeline:
#   1. attach each player's team win total (averaged over all teams listed),
#   2. keep only players who reached the games-played threshold,
#   3. z-score each component across the qualified players,
#   4. sum the components and rescale into the final PlayerScore,
#   5. order from best to worst.
# NOTE(review): zscore is called with its defaults, so a missing value in any
# input column would presumably propagate to that whole component -- confirm
# the cleaned CSVs contain no NaNs in these columns.
p_df = (
    p_df
    .assign(TmsWins=lambda d: d['Tms'].apply(average_wins))
    .loc[lambda d: d['G'] >= MIN_GAMES]
    .assign(
        # combined box-score production carries 1.5x the weight of the others
        zTotals=lambda d: 1.5 * zscore(d['PTS'] + d['TRB'] + d['AST'] + d['STL'] + d['BLK']),
        zWS=lambda d: zscore(d['WS']),
        zVORP=lambda d: zscore(d['VORP']),
        zBPM=lambda d: zscore(d['BPM']),
        zTmsWins=lambda d: zscore(d['TmsWins']),
        RawScore=lambda d: d['zTotals'] + d['zWS'] + d['zVORP'] + d['zBPM'] + d['zTmsWins'],
        # shift scores upward, then raise those above 1 to a power so that
        # gaps between the top players are amplified
        PlayerScore=lambda d: (d['RawScore'] + c.PLAYER_SCORE_ADDITION).apply(
            lambda score: pow(score, c.PLAYER_SCORE_EXPONENT) if score > 1 else score
        ),
    )
    .sort_values('PlayerScore', ascending=False)
)

# Presentation only: place PlayerScore immediately after Pos, hide the
# PlayerID column, and number the rows from 1 so the index reads as a rank.
col = p_df.pop('PlayerScore')
p_df.insert(loc=p_df.columns.get_loc('Pos') + 1, column=col.name, value=col)
p_df = p_df.drop(columns='PlayerID').reset_index(drop=True)
p_df.index += 1

p_df.head(50)

Unnamed: 0,Player,Tms,Pos,PlayerScore,Age,G,GS,MP,FG,FGA,...,DBPM,BPM,VORP,TmsWins,zTotals,zWS,zVORP,zBPM,zTmsWins,RawScore
1,Julius Erving,NYN,SF,126.477532,23,84,,40.5,10.9,21.3,...,2.9,8.5,9.0,55.0,4.681144,3.84996,5.317555,4.00905,1.338908,19.196618
2,Artis Gilmore,KEN,C,81.722594,24,84,,41.7,7.4,15.0,...,2.2,4.0,5.4,53.0,4.169393,2.656996,2.906181,1.971169,1.128187,12.831927
3,Dan Issel,KEN,PF,66.699289,25,83,,40.3,10.0,20.8,...,-0.8,2.6,3.9,53.0,3.077658,3.002328,1.901442,1.337162,1.128187,10.446777
4,George McGinnis,IND,PF,62.506822,23,80,,40.8,9.9,21.1,...,0.2,2.5,3.7,46.0,4.459386,1.840757,1.767477,1.291875,0.390664,9.750159
5,Willie Wise,UTS,SF,54.271437,26,82,,40.1,8.7,17.8,...,-0.5,2.3,3.6,51.0,2.548849,1.966332,1.700494,1.201303,0.917467,8.334445
6,Billy Paultz,NYN,C,46.213015,25,77,,33.7,6.7,13.6,...,1.1,2.1,2.7,55.0,1.866515,1.464031,1.097651,1.11073,1.338908,6.877835
7,Jimmy Jones,UTS,PG,43.30729,29,83,,38.1,7.0,12.8,...,0.3,2.1,3.3,51.0,1.371822,1.432638,1.499546,1.11073,0.917467,6.332203
8,George Gervin,"VIR,SAS",SG,42.096056,21,74,,33.9,9.1,19.3,...,0.5,3.1,3.2,36.5,2.75355,0.961731,1.432564,1.563593,-0.61026,6.101177
9,Mack Calvin,CAR,PG,35.813364,26,83,,31.2,6.0,13.0,...,-0.3,1.8,2.4,47.0,1.064772,1.432638,0.896703,0.974872,0.496025,4.865009
10,Ted McClain,CAR,SG,35.666119,27,84,,30.7,5.0,10.4,...,2.6,2.7,3.1,47.0,0.723605,0.867549,1.365581,1.382448,0.496025,4.835208
