In [100]:
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeCV
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

possessions = pd.read_csv('stint_data_20212022.csv')
possessions = possessions[possessions['timeofpos'] > 0]

In [101]:
def build_player_list(posessions):
    players = list(
        set(list(posessions['team1player1'].unique()) + list(posessions['team1player2'].unique()) + list(
            posessions['team1player3']) + \
            list(posessions['team1player4'].unique()) + list(posessions['team1player5'].unique()) + list(
            posessions['team2player1'].unique()) + \
            list(posessions['team2player2'].unique()) + list(posessions['team2player3'].unique()) + list(
            posessions['team2player4'].unique()) + \
            list(posessions['team2player5'].unique())))
    players.sort()
    return players

In [102]:
player_list = build_player_list(possessions)


In [103]:
possessions['PointsPerSec'] = (possessions['team1_points'])  / possessions['timeofpos']


In [104]:
def map_players(row_in, players):
    p1 = row_in[0]
    p2 = row_in[1]
    p3 = row_in[2]
    p4 = row_in[3]
    p5 = row_in[4]
    p6 = row_in[5]
    p7 = row_in[6]
    p8 = row_in[7]
    p9 = row_in[8]
    p10 = row_in[9]

    rowOut = np.zeros([len(players) * 2])

    rowOut[players.index(p1)] = 1
    rowOut[players.index(p2)] = 1
    rowOut[players.index(p3)] = 1
    rowOut[players.index(p4)] = 1
    rowOut[players.index(p5)] = 1

    rowOut[players.index(p6) + len(players)] = -1
    rowOut[players.index(p7) + len(players)] = -1
    rowOut[players.index(p8) + len(players)] = -1
    rowOut[players.index(p9) + len(players)] = -1
    rowOut[players.index(p10) + len(players)] = -1

    return rowOut

In [105]:
# Break the dataframe into x_train (nxm matrix), y_train (nx1 matrix of target values), and weights (not necessary because all rows will have 1 possession)
def convert_to_matricies(possessions, name, players):
    # extract only the columns we need

    # Convert the columns of player ids into a numpy matrix
    stints_x_base = possessions[['team1player1', 'team1player2', 'team1player3', 'team1player4', 'team1player5', 
                        'team2player1', 'team2player2', 'team2player3', 'team2player4', 'team2player4']].to_numpy()
    
    # Apply our mapping function to the numpy matrix
    stint_X_rows = np.apply_along_axis(map_players, 1, stints_x_base, players)

    # Convert the column of target values into a numpy matrix
    stint_Y_rows = possessions[name].to_numpy()

    # extract the possessions as a pandas Series
    possessions = possessions['timeofpos']

    # return matricies and possessions series
    return stint_X_rows, stint_Y_rows, possessions

train_x, train_y, possessions_raw = convert_to_matricies(possessions, 'PointsPerSec', player_list)

In [106]:
# Convert lambda value to alpha needed for ridge CV
def lambda_to_alpha(lambda_value, samples):
    return (lambda_value * samples) / 2.0

# Convert RidgeCV alpha back into a lambda value
def alpha_to_lambda(alpha_value, samples):
    return (alpha_value * 2.0) / samples

In [107]:
def calculate_rapm(train_x, train_y, possessions, lambdas, name, players):
    # convert our lambdas to alphas
    alphas = [lambda_to_alpha(l, train_x.shape[0]) for l in lambdas]

    # create a 5 fold CV ridgeCV model. Our target data is not centered at 0, so we want to fit to an intercept.
    clf = RidgeCV(alphas=alphas, cv=5, fit_intercept=True, normalize=False)

    # fit our training data
    model = clf.fit(train_x, train_y, sample_weight=possessions)

    # convert our list of players into a mx1 matrix
    player_arr = np.transpose(np.array(players).reshape(1, len(players)))
    # extract our coefficients into the offensive and defensive parts
    coef_offensive_array = np.transpose(np.array(model.coef_[ 0:len(players)]).reshape(1, len(players)))
    coef_defensive_array = np.transpose(np.array(model.coef_[len(players):]).reshape(1, len(players)))

    # concatenate the offensive and defensive values with the playey ids into a mx3 matrix
    player_id_with_coef = np.concatenate([player_arr, coef_offensive_array, coef_defensive_array], axis = 1)
    # build a dataframe from our matrix
    players_coef = pd.DataFrame(player_id_with_coef)

    intercept = model.intercept_
    # apply new column names
    players_coef.columns = ['playerId', '{0}__Off'.format(name), '{0}__Def'.format(name)]

    # Add the offesnive and defensive components together (we should really be weighing this to the number of offensive and defensive possession played as they are often not equal).
    players_coef[name] = players_coef['{0}__Off'.format(name)].apply(lambda x: float(x)) + players_coef['{0}__Def'.format(name)].apply(lambda x: float(x))

    # rank the values
    players_coef['{0}_Rank'.format(name)] = players_coef[name].rank(ascending=False)
    players_coef['{0}__Off_Rank'.format(name)] = players_coef['{0}__Off'.format(name)].rank(ascending=False)
    players_coef['{0}__Def_Rank'.format(name)] = players_coef['{0}__Def'.format(name)].rank(ascending=False)

    # add the intercept for reference
    players_coef['{0}__intercept'.format(name)] = intercept

    return players_coef, intercept

In [108]:
lambdas_rapm = [.01, .05, .1]
results, intercept = calculate_rapm(train_x, train_y, possessions_raw, lambdas_rapm, 'RAPM', player_list)

# results = player_names.merge(results, how='inner', on='playerId')

In [109]:
results

Unnamed: 0,playerId,RAPM__Off,RAPM__Def,RAPM,RAPM_Rank,RAPM__Off_Rank,RAPM__Def_Rank,RAPM__intercept
0,Aaron Gordon,0.007538628668686449,-0.005093963001978022,0.002445,255.0,138.0,472.0,0.160002
1,Aaron Henry,-0.009940918710246285,0.0,-0.009941,406.0,400.0,277.5,0.160002
2,Aaron Holiday,-0.007876519192073198,0.006218077710893759,-0.001658,313.0,428.0,166.0,0.160002
3,Aaron Nesmith,-0.004575693929991714,-0.017532806391627277,-0.022109,491.0,478.0,345.0,0.160002
4,Aaron Wiggins,0.005546882285596066,-0.02103859556492177,-0.015492,454.0,166.0,336.0,0.160002
...,...,...,...,...,...,...,...,...
537,Yuta Watanabe,0.01342945350426077,0.029898022371653732,0.043327,2.0,65.0,10.0,0.160002
538,Yves Pons,-0.002510374906202276,0.003929063720442808,0.001419,266.0,504.0,202.0,0.160002
539,Zach LaVine,0.009992100526270602,0.0162859461266275,0.026278,25.0,99.0,63.0,0.160002
540,Zeke Nnaji,0.01478637814277592,0.0137436015588313,0.028530,18.0,52.0,84.0,0.160002


In [110]:
results = np.round(results, decimals=2)
results

Unnamed: 0,playerId,RAPM__Off,RAPM__Def,RAPM,RAPM_Rank,RAPM__Off_Rank,RAPM__Def_Rank,RAPM__intercept
0,Aaron Gordon,0.007538628668686449,-0.005093963001978022,0.00,255.0,138.0,472.0,0.16
1,Aaron Henry,-0.009940918710246285,0.0,-0.01,406.0,400.0,277.5,0.16
2,Aaron Holiday,-0.007876519192073198,0.006218077710893759,-0.00,313.0,428.0,166.0,0.16
3,Aaron Nesmith,-0.004575693929991714,-0.017532806391627277,-0.02,491.0,478.0,345.0,0.16
4,Aaron Wiggins,0.005546882285596066,-0.02103859556492177,-0.02,454.0,166.0,336.0,0.16
...,...,...,...,...,...,...,...,...
537,Yuta Watanabe,0.01342945350426077,0.029898022371653732,0.04,2.0,65.0,10.0,0.16
538,Yves Pons,-0.002510374906202276,0.003929063720442808,0.00,266.0,504.0,202.0,0.16
539,Zach LaVine,0.009992100526270602,0.0162859461266275,0.03,25.0,99.0,63.0,0.16
540,Zeke Nnaji,0.01478637814277592,0.0137436015588313,0.03,18.0,52.0,84.0,0.16


In [111]:
results.to_csv('data/rapm.csv')
print(results)

            playerId              RAPM__Off              RAPM__Def  RAPM  RAPM_Rank  RAPM__Off_Rank  RAPM__Def_Rank  RAPM__intercept
0       Aaron Gordon   0.007538628668686449  -0.005093963001978022  0.00      255.0           138.0           472.0             0.16
1        Aaron Henry  -0.009940918710246285                    0.0 -0.01      406.0           400.0           277.5             0.16
2      Aaron Holiday  -0.007876519192073198   0.006218077710893759 -0.00      313.0           428.0           166.0             0.16
3      Aaron Nesmith  -0.004575693929991714  -0.017532806391627277 -0.02      491.0           478.0           345.0             0.16
4      Aaron Wiggins   0.005546882285596066   -0.02103859556492177 -0.02      454.0           166.0           336.0             0.16
..               ...                    ...                    ...   ...        ...             ...             ...              ...
537    Yuta Watanabe    0.01342945350426077   0.029898022371653732  0