In [99]:
#Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
from sklearn.preprocessing import PolynomialFeatures
pd.set_option('display.max_columns', 500)

%matplotlib inline


In [100]:

nba = pd.read_csv('data/nba_stats_info2.csv', index_col = 0)
ncaa = pd.read_csv('data/ncaa_stats_info2.csv', index_col = 0)

  interactivity=interactivity, compiler=compiler, result=result)


In [101]:
#convert grade labels to numbers
grades = []
for x in ncaa.grade:
    if x in ('Fr','RS-Fr'):
        grades.append(1)
    elif x in ('So','RS-So'):
        grades.append(2)
    elif x in ('Jr','RS-Jr'):
        grades.append(3)
    else:
        grades.append(4)
        
ncaa.grade = grades

#user 61 as undrafted instead of 99
ncaa.pick = [61 if x == 99 else x for x in ncaa.pick]
nba.pick = [61 if x == 99 else x for x in nba.pick]

In [102]:
#create per 36 minute stats
countingStats = ['fgm','fga','fg3m','fg3a','ftm','fta','oreb','dreb','reb','ast' ,'stl','blk','pf','tov','pts']
per36cols = [x+'_p36' for x in countingStats]
for stat,col in zip(countingStats,per36cols):
    ncaa[col] = ncaa[stat] / ncaa.mp * 36
    nba[col] = nba[stat] / nba.mp * 36
    
#create several multiplied stats
stat1 = ['pts', 'pts',   'pts','pts','pts','pts','ast',    'ast','ast','ast','ast','reb','reb','stl','ast_pct','ast_pct','blk_pct']
stat2 = ['ast', 'ts_pct','reb','stl','blk','tov','ast_pct','reb','stl','blk','tov','blk','stl','blk','ts_pct','stl_pct','stl_pct']
mstatcols = [s1+'_'+s2 for s1,s2 in zip(stat1,stat2)]
for s1,s2 in zip(stat1,stat2):
    ncaa[s1+'_'+s2] = ncaa[s1] * ncaa[s2]
    nba[s1+'_'+s2] = nba[s1] * nba[s2]
    
#create win shares per 48 minutes
ws = ['ows','dws','ws']
for x in ws:
    ncaa[x+'_p48'] = ncaa[x] / ncaa.mp_tot * 48
    nba[x+'_p48'] = nba[x] / nba.mp_tot * 48

In [103]:
#filter nba data for players who played at least 15 games in a season
nba_played = nba.loc[nba.gp >= 15]

#filter college data for players who played at least 10 games in a season
ncaa_played = ncaa.loc[(ncaa.gp >= 15)]

#get rookie nba seasons
rookies = nba_played.loc[nba_played.season_count == 1]

#get 3rd yr season
yr3 = nba_played.loc[nba_played.season_count == 3]

#get last college season
last_college_stats = ncaa_played.groupby('name').last().reset_index()

preNBA = last_college_stats.loc[(last_college_stats.highest_level_reached.isin(['NBA','2017-18']))]


In [104]:
ncaa_to_rook = preNBA.merge(rookies[['realgm_summary_page','season_count','ts_pct','fg3_pct','blk_pct','ast_pct','stl_pct','reb_pct','ows_p48','ortg','dws_p48','drtg','ws_p48','per']], 
             on = 'realgm_summary_page', suffixes=(['','_nba']), how='left')

ncaa_to_yr3 = preNBA.merge(yr3[['realgm_summary_page','season_count','ts_pct','fg3_pct','blk_pct','ast_pct','stl_pct','reb_pct','ows_p48','ortg','dws_p48','drtg','ws_p48','per']], 
             on = 'realgm_summary_page', suffixes=(['','_nba']), how='left')

In [105]:
ncaa_to_rook.weight = pd.to_numeric(ncaa_to_rook.weight)
ncaa_to_rook = ncaa_to_rook.loc[~ncaa_to_rook.age.isnull()]

ncaa_to_yr3.weight = pd.to_numeric(ncaa_to_yr3.weight)
ncaa_to_yr3 = ncaa_to_yr3.loc[~ncaa_to_yr3.age.isnull()]

In [106]:
def get_kbest(X,y,score_func = f_regression, k = 10):    
    """SElect the best k features using SelectKBest algorithm"""
    X_scaled = StandardScaler().fit_transform(X)
    kbest = SelectKBest(score_func, k=k)
    kbest.fit(X_scaled,y)
    kbest_fts = []
    for label in X.columns[kbest.get_support()]:
        kbest_fts.append(label)
    return kbest_fts

In [107]:
def avg_score(X,y, model,score="neg_mean_squared_error", cv=5):
    """Average score of a model using k-fold cross-validation, scoreing method and k taken as inputs, as well as model"""
    scores = cross_val_score(model, X, y,
                             scoring=score, cv=cv)
    if score == "neg_mean_squared_error":
        scores = np.sqrt(-scores)
    return scores.mean()


def ridge(X,y):
    ridge = Pipeline([
        ("scaler", StandardScaler()),
        ("ridge", Ridge()),
        ]) 
    
    alphas = np.logspace(-3,5,9)
    solvers = ['svd', 'cholesky', 'lsqr']
    
    param_grid = [
            {'ridge__alpha':alphas,'ridge__solver':solvers}
            ]
    grid_search = GridSearchCV(ridge, param_grid, scoring = 'r2')
    grid_search.fit(X, y)
    grid_search.best_params_
    return grid_search.best_estimator_

def lasso(X,y):
    lasso = Pipeline([
        ("scaler", StandardScaler()),
        ("lasso", Lasso()),
        ]) 
    
    alphas = np.logspace(-1,5,6)
    
    param_grid = [
            {'lasso__alpha':alphas}
            ]
    grid_search = GridSearchCV(lasso, param_grid, scoring = 'r2')
    grid_search.fit(X, y)
    grid_search.best_params_
    return grid_search.best_estimator_

def testdf(data,Xtr,ytr,m):
    m.fit(Xtr,ytr)
    y_pred = m.predict(Xtest)
    real_pred = pd.DataFrame(list(zip(y_pred,ytest)), index = ytest.index, columns = ['pred','test'])
    df = data[['name']].merge(real_pred, left_index=True, right_index = True).sort_values('pred', ascending=False)
    return df

def holddf(data,Xtr,ytr,m):
    m.fit(Xtr,ytr)
    y_pred_h = pd.DataFrame(m.predict(Xholdout), index = yholdout.index, columns = ['pred'])
    df = data[['name']].merge(y_pred_h, left_index=True, right_index = True).sort_values('pred', ascending=False)
    return df

In [108]:
training_cols = ['grade','mp','fgm','fga','fg_pct','fg3m','fg3a','fg3_pct','ftm','fta','ft_pct','oreb','dreb','reb','ast'
                 ,'stl','blk','pf','tov','pts','dbl_dbl','tpl_dbl','pts40','pts20','ast20','ast_to','stl_to','ft_fga','win_pct'
                 ,'ows','dws','ws','ts_pct','efg_pct','oreb_pct','dreb_pct','ast_pct','tov_pct','stl_pct','blk_pct'
                 ,'usg_pct','ortg','drtg','per','season_count','height','weight','age']+per36cols+mstatcols

def target_stat(data, target, features = training_cols):
    if np.mean(data.season_count_nba) == 1:
        Xtr = data.loc[(data.year <2016) & (~data[stat].isnull())][features].fillna(0)
        ytr = data.loc[(data.year <2016) & (~data[stat].isnull())][target].fillna(0)
        Xtest = data.loc[data.year == 2016][features].fillna(0)
        ytest = data.loc[data.year == 2016][target]
        Xholdout = data.loc[data.highest_level_reached == '2017-18'][features].fillna(0)
        yholdout = data.loc[data.highest_level_reached == '2017-18'][target]
    else:
        Xtr = data.loc[(data.year <2014) & (~data[stat].isnull())][features].fillna(0)
        ytr = data.loc[(data.year <2014) & (~data[stat].isnull())][target].fillna(0)
        Xtest = data.loc[data.year == 2014][features].fillna(0)
        ytest = data.loc[data.year == 2014][target]
        Xholdout = data.loc[data.highest_level_reached == '2017-18'][features].fillna(0)
        yholdout = data.loc[data.highest_level_reached == '2017-18'][target]
    return Xtr, ytr, Xtest, ytest, Xholdout, yholdout



In [109]:
Xtr, ytr, Xtest, ytest, Xholdout, yholdout = target_stat(ncaa_to_rook,'fg3_pct_nba')
model = ridge(Xtr,ytr)

rmse_train = avg_score(Xtr,ytr,model,cv=10, score = 'r2')

rmse_train

0.25270705109232494

In [110]:
testdf(ncaa_to_rook,Xtr, ytr,model).head(10)

Unnamed: 0,name,pred,test
917,Lonzo Ball,0.355635,0.305
951,Malik Monk,0.33331,0.342
400,Donovan Mitchell,0.326093,0.34
927,Luke Kennard,0.32609,0.415
657,Jayson Tatum,0.304357,0.434
981,Markelle Fultz,0.299784,
349,Dennis Smith,0.296561,0.313
1055,Monte Morris,0.2962,
651,Jawun Evans,0.294832,0.278
321,De'Aaron Fox,0.293069,0.307


In [111]:
coefs = pd.DataFrame(list(zip(training_cols,model.named_steps['ridge'].coef_)), columns = ['feature','coef'])
coefs.sort_values('coef',ascending = False).head(10)

Unnamed: 0,feature,coef
29,ows,0.017822
17,pf,0.016191
31,ws,0.015418
45,height,0.013956
25,ast_to,0.012042
6,fg3a,0.010332
51,fg3a_p36,0.010132
76,stl_blk,0.009851
7,fg3_pct,0.009243
5,fg3m,0.008774


In [112]:
from sklearn.linear_model import SGDRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

def sgd(X,y,scoring = 'r2'):
    sgd = Pipeline([
        ("scaler", StandardScaler()),
        ("sgd", SGDRegressor(max_iter = 1000, tol = .01)),
        ]) 
    loss = ['squared_loss','huber','epsilon_insensitive']
    penalty  = ['none','l2','l1','elasticnet']
    alphas = np.logspace(-3,3,7)
    
    param_grid = [
            {'sgd__loss': loss,'sgd__penalty':penalty,'sgd__alpha':alphas}
            ]
    grid_search = GridSearchCV(sgd, param_grid, scoring = scoring)
    grid_search.fit(X, y)
    grid_search.best_params_
    return sgd.fit(X,y)

def tree(X,y,scoring = 'r2'):
    tree = Pipeline([
        ("scaler", StandardScaler()),
        ("tree", DecisionTreeRegressor()),
        ]) 
        
    min_samples_split = range(2, 10)
    n_features = ['auto','sqrt','log2']
    
    param_grid = [
            {'tree__min_samples_split': min_samples_split,'tree__max_features':n_features}
            ]
    grid_search = GridSearchCV(tree, param_grid, scoring = scoring)
    grid_search.fit(X, y)
    grid_search.best_params_
    return grid_search.best_estimator_

def forest(X,y,scoring = 'r2'):
    forest = Pipeline([
        ("scaler", StandardScaler()),
        ("forest", RandomForestRegressor()),
        ]) 
        
    n_estimators  = [10,100]
    min_samples_split = range(2,5)
    
    param_grid = [
            {'forest__n_estimators':n_estimators,'forest__min_samples_split': min_samples_split}
            ]
    grid_search = GridSearchCV(forest, param_grid, scoring = scoring)
    grid_search.fit(X, y)
    grid_search.best_params_
    return grid_search.best_estimator_


In [113]:
Xtr, ytr, Xtest, ytest, Xholdout, yholdout = target_stat(ncaa_to_rook,'ast_pct_nba')
x = ridge(Xtr,ytr)

print(avg_score(Xtr,ytr,x,cv=10, score = 'neg_mean_squared_error'))
print(avg_score(Xtr,ytr,x,cv=10, score = 'r2'))

6.533092776451211
0.41518849791734275


In [114]:
testdf(ncaa_to_rook,Xtr, ytr,x)

Unnamed: 0,name,pred,test
651,Jawun Evans,24.496771,18.09
917,Lonzo Ball,23.592387,29.16
321,De'Aaron Fox,20.126179,24.61
349,Dennis Smith,19.617454,29.27
981,Markelle Fultz,18.841297,
1055,Monte Morris,18.795176,
474,Frank Mason,18.499236,22.98
360,"Derrick Walton, Jr.",16.352935,14.63
431,Edmond Sumner,16.088605,
1443,Xavier Rathan-Mayes,13.989951,


In [115]:
def scoredf(data):
    mainDict = {}
    stats = ['ts_pct_nba','fg3_pct_nba','ast_pct_nba','reb_pct_nba','stl_pct_nba','blk_pct_nba','ows_p48_nba','dws_p48_nba','per_nba','pick']
    models = ['ridge','lasso','sgd','forest']
    for stat in stats:
        mainDict[stat] = {}
        Xtr, ytr, Xtest, ytest, Xholdout, yholdout = target_stat(data,stat)
        mainDict[stat]['ridge'] = {}
        mainDict[stat]['ridge']['model'] = ridge(Xtr, ytr)
        mainDict[stat]['lasso'] = {}
        mainDict[stat]['lasso']['model']  = lasso(Xtr, ytr)
        mainDict[stat]['sgd'] = {}
        mainDict[stat]['sgd']['model']  = sgd(Xtr, ytr)
        mainDict[stat]['forest'] = {}
        mainDict[stat]['forest']['model']  = forest(Xtr, ytr)
        mse = []
        r2 = []
        for model in models:
            mainDict[stat][model]['mse'] = avg_score(Xtr,ytr,mainDict[stat][model]['model'],cv=10, score = 'neg_mean_squared_error')
            mainDict[stat][model]['r2'] = avg_score(Xtr,ytr,mainDict[stat][model]['model'],cv=10, score = 'r2')
            mainDict[stat][model]['test'] = testdf(data,Xtr,ytr,mainDict[stat][model]['model'])
            mainDict[stat][model]['hold'] = holddf(data,Xtr,ytr,mainDict[stat][model]['model'])
            if model != 'forest':
                mainDict[stat][model]['coefs'] = pd.DataFrame(list(zip(training_cols,mainDict[stat][model]['model'].named_steps[model].coef_)), columns = ['feature','coef']) 
            else:
                mainDict[stat][model]['coefs'] = pd.DataFrame(list(zip(training_cols,mainDict[stat][model]['model'].named_steps[model].feature_importances_)), columns = ['feature','coef']) 
    return mainDict

In [52]:
rook_proj=scoredf(ncaa_to_rook)
yr3_proj=scoredf(ncaa_to_yr3)

In [53]:
import pickle
with open('rook_proj.pickle', 'wb') as handle:
    pickle.dump(rook_proj, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('yr3_proj.pickle', 'wb') as handle2:
    pickle.dump(yr3_proj, handle2, protocol=pickle.HIGHEST_PROTOCOL)

In [54]:
import pickle
with open('rook_proj.pickle', 'rb') as handle:
    rook_proj = pickle.load(handle)
    
with open('yr3_proj.pickle', 'rb') as handle2:
    yr3_proj = pickle.load(handle2)

In [116]:
stats = [stat for stat in rook_proj]
models = ['ridge','lasso','sgd','forest']

mses = [[rook_proj[stat][model]['mse']  for model in models] for stat in rook_proj]
mse = pd.DataFrame(mses, index = stats, columns = models)
mse

Unnamed: 0,ridge,lasso,sgd,forest
fg3_pct_nba,0.145685,0.169386,0.149265,0.149451
ts_pct_nba,0.207786,0.219831,0.213482,0.218268
ast_pct_nba,6.533093,6.566024,6.576147,6.641526
reb_pct_nba,4.581142,4.580915,4.725248,4.831944
stl_pct_nba,0.841967,0.858693,0.864227,0.85584
blk_pct_nba,1.192709,1.192782,1.230892,1.251785
ows_p48_nba,0.047496,0.050346,0.048744,0.048554
dws_p48_nba,0.025871,0.027723,0.026412,0.026425
per_nba,5.363521,5.336953,5.423651,5.52907
pick,13.907018,13.90255,14.070747,14.427226


In [117]:
stats = [stat for stat in rook_proj]
models = ['ridge','lasso','sgd','forest']

r2s = [[rook_proj[stat][model]['r2']  for model in models] for stat in rook_proj]
r2 = pd.DataFrame(r2s, index = stats, columns = models)
r2

Unnamed: 0,ridge,lasso,sgd,forest
fg3_pct_nba,0.252707,-0.009646,0.208006,0.208149
ts_pct_nba,0.102105,-0.00318,0.077438,0.011102
ast_pct_nba,0.415188,0.409364,0.384873,0.389498
reb_pct_nba,0.287308,0.287631,0.248363,0.197013
stl_pct_nba,0.168911,0.134427,0.126204,0.130483
blk_pct_nba,0.38955,0.388106,0.355271,0.328055
ows_p48_nba,0.100373,-0.010149,0.068444,0.06212
dws_p48_nba,0.117789,-0.013371,0.054681,0.079076
per_nba,0.183333,0.191284,0.153428,0.134903
pick,0.553396,0.553394,0.543122,0.516332


In [118]:
stats = [stat for stat in yr3_proj]
models = ['ridge','lasso','sgd','forest']

r2sy3 = [[yr3_proj[stat][model]['r2']  for model in models] for stat in yr3_proj]
r2y3 = pd.DataFrame(r2sy3, index = stats, columns = models)
r2y3

Unnamed: 0,ridge,lasso,sgd,forest
fg3_pct_nba,0.248994,-0.015467,0.205544,0.199503
ts_pct_nba,0.234286,-0.011668,0.192542,0.189334
ast_pct_nba,0.33152,0.32889,0.299459,0.278057
reb_pct_nba,0.318865,0.331827,0.263985,0.285144
stl_pct_nba,0.22314,0.204558,0.191146,0.199178
blk_pct_nba,0.363445,0.356254,0.291695,0.333406
ows_p48_nba,0.107243,-0.015787,0.047777,0.074922
dws_p48_nba,0.218396,-0.013865,0.165412,0.196504
per_nba,0.317895,0.328786,0.296816,0.285992
pick,0.560235,0.560802,0.541989,0.529475


In [142]:
ts = rook_proj['ts_pct_nba']['ridge']['test'].reset_index(drop = True)
ts = ts.merge(ncaa_to_rook[['name','ts_pct_nba']].loc[(ncaa_to_rook.name.isin(list(ts.name)))&(ncaa_to_rook.fga >=5)],on='name')[['name','pred','ts_pct_nba']]
ts['rank'] = [x+1 for x in range(len(ts))]
ts.head(10)

Unnamed: 0,name,pred,ts_pct_nba,rank
0,Zach Collins,0.549214,0.475,1
1,Lonzo Ball,0.543062,0.444,2
2,T.J. Leaf,0.519138,0.545,3
3,Lauri Markkanen,0.517479,0.552,4
4,Josh Jackson,0.515128,0.48,5
5,Luke Kennard,0.509648,0.56,6
6,John Collins,0.508838,0.62,7
7,Jayson Tatum,0.500009,0.586,8
8,Bam Adebayo,0.498733,0.57,9
9,De'Aaron Fox,0.498479,0.478,10


In [177]:
tshold = rook_proj['ts_pct_nba']['ridge']['hold'].reset_index(drop = True)
tshold = tshold.merge(ncaa_to_rook[['name','ts_pct_nba']].loc[(ncaa_to_rook.name.isin(list(tshold.name)))&(ncaa_to_rook.fga >=5)],on='name')[['name','pred','ts_pct_nba']]
tshold['rank'] = [x+1 for x in range(len(tshold))]
tshold.head(10)

Unnamed: 0,name,pred,ts_pct_nba,rank
0,Jaren Jackson,0.569219,,1
1,DeAndre Ayton,0.556599,,2
2,"Wendell Carter, Jr.",0.53482,,3
3,Marvin Bagley III,0.528007,,4
4,Jock Landale,0.518278,,5
5,Jalen Brunson,0.510537,,6
6,Udoka Azubuike,0.509391,,7
7,Daniel Gafford,0.49918,,8
8,Cassius Winston,0.494219,,9
9,Jontay Porter,0.491528,,10


In [178]:
fg3 = rook_proj['fg3_pct_nba']['ridge']['test'].reset_index(drop = True)
fg3 = fg3.merge(ncaa_to_rook[['name','fg3_pct_nba']].loc[(ncaa_to_rook.name.isin(list(fg3.name)))&(ncaa_to_rook.fga >=5)],on='name')[['name','pred','fg3_pct_nba']]
fg3['rank'] = [x+1 for x in range(len(fg3))]
fg3.head(10)

Unnamed: 0,name,pred,fg3_pct_nba,rank
0,Lonzo Ball,0.355635,0.305,1
1,Malik Monk,0.33331,0.342,2
2,Donovan Mitchell,0.326093,0.34,3
3,Luke Kennard,0.32609,0.415,4
4,Jayson Tatum,0.304357,0.434,5
5,Markelle Fultz,0.299784,,6
6,Dennis Smith,0.296561,0.313,7
7,Monte Morris,0.2962,,8
8,Jawun Evans,0.294832,0.278,9
9,De'Aaron Fox,0.293069,0.307,10


In [179]:
fg3hold = rook_proj['fg3_pct_nba']['ridge']['hold'].reset_index(drop = True)
fg3hold = fg3hold.merge(ncaa_to_rook[['name','fg3_pct_nba']].loc[(ncaa_to_rook.name.isin(list(fg3hold.name)))&(ncaa_to_rook.fga >=5)],on='name')[['name','pred','fg3_pct_nba']]
fg3hold['rank'] = [x+1 for x in range(len(fg3hold))]
fg3hold.head(10)

Unnamed: 0,name,pred,fg3_pct_nba,rank
0,R.J. Cole,0.369101,,1
1,Cassius Winston,0.367258,,2
2,Markus Howard,0.352274,,3
3,Trae Young,0.346493,,4
4,Jalen Brunson,0.346038,,5
5,Isaiah Reese,0.341146,,6
6,Tony Carr,0.337946,,7
7,Fletcher Magee,0.325687,,8
8,Landry Shamet,0.325394,,9
9,Jhivvan Jackson,0.32314,,10


In [180]:
ast = rook_proj['ast_pct_nba']['ridge']['test'].reset_index(drop = True)
ast = ast.merge(ncaa_to_rook[['name','ast_pct_nba']].loc[(ncaa_to_rook.name.isin(list(ast.name)))&(ncaa_to_rook.mp >=15)],on='name')[['name','pred','ast_pct_nba']]
ast['rank'] = [x+1 for x in range(len(ast))]
ast.head(10)

Unnamed: 0,name,pred,ast_pct_nba,rank
0,Jawun Evans,24.496771,18.09,1
1,Lonzo Ball,23.592387,29.16,2
2,De'Aaron Fox,20.126179,24.61,3
3,Dennis Smith,19.617454,29.27,4
4,Markelle Fultz,18.841297,,5
5,Monte Morris,18.795176,,6
6,Frank Mason,18.499236,22.98,7
7,"Derrick Walton, Jr.",16.352935,14.63,8
8,Edmond Sumner,16.088605,,9
9,Xavier Rathan-Mayes,13.989951,,10


In [181]:
asthold = rook_proj['ast_pct_nba']['ridge']['hold'].reset_index(drop = True)
asthold=asthold.merge(ncaa_to_rook[['name','ast_pct_nba']].loc[(ncaa_to_rook.name.isin(list(asthold.name)))&(ncaa_to_rook.mp>15)],on='name')[['name','pred','ast_pct_nba']]
asthold['rank'] = [x+1 for x in range(len(asthold))]
asthold.head(10)

Unnamed: 0,name,pred,ast_pct_nba,rank
0,Trae Young,29.60673,,1
1,Cassius Winston,25.464991,,2
2,Markell Johnson,23.039647,,3
3,Jevon Carter,22.523451,,4
4,Darrian Ringo,22.409348,,5
5,Jordan McLaughlin,22.125918,,6
6,Tremont Waters,21.887625,,7
7,Emmett Naar,21.535693,,8
8,R.J. Cole,21.421848,,9
9,Devonte' Graham,20.737757,,10


In [182]:
reb = rook_proj['reb_pct_nba']['lasso']['test'].reset_index(drop = True)
reb=reb.merge(ncaa_to_rook[['name','reb_pct_nba']].loc[(ncaa_to_rook.name.isin(list(reb.name)))&(ncaa_to_rook.mp>15)],on='name')[['name','pred','reb_pct_nba']]
reb['rank'] = [x+1 for x in range(len(reb))]
reb.head(10)

Unnamed: 0,name,pred,reb_pct_nba,rank
0,Caleb Swanigan,14.021818,15.5,1
1,John Collins,13.605573,16.92,2
2,Zach Collins,12.363498,11.54,3
3,Ivan Rabb,11.323577,17.77,4
4,T.J. Leaf,11.319585,10.06,5
5,Bam Adebayo,11.232638,15.73,6
6,Josh Jackson,10.872426,9.54,7
7,Johnathan Motley,10.846478,,8
8,Justin Patton,10.598697,,9
9,Jonathan Isaac,10.522648,10.2,10


In [183]:
rebhold = rook_proj['reb_pct_nba']['lasso']['hold'].reset_index(drop = True)
rebhold=rebhold.merge(ncaa_to_rook[['name','reb_pct_nba']].loc[(ncaa_to_rook.name.isin(list(rebhold.name)))&(ncaa_to_rook.mp>15)],on='name')[['name','pred','reb_pct_nba']]
rebhold['rank'] = [x+1 for x in range(len(rebhold))]
rebhold.head(10)

Unnamed: 0,name,pred,reb_pct_nba,rank
0,DeAndre Ayton,15.470325,,1
1,Marvin Bagley III,13.928267,,2
2,"Wendell Carter, Jr.",13.709166,,3
3,Brandon McCoy,12.760283,,4
4,Mohamed Bamba,12.739718,,5
5,Jock Landale,12.624665,,6
6,Devontae Cacok,12.435727,,7
7,Udoka Azubuike,12.346515,,8
8,Jaren Jackson,12.339426,,9
9,Aaron Menzies,11.892816,,10


In [184]:
stl = rook_proj['stl_pct_nba']['ridge']['test'].reset_index(drop = True)
stl = stl.merge(ncaa_to_rook[['name','stl_pct_nba']].loc[(ncaa_to_rook.name.isin(list(stl.name)))&(ncaa_to_rook.mp>15)],on='name')[['name','pred','stl_pct_nba']]
stl['rank'] = [x+1 for x in range(len(stl))]
stl.head(10)

Unnamed: 0,name,pred,stl_pct_nba,rank
0,Lonzo Ball,1.973624,2.34,1
1,Jawun Evans,1.943427,2.26,2
2,Dennis Smith,1.921278,1.73,3
3,De'Anthony Melton,1.890245,,4
4,De'Aaron Fox,1.806568,1.74,5
5,Markelle Fultz,1.803705,,6
6,Josh Jackson,1.801233,1.95,7
7,Donovan Mitchell,1.780931,2.24,8
8,Sindarius Thornwell,1.721755,2.02,9
9,P.J. Dozier,1.630617,,10


In [185]:
stlhold = rook_proj['stl_pct_nba']['ridge']['hold'].reset_index(drop = True)
stlhold=stlhold.merge(ncaa_to_rook[['name','stl_pct_nba']].loc[(ncaa_to_rook.name.isin(list(stlhold.name)))&(ncaa_to_rook.mp>15)],on='name')[['name','pred','stl_pct_nba']]
stlhold['rank'] = [x+1 for x in range(len(stlhold))]
stlhold.head(10)

Unnamed: 0,name,pred,stl_pct_nba,rank
0,Jevon Carter,2.355317,,1
1,Darrian Ringo,2.223028,,2
2,Justin Simon,1.986872,,3
3,Matisse Thybulle,1.960508,,4
4,Dru Smith,1.953188,,5
5,Tremont Waters,1.922204,,6
6,De'Anthony Melton,1.890245,,7
7,John Konchar,1.871318,,8
8,Jacob Gilyard,1.869133,,9
9,T.J. Shorts II,1.866987,,10


In [186]:
blk = rook_proj['blk_pct_nba']['ridge']['test'].reset_index(drop = True)
blk =blk.merge(ncaa_to_rook[['name','blk_pct_nba']].loc[(ncaa_to_rook.name.isin(list(blk.name)))&(ncaa_to_rook.mp>15)],on='name')[['name','pred','blk_pct_nba']]
blk['rank'] = [x+1 for x in range(len(blk))]
blk.head(10)

Unnamed: 0,name,pred,blk_pct_nba,rank
0,Zach Collins,2.995598,2.41,1
1,Jake Wiley,2.661707,,2
2,Jordan Bell,2.652705,5.54,3
3,John Collins,2.461962,3.85,4
4,Chris Boucher,2.34471,,5
5,Austin Wiley,2.339669,,6
6,Bam Adebayo,2.272727,2.57,7
7,Justin Patton,2.173697,,8
8,Jarrett Allen,2.099816,4.56,9
9,Jonathan Isaac,2.080356,4.6,10


In [187]:
blkhold = rook_proj['blk_pct_nba']['ridge']['hold'].reset_index(drop = True)
blkhold = blkhold.merge(ncaa_to_rook[['name','blk_pct_nba']].loc[(ncaa_to_rook.name.isin(list(blkhold.name)))&(ncaa_to_rook.mp>15)],on='name')[['name','pred','blk_pct_nba']]
blkhold['rank'] = [x+1 for x in range(len(blkhold))]
blkhold.head(10)

Unnamed: 0,name,pred,blk_pct_nba,rank
0,Reggie Lynch,3.53259,,1
1,Mohamed Bamba,3.510691,,2
2,Jaren Jackson,3.385513,,3
3,Sagaba Konate,3.152178,,4
4,Kenny Wooten,3.091744,,5
5,Anfernee McLemore,3.040135,,6
6,Daniel Gafford,2.870323,,7
7,Robert Williams,2.846306,,8
8,Mike Watkins,2.81732,,9
9,Pauly Paulicap,2.79843,,10


In [188]:
per = rook_proj['per_nba']['lasso']['test'].reset_index(drop = True)
per = per.merge(ncaa_to_rook[['name','per_nba']].loc[(ncaa_to_rook.name.isin(list(per.name)))&(ncaa_to_rook.mp>15)],on='name')[['name','pred','per_nba']]
per['rank'] = [x+1 for x in range(len(per))]
per.head(10)

Unnamed: 0,name,pred,per_nba,rank
0,John Collins,14.039841,18.22,1
1,Zach Collins,13.499419,7.53,2
2,Lonzo Ball,13.423633,12.34,3
3,Markelle Fultz,12.614809,,4
4,Caleb Swanigan,12.168335,7.45,5
5,Josh Jackson,11.986642,11.9,6
6,T.J. Leaf,11.903419,10.75,7
7,Bam Adebayo,11.89943,15.69,8
8,Lauri Markkanen,11.567489,15.63,9
9,Jordan Bell,11.506894,17.92,10


In [189]:
rook_proj['per_nba']['lasso']['coefs'].sort_values('coef',ascending = False).head(10)

Unnamed: 0,feature,coef
31,ws,1.100496
43,per,0.604343
79,blk_pct_stl_pct,0.601854
61,tov_p36,0.29192
29,ows,0.268389
11,oreb,0.253331
63,pts_ast,0.230804
71,ast_stl,0.116095
41,ortg,0.11594
60,pf_p36,0.103085


In [206]:
perhold =rook_proj['per_nba']['lasso']['hold'].reset_index(drop = True)
perhold= perhold.merge(ncaa_to_rook[['name','per_nba']].loc[(ncaa_to_rook.name.isin(list(perhold.name)))& (ncaa_to_rook.mp>15)],on='name')[['name','pred','per_nba']]
perhold['rank'] = [x+1 for x in range(len(perhold))]
perhold.head(10)

Unnamed: 0,name,pred,per_nba,rank
0,DeAndre Ayton,14.970813,,1
1,Marvin Bagley III,14.200597,,2
2,"Wendell Carter, Jr.",14.095713,,3
3,Jaren Jackson,13.406711,,4
4,Trae Young,13.193693,,5
5,Mohamed Bamba,12.710441,,6
6,Udoka Azubuike,12.352823,,7
7,Jock Landale,12.224504,,8
8,Zhaire Smith,12.026184,,9
9,Daniel Gafford,11.944721,,10


In [212]:
pickfts = rook_proj['pick']['ridge']['coefs'].sort_values('coef',ascending = False).head(10)
pickfts.reset_index(drop = True)

Unnamed: 0,feature,coef
0,grade,5.737596
1,age,4.047837
2,season_count,1.887497
3,ft_pct,1.078558
4,dbl_dbl,0.994134
5,ft_fga,0.976977
6,dreb_pct,0.846444
7,fg3a,0.827361
8,stl_to,0.744902
9,oreb_pct,0.72944


In [221]:
pick = rook_proj['pick']['lasso']['test'].reset_index(drop = True)
pick =pick.merge(ncaa_to_rook[['name','pick']].loc[ncaa_to_rook.name.isin(list(pick.name))],on='name')[['name','pred','pick']]
pick = pick.sort_values('pred')
pick['rank'] = [x+1 for x in range(len(pick))]
pick.head(15)

Unnamed: 0,name,pred,pick,rank
76,Lonzo Ball,-4.769807,2.0,1
75,Josh Jackson,5.159913,4.0,2
74,Lauri Markkanen,5.889417,7.0,3
73,T.J. Leaf,6.036985,18.0,4
72,Justin Patton,6.928527,16.0,5
71,Zach Collins,7.10272,10.0,6
70,Malik Monk,11.639732,11.0,7
69,Jayson Tatum,12.665374,3.0,8
68,Bam Adebayo,13.089689,14.0,9
67,Markelle Fultz,14.69677,1.0,10


In [223]:
pickhold = rook_proj['pick']['lasso']['hold'].reset_index(drop = True)
pickhold =pickhold.merge(ncaa_to_rook[['name']].loc[ncaa_to_rook.name.isin(list(pickhold.name))],on='name')[['name','pred']]
pickhold = pickhold.sort_values('pred')
pickhold['rank'] = [x+1 for x in range(len(pickhold))]
pickhold.head(15)

Unnamed: 0,name,pred,rank
546,DeAndre Ayton,-2.874752,1
545,Marvin Bagley III,-0.250749,2
544,Trae Young,2.414126,3
543,"Wendell Carter, Jr.",6.377511,4
542,Jaren Jackson,7.123939,5
541,Udoka Azubuike,11.252245,6
540,Shai Gilgeous-Alexander,12.394873,7
539,Kevin Knox,14.818291,8
538,Jontay Porter,15.488847,9
537,Cameron Krutwig,17.503287,10


In [204]:
pick = yr3_proj['pick']['lasso']['hold'].reset_index(drop = True)
pick =pick.merge(ncaa_to_yr3[['name','pick']].loc[ncaa_to_yr3.name.isin(list(pick.name))],on='name')[['name','pred']]
pick = pick.sort_values('pred')
pick['rank'] = [x+1 for x in range(len(pick))]
pick.head(15)

Unnamed: 0,name,pred,rank
546,DeAndre Ayton,-6.300398,1
545,Marvin Bagley III,-1.747302,2
544,Trae Young,-1.34411,3
543,"Wendell Carter, Jr.",3.294601,4
542,Jaren Jackson,4.818698,5
541,Udoka Azubuike,8.848248,6
540,Shai Gilgeous-Alexander,10.946366,7
539,Kevin Knox,13.901393,8
538,Jontay Porter,14.619779,9
537,Omari Spellman,14.64535,10


In [193]:
temp = ts[['name','rank']].merge(fg3[['name','rank']], on='name')
temp1 = temp.merge(ast[['name','rank']], on='name')
temp2 = temp1.merge(reb[['name','rank']], on='name')
temp3 = temp2.merge(stl[['name','rank']], on='name')
temp4 = temp3.merge(blk[['name','rank']], on='name')
temp5 = temp4.merge(per[['name','rank']], on='name')
temp5.columns = ['name','rankts','rankfg3','rankAst','rankReb','rankStl','rankBlk','rankPER']
temp5['weightRank'] = [np.mean(x) for x in list(zip(temp5.rankts,temp5.rankfg3,temp5.rankAst,temp5.rankReb,temp5.rankStl,temp5.rankBlk,temp5.rankPER))]
temp5.sort_values('weightRank').head(30)

Unnamed: 0,name,rankts,rankfg3,rankAst,rankReb,rankStl,rankBlk,rankPER,weightRank
1,Lonzo Ball,2,1,2,21,1,34,3,9.142857
4,Josh Jackson,5,16,18,7,7,18,6,11.0
11,Markelle Fultz,12,6,5,25,6,27,4,12.142857
2,T.J. Leaf,3,20,24,5,32,14,7,15.0
7,Jayson Tatum,8,5,23,20,12,22,18,15.428571
0,Zach Collins,1,45,33,3,28,1,2,16.142857
9,De'Aaron Fox,10,10,3,32,5,44,15,17.0
15,Jonathan Isaac,16,27,45,10,18,10,11,19.571429
29,De'Anthony Melton,30,22,11,35,4,24,19,20.714286
14,Jawun Evans,15,9,1,46,2,65,12,21.428571


In [197]:
temph = tshold[['name','rank']].merge(fg3hold[['name','rank']], on='name')
temph1 = temph.merge(asthold[['name','rank']], on='name')
temph2 = temph1.merge(rebhold[['name','rank']], on='name')
temph3 = temph2.merge(stlhold[['name','rank']], on='name')
temph4 = temph3.merge(blkhold[['name','rank']], on='name')
temph5 = temph4.merge(perhold[['name','rank']], on='name')
temph5.columns = ['name','rankts','rankfg3','rankAst','rankReb','rankStl','rankBlk','rankPER']
temph5['weightRank'] = [np.mean(x) for x in list(zip(temph5.rankts,temph5.rankAst,temph5.rankReb,temph5.rankStl,temph5.rankBlk,temph5.rankPER))]
temph5.sort_values('weightRank').head(30).reset_index()

Unnamed: 0,index,name,rankts,rankfg3,rankAst,rankReb,rankStl,rankBlk,rankPER,weightRank
0,2,"Wendell Carter, Jr.",3,206,211,3,90,15,3,54.166667
1,0,Jaren Jackson,1,88,210,9,112,3,4,56.5
2,9,Jontay Porter,10,35,203,24,89,39,17,63.666667
3,10,Ja Morant,11,16,11,135,46,211,11,70.833333
4,16,Shai Gilgeous-Alexander,17,71,13,173,22,208,14,74.5
5,17,D'Marcus Simonds,18,48,48,152,17,188,26,74.833333
6,25,Zhaire Smith,26,226,142,111,69,104,9,76.833333
7,64,Ethan Happ,65,387,184,40,49,96,30,77.333333
8,86,De'Anthony Melton,87,118,70,166,7,123,38,81.833333
9,14,Grant Williams,15,271,163,72,179,65,32,87.666667
