In [1]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error



In [2]:
# Load the scraper's merged output, discard the stray CSV index column and BPM,
# then replace every remaining missing value with 0.
main_df = (
    pd.read_csv('merged.csv')
    .drop(columns=['Unnamed: 0', 'BPM'])
    .fillna(0)
)

# RAPTOR data by player, read from its own CSV.
raptor = pd.read_csv('modern_RAPTOR_by_player.csv')

In [3]:
# Clean the RAPTOR data: keep four columns and rename two of them to the
# notebook's naming ('Player', 'Raptor').
columns_to_rename = {'player_name': 'Player', 'raptor_total': 'Raptor'}
raptor_clean = (
    raptor.loc[:, ['player_name', 'raptor_total', 'season', 'mp']]
    .rename(columns=columns_to_rename)
)

# Ratings are kept to one decimal place.
raptor_clean['Raptor'] = raptor_clean['Raptor'].round(1)

In [4]:
# Years noted by the author as having missing data:
#   shuttle run -> 2000 through 2012
#   bench press -> 2014, 2016, 2021, 2022, 2023
#
# TODO: get total mp
# TODO: remove duplicate names

shuttleRun = range(2000, 2012 + 1)
benchPress = [2014, 2016, 2021, 2022, 2023]


In [5]:
#Gets predictor variables
def get_predictors(year, df=None):
    """Return the column labels to use as model predictors.

    Parameters
    ----------
    year : int
        Draft year being predicted. Currently unused: the column selection is
        the same for every year. The parameter is kept so existing callers
        keep working and so year-specific exclusions can be added later.
    df : pandas.DataFrame, optional
        Frame whose columns are filtered. Defaults to the notebook-level
        ``main_df`` when omitted.

    Returns
    -------
    pandas.Index
        Every column label of ``df`` except those in ``columns_to_drop``.
        Note that 'DRAFT_NUMBER' is among the excluded columns.

    Raises
    ------
    KeyError
        If any label in ``columns_to_drop`` is not a column of ``df``.
    """
    if df is None:
        df = main_df

    columns_to_drop = ['Total Minutes',
                       'Seasons Played',
                       'Player',
                       'POSITION',
                       'Year',
                       'STANDING_REACH_FT_IN',
                       'DRAFT_NUMBER.1',
                       'Average Minutes',
                       'DRAFT_NUMBER']

    # TODO: exclude measurements that are missing for specific draft years
    # (see shuttleRun / benchPress); not implemented yet.

    # Drop the labels from the column index directly instead of copying the
    # whole frame just to read its columns.
    return df.columns.drop(columns_to_drop)

        
    


In [6]:
# Display every column label in main_df for reference
main_df.columns

Index(['Year', 'Player', 'POSITION', 'DRAFT_NUMBER', 'HEIGHT_WO_SHOES',
       'WEIGHT', 'WINGSPAN', 'STANDING_REACH', 'STANDING_REACH_FT_IN',
       'STANDING_VERTICAL_LEAP', 'MAX_VERTICAL_LEAP', 'LANE_AGILITY_TIME',
       'THREE_QUARTER_SPRINT', 'BENCH_PRESS', 'Age (Days)', 'DRAFT_NUMBER.1',
       'G', 'MP', 'FG%', '2P%', '3P%', 'FT%', 'PTS', 'TRB', 'AST', 'STL',
       'BLK', 'WS', 'Total Minutes', 'Seasons Played', 'Average Minutes'],
      dtype='object')

In [7]:
import numpy as np
import scipy
import scipy.stats as stats

In [8]:
def choose_year(year, column_to_predict, predictors=None):
    """Fit a Ridge model on earlier draft classes and score one draft class.

    Rows of ``main_df`` with ``Year < year`` are used for training and rows
    with ``Year == year`` are predicted.

    Parameters
    ----------
    year : int
        Draft class to predict.
    column_to_predict : str
        Name of the target column in ``main_df``.
    predictors : list-like of str, optional
        Feature columns. Defaults to ``get_predictors(year)``. The target
        column is always removed from the features.

    Returns
    -------
    tuple (clean, message)
        ``clean`` is a DataFrame sorted by ascending prediction with columns
        'Player', the target, 'Predictions' (both rounded to int),
        'Difference' (actual - predicted), 'Adjusted Difference' (z-score of
        'Difference') and 'Score' (z-score * average minutes).
        ``message`` is a string reporting the mean squared error, computed on
        the unrounded values.

    Raises
    ------
    ValueError
        If there are no rows before ``year`` to train on, or no rows for
        ``year`` to predict.
    """
    if predictors is None:
        # Build the feature list here instead of reading a notebook-level
        # `predictors` variable that only exists once a later cell has run.
        predictors = get_predictors(year)
    # Never hand the target to the model as one of its own features.
    predictors = [column for column in predictors if column != column_to_predict]

    #Training the model
    train = main_df[main_df['Year'] < year]
    test = main_df[main_df['Year'] == year]
    if train.empty:
        raise ValueError('No draft classes before ' + str(year) + ' to train on')
    if test.empty:
        raise ValueError('No players found for draft class ' + str(year))
    reg = Ridge(alpha=0.01)
    reg.fit(train[predictors], train[column_to_predict])

    #Getting predictions in a dataframe
    predictions = pd.Series(reg.predict(test[predictors]),
                            index=test.index, name='Predictions')
    combination = pd.concat([test[['Player', column_to_predict]], predictions], axis=1)

    #Getting Mean Square Error (on the unrounded values)
    mse = mean_squared_error(combination[column_to_predict], combination['Predictions'])
    message = 'The mean square error for this draft is: ' + str(mse)

    #Gets a clutter free df 'clean' which neatly displays information
    # Sort on the unrounded predictions first, then round for display.
    clean = combination.sort_values('Predictions', ascending=True)
    clean['Predictions'] = clean['Predictions'].round(0).astype(int)
    clean[column_to_predict] = clean[column_to_predict].round(0).astype(int)
    clean['Difference'] = clean[column_to_predict] - clean['Predictions']

    #Gets Z Score of Difference
    clean['Adjusted Difference'] = scipy.stats.zscore(clean['Difference'])

    # 'Average Minutes' is only present in `clean` when it is the target;
    # for any other target, take it from the same rows of the test split.
    if 'Average Minutes' in clean.columns:
        minutes = clean['Average Minutes']
    else:
        minutes = test.loc[clean.index, 'Average Minutes']
    clean['Score'] = clean['Adjusted Difference'] * minutes

    return clean, message

    

In [9]:
def Predict_Average_Minutes_Control(year):
    """Compare the full model with a draft-number-only control model.

    Both models predict 'Average Minutes' for the given draft class, training
    on rows of ``main_df`` with ``Year < year``. The full-model predictions
    come from ``choose_year``; the control is a Ridge model whose only
    feature is 'DRAFT_NUMBER'.

    Parameters
    ----------
    year : int
        Draft class to predict.

    Returns
    -------
    pandas.DataFrame
        One row per player with columns 'Player', 'Average Minutes',
        'Control_Predictions', 'Control_Difference' (actual - control),
        'Predictions' and 'Difference' (from the full model), sorted by
        descending 'Predictions'. Numeric columns are rounded to int.
    """
    column_to_predict = 'Average Minutes'
    control_predictor = ['DRAFT_NUMBER']

    # Full-model results; the MSE message returned alongside is not needed here.
    predictions_df, _ = choose_year(year, column_to_predict)

    #Training the control model
    train = main_df[main_df['Year'] < year]
    test = main_df[main_df['Year'] == year]
    reg = Ridge(alpha=0.01)
    reg.fit(train[control_predictor], train[column_to_predict])

    #Getting control predictions next to the actual values
    clean = test[['Player', column_to_predict]].copy()
    clean['Control_Predictions'] = reg.predict(test[control_predictor])

    #Cleans Predictions Dataframe
    for column in (column_to_predict, 'Control_Predictions'):
        clean[column] = clean[column].round(0).astype(int)
    clean['Control_Difference'] = clean[column_to_predict] - clean['Control_Predictions']

    #Merge Control and Main Dataframes
    merged = clean.merge(predictions_df[['Player', 'Predictions', 'Difference']],
                         how='left', on='Player')
    return merged.sort_values('Predictions', ascending=False)

In [10]:
# Disabled experiment, kept as a bare string so it never runs.
# NOTE(review): it would fail if re-enabled as written -- choose_year() also
# requires column_to_predict, and element [1] of its result is a message
# string, which cannot be added to the integer accumulator `test`.
'''
test = 0
for year in range(2014,2021):
    print(year)
    print(choose_year(int(year))[1])
    test = test + choose_year(int(year))[1]
'''

'\ntest = 0\nfor year in range(2014,2021):\n    print(year)\n    print(choose_year(int(year))[1])\n    test = test + choose_year(int(year))[1]\n'

In [11]:
from nba_api.stats.static import players
from nba_api.stats.endpoints import draftcombinestats
from nba_api.stats.endpoints import commonplayerinfo
def get_info(name):
    """Look up a player by exact full name and return one of their info frames.

    Parameters
    ----------
    name : str
        Full name, compared for exact (case-sensitive) equality with the
        'full_name' field of ``players.get_players()``.

    Returns
    -------
    pandas.DataFrame
        The second data frame returned by the CommonPlayerInfo endpoint, with
        a 'SCHOOL' column copied from the first data frame.

    Raises
    ------
    ValueError
        If no player has exactly this full name.
    """
    matching_ids = [player['id'] for player in players.get_players()
                    if player['full_name'] == name]
    if not matching_ids:
        raise ValueError(f'No player found with full name {name!r}')

    # If several players share the name, the last one in the list is used;
    # only one endpoint object is created rather than one per match.
    player_info = commonplayerinfo.CommonPlayerInfo(player_id=matching_ids[-1])
    df = player_info.get_data_frames()
    df[1]['SCHOOL'] = df[0]['SCHOOL']

    return df[1]
    
    

In [12]:
# Example: fetch and display the info frame for a single player
a = get_info('Steven Adams')
a

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TimeFrame,PTS,AST,REB,PIE,SCHOOL
0,203500,Steven Adams,2021-22,6.9,3.4,10.0,0.108,Pittsburgh


In [13]:
# Prompt for the draft class; input() returns a string, so it still needs converting to int
year = input("Enter the draft class you would like to predict: ")


Enter the draft class you would like to predict: 2012


In [14]:
# Convert the entered year from str to int, then collect the predictor columns
year = int(year)
predictors = get_predictors(year)

In [15]:
# Build and display the full-model vs. control comparison table for the chosen draft class
a = Predict_Average_Minutes_Control(year)
a

Unnamed: 0,Player,Average Minutes,Control_Predictions,Control_Difference,Predictions,Difference
32,Jared Sullinger,1307,1098,209,1791,-484
15,Michael Kidd-Gilchrist,1374,1538,-164,1764,-390
1,Harrison Barnes,2377,1422,955,1633,744
29,Thomas Robinson,841,1468,-627,1579,-738
14,Terrence Jones,928,1167,-239,1538,-610
4,Jae Crowder,1859,797,1062,1518,341
3,Bradley Beal,2242,1514,728,1503,739
7,Draymond Green,1950,774,1176,1483,467
35,Tony Wroten,768,1005,-237,1347,-579
36,Tyler Zeller,906,1190,-284,1325,-419
