In [1]:
# Work Flow:
# 1. import modules, Create vorp function - uses min-max normalization
# 2. create list of player IDs
# 3. Define previous season
# 4. Make empty final projections list to be populated later
# 5. Read in per game game data
# 6. If needed delete all season data trying to project for
# 7. Filter to more than 10 games played
# 8. Get ID's then project the stats for all the IDs

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Distance function b/n 2 pts
def calc_distance(u, v):
    dist = np.sqrt(np.sum((u-v)**2))   #classic distance formula
    return dist

# create a function to find the player and the next season
def find_player(df, player_id, season):
    # replaces for loop
    for row in df.itertuples():
        if season == row.season_id and player_id == row.player_id:
            return row

In [3]:
#doing the above process on all columns, col is input
def normalize(col):
    return (col - col.min()) / (col.max() - col.min())

cols_to_norm = ['pts',
    'min',
    'fgm',
    'fga',
    'fg3m',
    'fg3a',
    'ftm',
    'fta',
    'oreb',
    'dreb',
    'ast',
    'stl',
    'tov',
    'blk']

def vorp(df):
    for col_name in cols_to_norm:
        df['{}_norm'.format(col_name)] = normalize(df[col_name])
    return df

In [4]:
current_player_season = '2017-18'
final_projections = []

In [11]:
def player_comparison_tool(df, current_player_season, current_player_id):
    if (((df['season_id'] == current_player_season) & (df['player_id'] == current_player_id)).any() == False):
        print('Can\'t find player with id {} and season {}'.format(current_player_id, current_player_season))
        return
    for row in df.itertuples():
        if current_player_season == row.season_id and current_player_id == row.player_id:
            current_player_id = row.player_id
            break
      
    current_player_vector = np.array([
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'pts_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'min_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'fgm_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'fga_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'fg3m_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'fg3a_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'ftm_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'fta_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'oreb_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'dreb_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'ast_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'stl_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'tov_norm']).item(),
        (df.loc[(df['player_id'] == current_player_id) & (df['season_id'] == current_player_season), 'blk_norm']).item()
    ])

    print('Projecting player_id {0} for season {1}'.format(current_player_id, season_list[(season_list.index(row.season_id) + 1)]))
    
    player_distance = []
    
    weighted_numbers = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    
    for row in df.itertuples():
        compared_player_vector = np.array([
        row.pts_norm,
        row.min_norm,
        row.fgm_norm,
        row.fga_norm,
        row.fg3m_norm,
        row.fg3a_norm,
        row.ftm_norm,
        row.fta_norm,
        row.oreb_norm,
        row.dreb_norm,
        row.ast_norm,
        row.stl_norm,
        row.tov_norm,
        row.blk_norm
        ])
        
        vfunc = np.vectorize(calc_distance)
        distance_vect = vfunc(current_player_vector, compared_player_vector)
        weighted_distance = distance_vect * weighted_numbers
        #number = np.sum(np.abs(distance_vect)) / len(distance_vect)
        number = np.sum(np.abs(weighted_distance)) / len(distance_vect)
        player_distance.append(number)
    
    df['distance'] = player_distance
    
    ranked_df = df.sort_values('distance')
    
    stats = ['pts',
             'min',
             'fgm',
             'fga',
             'fg3m',
             'fg3a',
             'ftm',
             'fta',
             'oreb',
             'dreb',
             'ast',
             'stl',
             'tov',
             'blk'
             ]
    
    projected_stats = {}
    
    for col in stats:
        sum_stat = 0
        sum_weight = 0
        for index, row in ranked_df.iloc[1:11].iterrows():
            # skip over the row if it was 2017-18 season because we can't take the next
            if row.season_id == '2017-18':
                continue
            # get the players next season
            weight = (1 / row.distance)
            next_season = season_list[(season_list.index(row.season_id) + 1)]
            # find the player row with the id and the next season
            player_next_season = find_player(ranked_df, row.player_id, next_season)
            # if player_next_season doesn't exist then skip
            if player_next_season == None:
                continue
            sum_stat += getattr(player_next_season, col) * weight
            sum_weight += weight
        projected_stats['player_id'] = current_player_id
        projected_stats['proj_season_id'] = season_list[(season_list.index(current_player_season) + 1)]
        projected_stats['proj_' + col] = (sum_stat / sum_weight)
       
        
    return projected_stats

In [12]:
df_game_data = pd.read_csv('D:/Python Portfolio Projects/NBA Projection/nba-stats-model-master/nba-stats-csv/player_general_traditional_per_game_data.csv', header = 0)

df_new = df_game_data[df_game_data.season_id != '2018-19'] # select all data thats not 2018-19

df_cleaned = df_new.dropna(how='any')      # drop any rows that have a blank

min_gp = 10
df_filter = df_cleaned[df_cleaned['gp'] > min_gp]

if '2018-19' in df_filter:
    print('exist')
else:
    print('deleted')

deleted


In [13]:
df_final = df_filter.groupby(['season_id']).apply(vorp)
df_final

Unnamed: 0,player_id,season_id,gp,age,min,fgm,fga,fg_pct,fg3m,fg3a,...,fg3m_norm,fg3a_norm,ftm_norm,fta_norm,oreb_norm,dreb_norm,ast_norm,stl_norm,tov_norm,blk_norm
1,920,1996-97,83.0,33.0,30.8,2.8,5.8,0.483,0.0,0.2,...,0.000000,0.025974,0.234375,0.255319,0.457627,0.500000,0.070175,0.296296,0.204545,0.058824
2,243,1996-97,83.0,24.0,20.4,1.8,4.4,0.411,0.5,1.2,...,0.178571,0.155844,0.171875,0.138298,0.084746,0.200000,0.166667,0.333333,0.250000,0.088235
3,1425,1996-97,33.0,25.0,17.8,2.6,4.5,0.574,0.0,0.0,...,0.000000,0.000000,0.156250,0.159574,0.322034,0.230000,0.043860,0.185185,0.227273,0.264706
4,768,1996-97,47.0,27.0,11.1,1.4,3.8,0.374,0.0,0.1,...,0.000000,0.012987,0.171875,0.191489,0.118644,0.110000,0.035088,0.111111,0.159091,0.176471
5,228,1996-97,62.0,27.0,15.4,1.3,2.6,0.513,0.0,0.0,...,0.000000,0.000000,0.171875,0.180851,0.203390,0.210000,0.043860,0.185185,0.159091,0.058824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10098,1628380,2017-18,66.0,20.0,15.8,1.7,4.4,0.398,0.5,1.7,...,0.119048,0.170000,0.045977,0.059406,0.137255,0.233645,0.077670,0.125000,0.180000,0.192308
10099,203897,2017-18,24.0,23.0,27.3,5.7,14.8,0.383,1.8,5.1,...,0.428571,0.510000,0.413793,0.445545,0.078431,0.308411,0.291262,0.416667,0.360000,0.076923
10100,2216,2017-18,59.0,36.0,25.6,6.1,12.9,0.473,0.9,2.5,...,0.214286,0.250000,0.160920,0.178218,0.313725,0.457944,0.213592,0.291667,0.400000,0.076923
10101,2585,2017-18,69.0,34.0,14.1,2.2,3.8,0.564,0.0,0.0,...,0.000000,0.000000,0.126437,0.128713,0.254902,0.299065,0.155340,0.250000,0.200000,0.076923


In [14]:
season_list = [
    '1996-97',
    '1997-98',
    '1998-99',
    '1999-00',
    '2000-01',
    '2001-02',
    '2002-03',
    '2003-04',
    '2004-05',
    '2005-06',
    '2006-07',
    '2007-08',
    '2008-09',
    '2009-10',
    '2010-11',
    '2011-12',
    '2012-13',
    '2013-14',
    '2014-15',
    '2015-16',
    '2016-17',
    '2017-18',
    '2018-19']

In [15]:
player_df = pd.read_csv('D:/Python Portfolio Projects/NBA Projection/nba-stats-model-master/nba-stats-csv/player_info.csv')
player_filter = player_df[player_df['season_id'] == '2017-18']
player_ids_2018 = player_filter['player_id'].tolist()
player_ids_2018

[201166,
 203932,
 1626151,
 1628935,
 1627846,
 203940,
 201143,
 2744,
 202329,
 1626210,
 202692,
 1628409,
 203518,
 1627936,
 203458,
 1627816,
 1628035,
 203459,
 101161,
 203083,
 2738,
 201281,
 203460,
 101106,
 1626150,
 1628510,
 203952,
 1627790,
 1626148,
 203076,
 201229,
 1628469,
 1628499,
 203382,
 201167,
 203085,
 202340,
 1628389,
 203463,
 1628500,
 1627732,
 202687,
 201933,
 1626246,
 201628,
 1626171,
 203992,
 202711,
 203078,
 201148,
 1627742,
 201943,
 203464,
 1627362,
 1627744,
 201572,
 203998,
 1627854,
 1627741,
 203710,
 203468,
 101139,
 1628403,
 1626166,
 1627747,
 2546,
 1626224,
 202718,
 101112,
 1628429,
 1627767,
 1627778,
 1628449,
 1626191,
 101108,
 203991,
 203469,
 202332,
 201147,
 202709,
 201584,
 1626245,
 1626156,
 201571,
 1628391,
 1626177,
 1627745,
 203081,
 2863,
 1627814,
 1628422,
 1627772,
 1628464,
 201568,
 201980,
 201967,
 203957,
 1627863,
 203967,
 203121,
 201589,
 201954,
 1626199,
 1628021,
 204065,
 2561,
 202722,
 1

In [16]:
for baller_id in player_ids_2018:
    current_player_id = baller_id
    current_player_season = '2017-18'
    projections = player_comparison_tool(df_final, current_player_season, current_player_id)
    if (projections == None):
        continue
    final_projections.append(projections)

Projecting player_id 201166 for season 2018-19
Projecting player_id 203932 for season 2018-19
Can't find player with id 1626151 and season 2017-18
Can't find player with id 1628935 and season 2017-18
Projecting player_id 1627846 for season 2018-19
Can't find player with id 203940 and season 2017-18
Projecting player_id 201143 for season 2018-19
Projecting player_id 2744 for season 2018-19
Projecting player_id 202329 for season 2018-19
Can't find player with id 1626210 and season 2017-18
Projecting player_id 202692 for season 2018-19
Projecting player_id 1628409 for season 2018-19
Projecting player_id 203518 for season 2018-19
Projecting player_id 1627936 for season 2018-19
Projecting player_id 203458 for season 2018-19
Projecting player_id 1627816 for season 2018-19
Projecting player_id 1628035 for season 2018-19
Projecting player_id 203459 for season 2018-19
Projecting player_id 101161 for season 2018-19
Projecting player_id 203083 for season 2018-19
Projecting player_id 2738 for seas

Projecting player_id 101123 for season 2018-19
Can't find player with id 1628492 and season 2017-18
Projecting player_id 203507 for season 2018-19
Projecting player_id 203922 for season 2018-19
Projecting player_id 201609 for season 2018-19
Can't find player with id 202330 and season 2017-18
Projecting player_id 203476 for season 2018-19
Projecting player_id 202328 for season 2018-19
Projecting player_id 1627824 for season 2018-19
Projecting player_id 203084 for season 2018-19
Projecting player_id 202355 for season 2018-19
Projecting player_id 1627740 for season 2018-19
Projecting player_id 203546 for season 2018-19
Projecting player_id 101133 for season 2018-19
Projecting player_id 1628387 for season 2018-19
Projecting player_id 202697 for season 2018-19
Projecting player_id 203477 for season 2018-19
Projecting player_id 1628439 for season 2018-19
Projecting player_id 1627819 for season 2018-19
Projecting player_id 202738 for season 2018-19
Projecting player_id 1627785 for season 2018

Can't find player with id 1628493 and season 2017-18
Projecting player_id 1938 for season 2018-19
Can't find player with id 202705 and season 2017-18
Projecting player_id 201188 for season 2018-19
Projecting player_id 101162 for season 2018-19
Projecting player_id 201158 for season 2018-19
Projecting player_id 1627875 for season 2018-19
Projecting player_id 202694 for season 2018-19
Can't find player with id 1627779 and season 2017-18
Projecting player_id 203935 for season 2018-19
Projecting player_id 201596 for season 2018-19
Projecting player_id 1626209 for season 2018-19
Can't find player with id 203900 and season 2017-18
Projecting player_id 1628365 for season 2018-19
Projecting player_id 202693 for season 2018-19
Projecting player_id 1627737 for season 2018-19
Can't find player with id 203104 and season 2017-18
Projecting player_id 201578 for season 2018-19
Can't find player with id 1627850 and season 2017-18
Projecting player_id 101107 for season 2018-19
Projecting player_id 2034

Projecting player_id 2617 for season 2018-19


ZeroDivisionError: division by zero

In [17]:
final_projections

[{'player_id': 201166,
  'proj_season_id': '2018-19',
  'proj_pts': 3.247781814627802,
  'proj_min': 9.377417634084175,
  'proj_fgm': 1.1893871423726294,
  'proj_fga': 2.9801030796796444,
  'proj_fg3m': 0.33781538803139777,
  'proj_fg3a': 0.9657327762596014,
  'proj_ftm': 0.5550304794054391,
  'proj_fta': 0.7658628903378878,
  'proj_oreb': 0.17247938998219225,
  'proj_dreb': 0.7378874890795912,
  'proj_ast': 0.9881498387193348,
  'proj_stl': 0.3580651375906496,
  'proj_tov': 0.5561099116632604,
  'proj_blk': 0.052392748418150434},
 {'player_id': 203932,
  'proj_season_id': '2018-19',
  'proj_pts': 16.063872909578286,
  'proj_min': 33.942108833297176,
  'proj_fgm': 5.956650192908984,
  'proj_fga': 13.509174712616089,
  'proj_fg3m': 1.402501968022784,
  'proj_fg3a': 3.96183379519587,
  'proj_ftm': 2.800268326884198,
  'proj_fta': 3.60847455598598,
  'proj_oreb': 1.621652768244361,
  'proj_dreb': 5.126943373139583,
  'proj_ast': 1.9944303900893032,
  'proj_stl': 0.9095790943162939,
  'pro

In [18]:
Fproj = pd.DataFrame(final_projections)
Fproj

Unnamed: 0,player_id,proj_season_id,proj_pts,proj_min,proj_fgm,proj_fga,proj_fg3m,proj_fg3a,proj_ftm,proj_fta,proj_oreb,proj_dreb,proj_ast,proj_stl,proj_tov,proj_blk
0,201166,2018-19,3.247782,9.377418,1.189387,2.980103,0.337815,0.965733,0.555030,0.765863,0.172479,0.737887,0.988150,0.358065,0.556110,0.052393
1,203932,2018-19,16.063873,33.942109,5.956650,13.509175,1.402502,3.961834,2.800268,3.608475,1.621653,5.126943,1.994430,0.909579,1.579757,0.710601
2,1627846,2018-19,4.228748,13.063556,1.557578,3.787026,0.425348,1.268545,0.687648,0.968616,0.441212,1.263137,0.540173,0.365073,0.625435,0.098735
3,201143,2018-19,12.340232,30.246673,4.860939,10.359166,1.127251,3.079483,1.470858,1.927194,1.218308,4.582944,3.247733,0.652499,1.609267,1.025916
4,2744,2018-19,4.788678,12.869186,2.056174,4.236512,0.080406,0.200105,0.635905,0.861079,0.796009,2.264129,0.832093,0.332513,0.739052,0.552025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439,203092,2018-19,6.992136,17.728363,2.958830,6.163892,0.083728,0.217175,0.977756,1.260572,1.455646,2.894487,0.732690,0.385761,0.784640,0.516997
440,201936,2018-19,15.368434,33.090735,5.570422,13.122411,1.529883,4.206938,2.698458,3.321432,0.600491,2.985027,4.212666,1.169520,1.821345,0.335771
441,1627820,2018-19,8.743861,24.709674,3.408263,7.954844,0.494450,1.474450,1.459188,1.972056,0.631630,2.438760,2.286056,0.850559,1.395369,0.362485
442,2199,2018-19,6.764664,23.322402,2.701817,5.194800,0.000000,0.018130,1.364581,2.020834,2.358611,4.833303,0.718499,0.559929,0.992192,0.565582


In [19]:
df_names = pd.read_csv('D:/Python Portfolio Projects/NBA Projection/nba-stats-model-master/nba-stats-csv/player_id_player_name.csv')

In [20]:
final_stat_df = pd.merge(Fproj, df_names, left_on=['player_id'], right_on=['player_id'], how='inner')

round(final_stat_df,1)

final_stat_df

Unnamed: 0,player_id,proj_season_id,proj_pts,proj_min,proj_fgm,proj_fga,proj_fg3m,proj_fg3a,proj_ftm,proj_fta,proj_oreb,proj_dreb,proj_ast,proj_stl,proj_tov,proj_blk,player_name
0,201166,2018-19,3.247782,9.377418,1.189387,2.980103,0.337815,0.965733,0.555030,0.765863,0.172479,0.737887,0.988150,0.358065,0.556110,0.052393,Aaron Brooks
1,203932,2018-19,16.063873,33.942109,5.956650,13.509175,1.402502,3.961834,2.800268,3.608475,1.621653,5.126943,1.994430,0.909579,1.579757,0.710601,Aaron Gordon
2,201143,2018-19,12.340232,30.246673,4.860939,10.359166,1.127251,3.079483,1.470858,1.927194,1.218308,4.582944,3.247733,0.652499,1.609267,1.025916,Al Horford
3,2744,2018-19,4.788678,12.869186,2.056174,4.236512,0.080406,0.200105,0.635905,0.861079,0.796009,2.264129,0.832093,0.332513,0.739052,0.552025,Al Jefferson
4,202329,2018-19,9.784116,27.978945,3.574506,8.190519,1.407756,3.877699,1.238291,1.679742,1.101761,4.286956,1.754433,0.946406,1.138945,0.446132,Al-Farouq Aminu
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
342,1627755,2018-19,6.400291,20.434310,2.515711,5.655281,0.377043,1.038290,1.011089,1.272810,0.325468,1.538522,3.783353,0.737197,1.216487,0.125687,Tyler Ulis
343,203092,2018-19,6.992136,17.728363,2.958830,6.163892,0.083728,0.217175,0.977756,1.260572,1.455646,2.894487,0.732690,0.385761,0.784640,0.516997,Tyler Zeller
344,201936,2018-19,15.368434,33.090735,5.570422,13.122411,1.529883,4.206938,2.698458,3.321432,0.600491,2.985027,4.212666,1.169520,1.821345,0.335771,Tyreke Evans
345,2199,2018-19,6.764664,23.322402,2.701817,5.194800,0.000000,0.018130,1.364581,2.020834,2.358611,4.833303,0.718499,0.559929,0.992192,0.565582,Tyson Chandler


In [15]:
player_info_columns = [
    'player_name',
    'player_id',
    'proj_season_id']

In [16]:
player_info = final_stat_df[player_info_columns]

In [17]:
final_stat_df.to_csv('D:/Python Portfolio Projects/NBA Projection/player_proj_df_1819.csv', index=False)
player_info.to_csv('D:/Python Portfolio Projects/NBA Projection/player_info_df_1819.csv', index=False)

In [22]:
stats_1819 = df_game_data[df_game_data['season_id'] == '2018-19']

In [23]:
Total_stats_df = pd.merge(final_stat_df, stats_1819, left_on=['player_id'], right_on=['player_id'], how='inner')

In [24]:
Total_stats_df

Unnamed: 0,player_id,proj_season_id,proj_pts,proj_min,proj_fgm,proj_fga,proj_fg3m,proj_fg3a,proj_ftm,proj_fta,...,ftm,fta,ft_pct,oreb,dreb,ast,tov,stl,blk,pts
0,203932,2018-19,16.063873,33.942109,5.956650,13.509175,1.402502,3.961834,2.800268,3.608475,...,2.4,3.2,0.731,1.7,5.7,3.7,2.1,0.7,0.7,16.0
1,201143,2018-19,12.340232,30.246673,4.860939,10.359166,1.127251,3.079483,1.470858,1.927194,...,1.1,1.4,0.821,1.8,5.0,4.2,1.5,0.9,1.3,13.6
2,202329,2018-19,9.784116,27.978945,3.574506,8.190519,1.407756,3.877699,1.238291,1.679742,...,1.9,2.1,0.867,1.4,6.1,1.3,0.9,0.8,0.4,9.4
3,202692,2018-19,8.349126,20.803754,3.014837,7.286938,0.692337,2.190524,1.667793,2.166992,...,1.8,2.2,0.823,0.5,3.2,2.0,1.0,0.6,0.3,8.8
4,203518,2018-19,5.502404,16.998044,1.876591,4.592593,1.065089,2.800757,0.668980,0.839953,...,0.4,0.4,0.923,0.2,1.4,0.6,0.5,0.5,0.2,5.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
291,1627755,2018-19,6.400291,20.434310,2.515711,5.655281,0.377043,1.038290,1.011089,1.272810,...,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
292,203092,2018-19,6.992136,17.728363,2.958830,6.163892,0.083728,0.217175,0.977756,1.260572,...,2.3,3.0,0.778,1.8,2.2,0.7,0.7,0.2,0.5,7.7
293,201936,2018-19,15.368434,33.090735,5.570422,13.122411,1.529883,4.206938,2.698458,3.321432,...,1.7,2.3,0.719,0.5,2.4,2.4,1.7,0.8,0.3,10.2
294,2199,2018-19,6.764664,23.322402,2.701817,5.194800,0.000000,0.018130,1.364581,2.020834,...,0.9,1.6,0.586,1.7,3.9,0.7,0.8,0.4,0.4,3.1


In [25]:
Total_stats_df.to_csv('D:/Python Portfolio Projects/NBA Projection/total_stats_df.csv', index=False)