# Comparing to competitors

In [None]:
# Load the per-player frame; both the 'proj_*' columns and the un-prefixed
# stat columns used later in this notebook are read from this one file.
player_proj = pd.read_csv('nba-csv/player_proj_df.csv')

### Convert our projection columns to match the competitors' columns

In [None]:
# Identifier columns followed by every projected box-score stat read from
# player_proj; each stat column carries the 'proj_' prefix.
columns = ['player_name', 'player_id'] + [
    'proj_' + stat
    for stat in ('pts', 'min', 'fgm', 'fga', 'fg3m', 'fg3a', 'ftm', 'fta',
                 'oreb', 'dreb', 'ast', 'stl', 'tov', 'blk')
]

In [None]:
# Take an independent copy of just the projection columns so derived columns
# can be added to it without touching player_proj.
df_proj_final = player_proj[columns].copy()

In [None]:
# Derive the rate/total columns that the competitor file reports directly.
# Shooting percentages are makes divided by attempts; a player with zero
# attempts yields NaN here, which downstream scoring must drop or handle.
df_proj_final['proj_fg%'] = df_proj_final['proj_fgm'] / df_proj_final['proj_fga']
df_proj_final['proj_ft%'] = df_proj_final['proj_ftm'] / df_proj_final['proj_fta']
# Total rebounds are offensive plus defensive rebounds.
df_proj_final['proj_reb'] = df_proj_final['proj_oreb'] + df_proj_final['proj_dreb']

In [None]:
# Identifier columns plus the projected stats, listed in the same stat order
# as the competitor columns (PTS, REB, AST, BLK, STL, FG%, FT%, 3PM, MIN, TO).
final_columns = ['player_name', 'player_id'] + [
    'proj_' + stat
    for stat in ('pts', 'reb', 'ast', 'blk', 'stl',
                 'fg%', 'ft%', 'fg3m', 'min', 'tov')
]

In [None]:
# Narrow the projection frame to the comparison columns, in comparison order.
df_proj_final = df_proj_final.loc[:, final_columns]

### Read in competitor data

In [None]:
# Read the competitor projections (the ESPN/CBS/FantasyPros averaged file)
# into a dataframe.
df_comp_1 = pd.read_csv('nba-csv/ESPN_CBS_FantasyPros_Fantasy_Basketball_Overall_2018_Average_Projections.csv')
# Second competitor source (Hashtag/CBS/FantasyPros) is currently disabled:
#df_comp_2 = pd.read_csv('Hashtag_CBS_FantasyPros_Fantasy_Basketball_Overall_2018_Average_Projections.csv')

In [None]:
# Competitor columns that are not used in the comparison.
columns_to_drop = ['Team', 'Positions', 'GP']

In [None]:
# Remove those columns from the competitor frame in place.
df_comp_1.drop(columns = columns_to_drop, inplace = True)

In [None]:
# Spot-check five random rows after the drop.
df_comp_1.sample(5)

In [None]:
# Lowercase the competitor player names so they can be matched
# case-insensitively against player_name when looking up player_id.
lowercase_names = df_comp_1['Player'].str.lower()
df_comp_1['Player'] = lowercase_names

In [None]:
# Preview the normalized names.
lowercase_names.head(5)

In [None]:
# Load the player_name / player_id lookup and keep only the 2016-17 rows.
player_df = pd.read_csv('nba-csv/player_name_player_id_all_seasons_final.csv')
season = player_df['season_id'] == '2016-17'
# .copy() makes the filtered frame independent of the full lookup, so the
# later assignment to player_df['player_name'] does not write into a slice
# (which pandas reports as SettingWithCopyWarning).
player_df = player_df[season].copy()

In [None]:
# Lowercase the lookup names the same way as the competitor names so the
# name-based merge keys line up.
lowercase = player_df['player_name'].str.lower()

In [None]:
# Overwrite the lookup's name column with the lowercased version.
player_df['player_name'] = lowercase

In [None]:
# Attach player_id to each competitor row by matching the lowercased names.
# The left join keeps competitor rows that have no match in the lookup; those
# rows end up with missing player_name / player_id values.
comp_merged = df_comp_1.merge(
    player_df[['player_name', 'player_id']],
    how='left',
    left_on='Player',
    right_on='player_name',
)
# Collapse fully identical rows and renumber the index from zero.
comp_merged = comp_merged.drop_duplicates().reset_index(drop=True)

In [None]:
# Drop every row with a missing value in any column; among others this
# removes competitor players that found no player_id in the left merge.
comp_merged.dropna(how = 'any', inplace = True)

In [None]:
# Inspect the column dtypes before casting player_id.
comp_merged.dtypes

In [None]:
# Cast player_id to int; this relies on rows with a missing id having been
# dropped above.
player_ids = comp_merged['player_id'].astype(int)

In [None]:
comp_merged['player_id'] = player_ids

In [None]:
# 'Player' was only the left-hand merge key; the matching player_name column
# from the lookup is kept instead.
del comp_merged['Player']

In [None]:
# Competitor columns kept for the comparison: the two identifier columns
# followed by the ten stat columns.
cols = ['player_name', 'player_id'] + [
    'PTS', 'REB', 'AST', 'BLK', 'STL',
    'FG%', 'FT%', '3PM', 'MIN', 'TO',
]

In [None]:
# Keep only the identifier and stat columns listed in cols, in that order.
comp_merged = comp_merged[cols]

In [None]:
# Spot-check five random rows.
comp_merged.sample(5)

In [None]:
# Independent copy of the identifier columns and the un-prefixed box-score
# stats from player_proj; these are treated below as the real values.
df_real = player_proj[[
    'player_name', 'player_id',
    'pts', 'min', 'fgm', 'fga', 'fg3m', 'fg3a', 'ftm', 'fta',
    'oreb', 'dreb', 'ast', 'stl', 'tov', 'blk',
]].copy()

In [None]:
# Derive the same rate/total columns as for the projections.
# Shooting percentages are makes divided by attempts; a player with zero
# attempts yields NaN here, which downstream scoring must drop or handle.
df_real['fg%'] = df_real['fgm'] / df_real['fga']
df_real['ft%'] = df_real['ftm'] / df_real['fta']
# Total rebounds are offensive plus defensive rebounds.
df_real['reb'] = df_real['oreb'] + df_real['dreb']

In [None]:
# Identifier columns plus the real-value stats, in the same stat order used
# for the projected and competitor column lists.
final_real_columns = ['player_name', 'player_id'] + [
    'pts', 'reb', 'ast', 'blk', 'stl',
    'fg%', 'ft%', 'fg3m', 'min', 'tov',
]

In [None]:
# Narrow the real-value frame to the comparison columns, in comparison order.
df_real_final = df_real[final_real_columns]

In [None]:
# Preview the first rows.
df_real_final.head()

In [None]:
# Left-join the real values onto the competitor rows by player_id; competitor
# players with no row in df_real_final get NaN in the real-value columns.
competitor_final = comp_merged.merge(df_real_final, how='left', on='player_id')

In [None]:
# Spot-check five random merged rows.
competitor_final.sample(5)

In [None]:
# Temporary: drop rows with any missing value (e.g. competitor players that
# have no row in df_real_final after the left merge) until
# player_comparison_tool has been run for all players.
competitor_proj = competitor_final.dropna(how = 'any')

In [None]:
# Real-value stat columns for the matched competitor rows.
# NOTE: this rebinds df_real, which previously held the per-player frame
# selected from player_proj.
df_real = competitor_proj.loc[:, ['pts','reb','ast','blk','stl','fg%','ft%','fg3m','min','tov']]

In [None]:
# Competitor-projected stat columns, in the same stat order as df_real so the
# two frames line up column by column.
df_proj = competitor_proj.loc[:, ['PTS','REB','AST','BLK','STL','FG%','FT%','3PM','MIN','TO']]

In [None]:
# Score the competitor projections: mean squared error per stat column,
# square-rooted to RMSE, then averaged across the stat columns.
lin_mse = mean_squared_error(df_real, df_proj, multioutput='raw_values')
lin_rmse = np.sqrt(lin_mse)
confidence = np.mean(lin_rmse)
# NOTE(review): the printed figure is 100 minus the mean RMSE over stats on
# different scales (points, minutes, percentages), so read "percent" loosely.
# Rounding is applied after the subtraction so float noise is not printed.
print('{0} percent confidence in projected {1} per game stats'.format(round(100 - confidence, 2), '2016-17'))

### Score our model's projections on the same stat columns

In [None]:
# Spot-check five random rows of our model's projections.
df_proj_final.sample(5)

In [None]:
# Pair each projection row with the real values for the same player_id.
# Rows with any missing value are dropped, the same way the competitor frame
# is cleaned before scoring, because mean_squared_error rejects NaN input.
model_final = pd.merge(df_proj_final, df_real_final, how = 'left', on = 'player_id').dropna(how = 'any')

In [None]:
# Preview the first merged rows.
model_final.head(5)

In [None]:
# Real-value stat columns for our model's rows.
# NOTE: this rebinds df_real (and df_proj below), replacing the frames used
# for the competitor score.
df_real = model_final.loc[:, ['pts','reb','ast','blk','stl','fg%','ft%','fg3m','min','tov']]

In [None]:
# Our projected stat columns, in the same stat order as df_real so the two
# frames line up column by column.
df_proj = model_final.loc[:, ['proj_pts','proj_reb','proj_ast','proj_blk','proj_stl','proj_fg%','proj_ft%','proj_fg3m','proj_min','proj_tov']]

In [None]:
# Score our model's projections: mean squared error per stat column,
# square-rooted to RMSE, then averaged across the stat columns.
lin_mse = mean_squared_error(df_real, df_proj, multioutput='raw_values')
lin_rmse = np.sqrt(lin_mse)
confidence = np.mean(lin_rmse)
# NOTE(review): the printed figure is 100 minus the mean RMSE over stats on
# different scales (points, minutes, percentages), so read "percent" loosely.
# Rounding is applied after the subtraction so float noise is not printed.
print('{0} percent confidence in projected {1} per game stats'.format(round(100 - confidence, 2), '2016-17'))