In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network._multilayer_perceptron import MLPRegressor
from google.oauth2 import service_account
from datetime import datetime as date
from scipy.stats import pearsonr
from scipy.stats import spearmanr

import pandas as pd
import pandas_gbq


In [None]:
# Leftover inspection cell: a bare reference to the imported scipy.stats.pearsonr.
# Evaluating it only displays the function object; no correlation is computed here.
pearsonr

In [49]:
# Player-level rolling-average feature columns, generated from the stat
# prefixes so the shared "_3gm_avg" suffix is written only once.
player_3gm_avg = [
    f"{stat}_3gm_avg"
    for stat in (
        "min", "fgm", "fga", "fg%", "3pm", "3pa", "3p%", "ftm", "fta", "ft%",
        "oreb", "dreb", "reb", "ast", "stl", "blk", "to", "pf", "pts", "plus_mins",
    )
]

# Team-level rolling-average feature columns, built the same way.
teams_3gm_avg = [
    f"{stat}_3gm_avg"
    for stat in (
        "offrtg", "defrtg", "netrtg", "ast%", "ast_to", "ast_ratio", "oreb%",
        "dreb%", "reb%", "tov%", "efg%", "ts%", "pace", "pie",
    )
]

# Note carried over from the original cell: the rolling data uses shifted
# windows to prevent data leakage.
# Each feature name is backtick-quoted in the SELECT list; the names include
# "%" characters and leading digits.
player_query = (
    " \n"
    "SELECT player,team,game_id,game_date,matchup,pts,reb,ast,blk,stl,`3pm`, "
    + ",".join(f"`{column}`" for column in player_3gm_avg)
    + ",season\n"
    "from `capstone_data.player_modeling_data`\n"
    "order by game_date asc\n"
)

# Team query: same quoting, but the feature columns are joined with ", ".
team_query = (
    "\n"
    "SELECT team,game_id,game_date,home,away, "
    + ", ".join(f"`{column}`" for column in teams_3gm_avg)
    + "\n"
    "from `capstone_data.team_modeling_data`\n"
    "order by game_date asc\n"
)


In [2]:
# Load the merged player/team modelling frame. A local CSV acts as a cache so
# BigQuery is only queried when that file does not exist yet.

# Built outside the try/except so both lists are defined whether the data came
# from the cache or from BigQuery (previously they only existed on a cache miss).
features_for_team = ['home', 'away'] + teams_3gm_avg
features_for_player = ['pts', 'reb', 'ast', 'blk', 'stl'] + player_3gm_avg

try:
    full_data = pd.read_csv('full_data.csv')
except FileNotFoundError:
    # Only a missing cache triggers the download. Any other failure (corrupt
    # file, permission error, interrupt) now surfaces as its own error instead
    # of being swallowed and followed by a BigQuery round trip.
    gbq_project_id = 'miscellaneous-projects-444203'
    nba_player_data = pandas_gbq.read_gbq(player_query, project_id=gbq_project_id)
    team_data = pandas_gbq.read_gbq(team_query, project_id=gbq_project_id)

    # Columns present in both frames keep their name on the player side and
    # get the 'remove' suffix on the team side; the suffixed copies are dropped.
    merged = nba_player_data.merge(
        team_data,
        on=['game_id', 'team'],
        how='inner',
        suffixes=('', 'remove'),
    )
    full_data = merged.drop(
        columns=[column for column in merged.columns if column.endswith('remove')]
    )

    # index=False keeps the row index out of the file, so a later cached load
    # has the same columns as this fresh frame (no extra 'Unnamed: 0' column).
    # Cache files written before this change still contain such columns.
    # mode='x' refuses to overwrite an existing file.
    full_data.to_csv('full_data.csv', mode='x', index=False)

In [3]:
# Display the loaded frame; the recorded output below is a truncated preview.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns in that output look like
# row-index columns saved by to_csv calls that did not pass index=False — confirm.
full_data

Unnamed: 0.1,Unnamed: 0,player,team,game_id,game_date,matchup,pts,reb,ast,blk,...,ast_to_3gm_avg,ast_ratio_3gm_avg,oreb%_3gm_avg,dreb%_3gm_avg,reb%_3gm_avg,tov%_3gm_avg,efg%_3gm_avg,ts%_3gm_avg,pace_3gm_avg,pie_3gm_avg
0,0,Andrew Wiggins,GSW,22100002,2021-10-19,LAL,12.0,7.0,1.0,0.0,...,1.86,18.83,28.40,73.00,49.27,15.13,48.73,54.57,99.33,41.33
1,1,Brook Lopez,MIL,22100001,2021-10-19,BKN,8.0,5.0,0.0,3.0,...,1.70,17.33,23.70,74.87,49.93,14.17,62.23,65.43,99.33,51.43
2,2,Bruce Brown,BKN,22100001,2021-10-19,MIL,0.0,1.0,0.0,1.0,...,2.56,17.70,29.33,79.67,53.17,11.70,57.17,60.67,97.50,52.73
3,3,DeAndre' Bembry,BKN,22100001,2021-10-19,MIL,0.0,0.0,0.0,0.0,...,2.56,17.70,29.33,79.67,53.17,11.70,57.17,60.67,97.50,52.73
4,4,Gary Payton II,GSW,22100002,2021-10-19,LAL,0.0,0.0,0.0,0.0,...,1.86,18.83,28.40,73.00,49.27,15.13,48.73,54.57,99.33,41.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93813,93813,Deandre Ayton,POR,22400694,2025-02-01,PHX,24.0,7.0,2.0,1.0,...,1.59,17.37,38.87,68.73,54.17,15.63,56.73,61.70,94.33,55.03
93814,93814,Harrison Barnes,SAS,22400693,2025-02-01,MIA,12.0,2.0,1.0,1.0,...,3.26,19.63,28.60,62.37,45.37,13.10,54.90,57.97,101.83,45.63
93815,93815,Jalen Wilson,BKN,22400689,2025-02-01,HOU,13.0,7.0,2.0,0.0,...,1.72,17.73,26.20,63.37,44.37,14.87,48.00,54.00,94.00,49.77
93816,93816,Max Christie,LAL,22400692,2025-02-01,NYK,15.0,3.0,2.0,1.0,...,1.71,19.13,26.07,70.90,51.73,17.63,58.60,62.83,99.17,57.47
