In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network._multilayer_perceptron import MLPRegressor
from google.oauth2 import service_account
from datetime import datetime as date

import pandas as pd
import pandas_gbq


In [23]:
player_3gm_avg = ["min_3gm_avg", "fgm_3gm_avg", "fga_3gm_avg", "fg%_3gm_avg", "3pm_3gm_avg", 
                   "3pa_3gm_avg", "3p%_3gm_avg", "ftm_3gm_avg", "fta_3gm_avg", "ft%_3gm_avg", 
                   "oreb_3gm_avg", "dreb_3gm_avg", "reb_3gm_avg", "ast_3gm_avg", "stl_3gm_avg", 
                   "blk_3gm_avg", "to_3gm_avg", "pf_3gm_avg", "pts_3gm_avg", "plus_mins_3gm_avg"]

teams_3gm_avg = ["offrtg_3gm_avg", "defrtg_3gm_avg", "netrtg_3gm_avg", "ast%_3gm_avg", "ast_to_3gm_avg", 
                    "ast_ratio_3gm_avg", "oreb%_3gm_avg", "dreb%_3gm_avg", "reb%_3gm_avg", "tov%_3gm_avg", 
                    "efg%_3gm_avg", "ts%_3gm_avg", "pace_3gm_avg", "pie_3gm_avg"]

#using shifted windows for rolling data to prevent data leakage
player_query = f""" 
SELECT player,team,game_id,game_date,matchup,pts,reb,ast,blk,stl,`3pm`, {','.join([f'`{player}`' for player in player_3gm_avg])}
from `capstone_data.player_modeling_data`
order by game_date asc
"""

team_query = f"""
SELECT team,game_id,game_date,home,away,`match up`, {', '.join([f'`{team}`' for team in teams_3gm_avg])}
from `capstone_data.team_modeling_data`
order by game_date asc
"""


In [24]:
nba_player_data = pd.DataFrame(pandas_gbq.read_gbq(player_query,project_id='miscellaneous-projects-444203'))

Downloading: 100%|[32m██████████[0m|


In [4]:
team_data = pd.DataFrame(pandas_gbq.read_gbq(team_query,project_id='miscellaneous-projects-444203'))

Downloading: 100%|[32m██████████[0m|


In [25]:
features_for_team = ['home','away'] + teams_3gm_avg
features_for_player = ['pts','reb','ast','blk','stl'] + player_3gm_avg

In [None]:
full_data = nba_player_data.merge(team_data, on = ['game_id','team'], how = 'inner',suffixes=('','remove'))

full_data.drop([column for column in full_data.columns if 'remove' in column],axis = 1 , inplace=True)

Unnamed: 0,player,team,game_id,game_date,matchup,pts,reb,ast,blk,stl,3pm,min_3gm_avg,fgm_3gm_avg,fga_3gm_avg,fg%_3gm_avg,3pm_3gm_avg,3pa_3gm_avg,3p%_3gm_avg,ftm_3gm_avg,fta_3gm_avg,ft%_3gm_avg,oreb_3gm_avg,dreb_3gm_avg,reb_3gm_avg,ast_3gm_avg,stl_3gm_avg,blk_3gm_avg,to_3gm_avg,pf_3gm_avg,pts_3gm_avg,plus_mins_3gm_avg,home,away,match up,offrtg_3gm_avg,defrtg_3gm_avg,netrtg_3gm_avg,ast%_3gm_avg,ast_to_3gm_avg,ast_ratio_3gm_avg,oreb%_3gm_avg,dreb%_3gm_avg,reb%_3gm_avg,tov%_3gm_avg,efg%_3gm_avg,ts%_3gm_avg,pace_3gm_avg,pie_3gm_avg
0,Joe Harris,BKN,0022100001,2021-10-19,MIL,9.0,2.0,2.0,0.0,0.0,3.0,12.78,1.00,4.00,70.00,0.33,2.00,5.57,1.00,1.33,25.00,1.67,3.00,4.67,0.67,0.00,0.00,0.00,1.00,3.33,-1.67,0,1,BKN @ MIL,122.67,112.27,10.40,55.13,2.56,17.70,29.33,79.67,53.17,11.70,57.17,60.67,97.50,52.73
1,Brook Lopez,MIL,0022100001,2021-10-19,BKN,8.0,5.0,0.0,3.0,1.0,2.0,5.24,1.00,1.67,66.67,0.33,0.67,33.33,0.33,0.33,33.33,0.00,1.33,1.33,1.67,0.33,0.00,0.67,0.33,2.67,-0.67,1,0,MIL vs. BKN,123.73,116.70,7.07,53.47,1.70,17.33,23.70,74.87,49.93,14.17,62.23,65.43,99.33,51.43
2,Bruce Brown,BKN,0022100001,2021-10-19,MIL,0.0,1.0,0.0,1.0,0.0,0.0,24.66,7.67,13.67,57.23,1.67,4.00,41.67,3.00,3.00,33.33,2.00,4.00,6.00,0.00,0.67,1.67,0.67,1.33,20.00,10.33,0,1,BKN @ MIL,122.67,112.27,10.40,55.13,2.56,17.70,29.33,79.67,53.17,11.70,57.17,60.67,97.50,52.73
3,Moses Moody,GSW,0022100002,2021-10-19,LAL,2.0,2.0,0.0,0.0,0.0,0.0,13.51,3.00,4.33,79.03,0.00,0.00,0.00,0.67,1.33,16.67,1.67,3.00,4.67,0.00,0.33,0.33,0.67,2.00,6.67,-10.33,0,1,GSW @ LAL,107.70,127.57,-19.87,73.27,1.86,18.83,28.40,73.00,49.27,15.13,48.73,54.57,99.33,41.33
4,Nic Claxton,BKN,0022100001,2021-10-19,MIL,12.0,7.0,0.0,0.0,0.0,0.0,17.03,0.33,1.33,11.10,0.00,0.00,0.00,1.00,1.67,33.33,1.00,1.00,2.00,0.67,1.00,0.33,0.67,3.00,1.67,-15.33,0,1,BKN @ MIL,122.67,112.27,10.40,55.13,2.56,17.70,29.33,79.67,53.17,11.70,57.17,60.67,97.50,52.73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89067,Rudy Gobert,MIN,0022400690,2025-02-01,WAS,16.0,16.0,4.0,2.0,2.0,0.0,35.31,5.00,7.33,64.27,0.00,0.00,0.00,2.67,4.33,41.27,2.67,5.33,8.00,2.33,0.33,2.67,1.00,1.67,12.67,8.00,1,0,MIN vs. WAS,123.17,107.43,15.73,64.13,1.85,19.90,26.70,73.50,51.67,15.97,61.27,64.90,98.33,57.23
89068,Jalen Wilson,BKN,0022400689,2025-02-01,HOU,13.0,7.0,2.0,0.0,1.0,1.0,36.75,4.33,11.33,42.50,1.67,5.33,33.33,3.67,4.33,83.33,1.33,2.67,4.00,4.33,1.00,0.00,1.67,2.33,14.00,-3.00,0,1,BKN @ HOU,105.50,105.80,-0.30,69.73,1.72,17.73,26.20,63.37,44.37,14.87,48.00,54.00,94.00,49.77
89069,Max Christie,LAL,0022400692,2025-02-01,NYK,15.0,3.0,2.0,1.0,0.0,1.0,30.56,2.67,7.00,37.10,2.33,5.67,42.23,1.00,1.00,66.67,0.00,3.33,3.33,2.67,0.33,0.33,1.33,2.33,8.67,9.00,0,1,LAL @ NYK,118.20,107.57,10.63,60.30,1.71,19.13,26.07,70.90,51.73,17.63,58.60,62.83,99.17,57.47
89070,Deandre Ayton,POR,0022400694,2025-02-01,PHX,24.0,7.0,2.0,1.0,0.0,0.0,34.28,10.00,15.00,69.77,0.00,0.67,0.00,1.67,2.67,41.67,5.00,7.33,12.33,2.33,1.67,0.33,1.67,1.00,21.67,19.00,1,0,POR vs. PHX,124.93,112.53,12.40,59.17,1.59,17.37,38.87,68.73,54.17,15.63,56.73,61.70,94.33,55.03


In [40]:
full_data.to_csv('full_data.csv',mode = 'x')

FileExistsError: [Errno 17] File exists: 'full_data.csv'