In [65]:
from nba_api.stats.endpoints import leaguegamelog
import pandas as pd
import inspect

In [66]:
games = leaguegamelog.LeagueGameLog(season = '2018')

In [67]:
df = pd.DataFrame(games.get_data_frames()[0])

In [None]:
df

In [55]:
#Keep relevant variables
df = df[['TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
       'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS']]

In [14]:
#Going to start by keeping all the predictors and then building a dictionary of 5 day moving averages
df = df[['TEAM_NAME',
       'GAME_DATE', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
       'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS']]

In [5]:
teams = df['TEAM_NAME'].unique()

In [8]:
teams

array(['Oklahoma City Thunder', 'Golden State Warriors',
       'Philadelphia 76ers', 'Boston Celtics', 'Minnesota Timberwolves',
       'San Antonio Spurs', 'New York Knicks', 'Atlanta Hawks',
       'Dallas Mavericks', 'Phoenix Suns', 'LA Clippers',
       'Denver Nuggets', 'Orlando Magic', 'Miami Heat',
       'New Orleans Pelicans', 'Houston Rockets', 'Detroit Pistons',
       'Brooklyn Nets', 'Memphis Grizzlies', 'Indiana Pacers',
       'Utah Jazz', 'Sacramento Kings', 'Charlotte Hornets',
       'Milwaukee Bucks', 'Cleveland Cavaliers', 'Toronto Raptors',
       'Chicago Bulls', 'Los Angeles Lakers', 'Portland Trail Blazers',
       'Washington Wizards'], dtype=object)

In [10]:
stat_dict = {}

In [None]:
home_df = df[df['MATCHUP'].str.contains("vs.")]
away_df = df[df['MATCHUP'].str.contains("@")]

In [None]:
merged = home_df.merge(away_df, on = "GAME_ID",suffixes = ("_H", "_A"))

In [None]:
merged = merged.sort_values(by = ['GAME_DATE'])

In [64]:
#Can use this to get means and standard deviations
def generate_team_stats(game_df): 
    general_stat_dict = {}
    stat_dict_with_ids = {}
    for team in teams:
        temp_df = df[df['TEAM_NAME'] == team]
        temp_df = temp_df.sort_values(by = ['GAME_DATE'])
        temp_df = temp_df.set_index('GAME_ID')
        temp_df = temp_df[[
           'FGM', 'FGA', 'FG_PCT', 'FG3M',
           'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
           'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS']]
        final_stats_df = temp_df.rolling(5).mean().shift(periods = 1).iloc[5:].to_dict('list')
        final_organized_stats = temp_df.rolling(5).mean().shift(periods = 1).iloc[5:].to_dict('index')
        stat_dict[team] = final_stats_df
        stat_dict_with_ids[team] = final_organized_stats
    return stat_dict, stat_dict_with_ids

In [69]:
dict1, dict2 = generate_team_stats(df)

In [70]:
dict2

{'Oklahoma City Thunder': {'0021800103': {'FGM': 38.6,
   'FGA': 93.8,
   'FG_PCT': 0.41259999999999997,
   'FG3M': 8.0,
   'FG3A': 32.2,
   'FG3_PCT': 0.251,
   'FTM': 19.6,
   'FTA': 30.0,
   'FT_PCT': 0.6534000000000001,
   'OREB': 14.8,
   'DREB': 34.8,
   'REB': 49.6,
   'AST': 20.6,
   'STL': 9.8,
   'BLK': 5.6,
   'TOV': 15.4,
   'PF': 26.0,
   'PTS': 104.8,
   'PLUS_MINUS': -6.8},
  '0021800111': {'FGM': 41.6,
   'FGA': 94.4,
   'FG_PCT': 0.4422,
   'FG3M': 7.8,
   'FG3A': 28.2,
   'FG3_PCT': 0.3028,
   'FTM': 19.4,
   'FTA': 29.0,
   'FT_PCT': 0.6674,
   'OREB': 13.4,
   'DREB': 35.4,
   'REB': 48.8,
   'AST': 20.2,
   'STL': 10.6,
   'BLK': 5.6,
   'TOV': 15.8,
   'PF': 28.8,
   'PTS': 110.4,
   'PLUS_MINUS': -1.6},
  '0021800119': {'FGM': 43.0,
   'FGA': 94.6,
   'FG_PCT': 0.45599999999999996,
   'FG3M': 8.0,
   'FG3A': 29.0,
   'FG3_PCT': 0.3036,
   'FTM': 20.2,
   'FTA': 30.2,
   'FT_PCT': 0.6644,
   'OREB': 13.2,
   'DREB': 35.0,
   'REB': 48.2,
   'AST': 20.6,
   'STL': 

In [60]:
temp_df = df[df['TEAM_NAME'] == 'Oklahoma City Thunder']

In [62]:
temp_df = temp_df.set_index('GAME_ID')

In [50]:
game_ids = temp_df['GAME_ID']

In [63]:
temp_df = temp_df[[
           'FGM', 'FGA', 'FG_PCT', 'FG3M',
           'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
           'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS']]
temp_df.rolling(5).mean().shift(periods = 1).iloc[5:]

Unnamed: 0_level_0,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS
GAME_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0021800103,38.6,93.8,0.4126,8.0,32.2,0.2510,19.6,30.0,0.6534,14.8,34.8,49.6,20.6,9.8,5.6,15.4,26.0,104.8,-6.8
0021800111,41.6,94.4,0.4422,7.8,28.2,0.3028,19.4,29.0,0.6674,13.4,35.4,48.8,20.2,10.6,5.6,15.8,28.8,110.4,-1.6
0021800119,43.0,94.6,0.4560,8.0,29.0,0.3036,20.2,30.2,0.6644,13.2,35.0,48.2,20.6,12.0,6.6,15.2,27.2,114.2,2.4
0021800144,43.6,91.0,0.4816,9.2,27.6,0.3512,20.6,29.4,0.6994,11.0,34.6,45.6,22.8,12.4,5.8,14.2,25.4,117.0,9.2
0021800153,45.2,90.8,0.4996,9.8,27.4,0.3752,22.2,30.6,0.7294,9.6,32.8,42.4,24.2,14.4,5.2,13.4,24.6,122.4,11.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0021801161,40.4,93.4,0.4340,12.8,37.0,0.3422,13.2,18.2,0.7308,11.8,29.8,41.6,22.8,7.0,5.6,10.2,19.6,106.8,-2.0
0021801186,41.4,95.0,0.4368,11.4,37.2,0.3082,13.2,19.4,0.6794,12.8,31.0,43.8,23.4,8.2,5.8,10.0,18.2,107.4,-0.2
0021801197,43.4,96.2,0.4518,11.4,35.8,0.3244,13.2,19.8,0.6658,14.6,32.8,47.4,26.0,8.6,5.8,11.4,18.0,111.4,4.8
0021801218,44.8,96.2,0.4670,11.6,35.4,0.3346,15.2,21.2,0.7086,15.6,32.2,47.8,25.6,8.4,5.2,12.8,21.2,116.4,4.4


In [53]:
game_ids.shift(periods = 5).iloc[5:]

204     0021800002
230     0021800025
247     0021800037
294     0021800065
308     0021800085
           ...    
2326    0021801083
2383    0021801104
2411    0021801119
2434    0021801133
2451    0021801146
Name: GAME_ID, Length: 77, dtype: object

In [38]:
temp_df = temp_df.sort_values(by = ['GAME_DATE'])

In [39]:
temp_df = temp_df[[
       'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
       'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS']]

In [26]:
temp_df.rolling(5).mean().shift(periods = 1).tail(10)

Unnamed: 0,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS
2171,38.2,93.6,0.4092,12.2,35.8,0.3498,16.0,23.6,0.6898,14.0,34.8,48.8,21.2,6.8,5.2,16.4,21.0,104.6,-6.0
2211,38.4,93.2,0.4132,14.0,38.8,0.3642,15.4,22.6,0.7034,13.2,33.2,46.4,23.0,6.8,5.2,16.6,21.2,106.2,-7.0
2241,38.4,95.0,0.4048,13.6,40.0,0.3374,15.2,22.6,0.6934,14.0,31.8,45.8,21.6,6.2,5.0,15.0,20.6,105.6,-9.0
2268,40.4,94.2,0.4294,13.6,39.0,0.3462,15.0,21.8,0.7068,12.4,31.8,44.2,23.8,6.8,5.4,13.2,19.8,109.4,-3.0
2300,41.4,94.4,0.4398,13.2,37.0,0.3514,13.0,19.6,0.6888,12.0,29.4,41.4,23.4,7.0,4.8,12.0,19.8,109.0,-3.2
2326,40.4,93.4,0.434,12.8,37.0,0.3422,13.2,18.2,0.7308,11.8,29.8,41.6,22.8,7.0,5.6,10.2,19.6,106.8,-2.0
2382,41.4,95.0,0.4368,11.4,37.2,0.3082,13.2,19.4,0.6794,12.8,31.0,43.8,23.4,8.2,5.8,10.0,18.2,107.4,-0.2
2410,43.4,96.2,0.4518,11.4,35.8,0.3244,13.2,19.8,0.6658,14.6,32.8,47.4,26.0,8.6,5.8,11.4,18.0,111.4,4.8
2434,44.8,96.2,0.467,11.6,35.4,0.3346,15.2,21.2,0.7086,15.6,32.2,47.8,25.6,8.4,5.2,12.8,21.2,116.4,4.4
2451,44.2,97.8,0.4526,13.2,38.2,0.3534,16.2,22.0,0.731,17.0,35.0,52.0,25.8,8.4,4.6,12.6,21.6,117.8,6.6


In [56]:
df.head(5)

Unnamed: 0,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS
0,1610612751,BKN,Brooklyn Nets,22000001,2020-12-22,BKN vs. GSW,W,240,42,92,...,13,44,57,24,11,7,20,22,125,26
1,1610612744,GSW,Golden State Warriors,22000001,2020-12-22,GSW @ BKN,L,240,37,99,...,13,34,47,26,6,6,18,24,99,-26
2,1610612746,LAC,LA Clippers,22000002,2020-12-22,LAC @ LAL,W,240,44,93,...,11,29,40,22,10,3,16,29,116,7
3,1610612747,LAL,Los Angeles Lakers,22000002,2020-12-22,LAL vs. LAC,L,240,38,81,...,8,37,45,22,4,2,19,20,109,-7
4,1610612738,BOS,Boston Celtics,22000003,2020-12-23,BOS vs. MIL,W,240,48,101,...,10,27,37,23,8,6,7,17,122,1


In [67]:
#Split data frames into home and away teams
home_df = df[df['MATCHUP'].str.contains("vs.")]
away_df = df[df['MATCHUP'].str.contains("@")]

In [71]:
merged = home_df.merge(away_df, on = "GAME_ID",suffixes = ("_H", "_A"))

In [72]:
merged

Unnamed: 0,TEAM_ID_H,TEAM_ABBREVIATION_H,TEAM_NAME_H,GAME_ID,GAME_DATE_H,MATCHUP_H,WL_H,MIN_H,FGM_H,FGA_H,...,OREB_A,DREB_A,REB_A,AST_A,STL_A,BLK_A,TOV_A,PF_A,PTS_A,PLUS_MINUS_A
0,1610612751,BKN,Brooklyn Nets,0022000001,2020-12-22,BKN vs. GSW,W,240,42,92,...,13,34,47,26,6,6,18,24,99,-26
1,1610612747,LAL,Los Angeles Lakers,0022000002,2020-12-22,LAL vs. LAC,L,240,38,81,...,11,29,40,22,10,3,16,29,116,7
2,1610612738,BOS,Boston Celtics,0022000003,2020-12-23,BOS vs. MIL,W,240,48,101,...,11,41,52,19,4,6,16,20,121,-1
3,1610612755,PHI,Philadelphia 76ers,0022000013,2020-12-23,PHI vs. WAS,W,240,41,87,...,5,35,40,28,7,4,20,26,107,-6
4,1610612763,MEM,Memphis Grizzlies,0022000017,2020-12-23,MEM vs. SAS,L,240,49,95,...,8,40,48,28,8,8,11,18,131,12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,1610612750,MIN,Minnesota Timberwolves,0022001071,2021-05-16,MIN vs. DAL,W,240,49,90,...,11,27,38,32,5,5,16,19,121,-15
1076,1610612764,WAS,Washington Wizards,0022001080,2021-05-16,WAS vs. CHA,W,240,44,91,...,10,32,42,25,8,5,13,15,110,-5
1077,1610612761,TOR,Toronto Raptors,0022001079,2021-05-16,TOR vs. IND,L,240,39,97,...,10,37,47,34,8,3,13,19,125,12
1078,1610612760,OKC,Oklahoma City Thunder,0022001074,2021-05-16,OKC vs. LAC,W,240,50,94,...,16,28,44,17,8,3,3,14,112,-5


In [77]:
#make list of all team names, for loop filter by team n times, and make the rolling dict values?
# Can then go in and replace values/make new df with the normalized vals

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22018,1610612760,OKC,Oklahoma City Thunder,0021800002,2018-10-16,OKC @ GSW,L,240,33,...,29,45,21,12,6,15,21,100,-8,1
1,22018,1610612744,GSW,Golden State Warriors,0021800002,2018-10-16,GSW vs. OKC,W,240,42,...,41,58,28,7,7,21,29,108,8,1
2,22018,1610612755,PHI,Philadelphia 76ers,0021800001,2018-10-16,PHI @ BOS,L,240,34,...,41,47,18,8,5,16,20,87,-18,1
3,22018,1610612738,BOS,Boston Celtics,0021800001,2018-10-16,BOS vs. PHI,W,240,42,...,43,55,21,7,5,15,20,105,18,1
4,22018,1610612750,MIN,Minnesota Timberwolves,0021800010,2018-10-17,MIN @ SAS,L,240,39,...,32,46,20,9,2,11,27,108,-4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,22018,1610612743,DEN,Denver Nuggets,0021801228,2019-04-10,DEN vs. MIN,W,240,39,...,41,53,23,6,4,13,12,99,4,1
2456,22018,1610612766,CHA,Charlotte Hornets,0021801222,2019-04-10,CHA vs. ORL,L,240,41,...,28,32,21,3,4,8,14,114,-8,1
2457,22018,1610612753,ORL,Orlando Magic,0021801222,2019-04-10,ORL @ CHA,W,240,48,...,30,39,24,5,2,6,19,122,8,1
2458,22018,1610612744,GSW,Golden State Warriors,0021801225,2019-04-10,GSW @ MEM,L,240,46,...,30,39,32,3,5,14,17,117,-15,1
