# Getting and filtering data for each game for seasons from 2011-2022 (12 seasons)

In [60]:
from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.endpoints import boxscoretraditionalv2
import pandas as pd

# Starting year of the season that the stats, combining and saving cells work on.
YEAR = 2019

# Request settings passed positionally to leaguegamelog.LeagueGameLog
# by fetch_season_game_logs.
COUNTER = 1
DIRECTION = "ASC"
LEAGUE = "00"  # presumably the NBA league id — confirm against nba_api docs
PLAYER_OR_TEAM = "T"
SEASON_TYPE = "Regular Season"
SORTER = "DATE"

def fetch_season_game_logs(season):
    """Download the league game log for one season and trim it per team/game.

    ``season`` is forwarded to ``leaguegamelog.LeagueGameLog`` together with
    the module-level request constants (COUNTER, DIRECTION, LEAGUE,
    PLAYER_OR_TEAM, SEASON_TYPE, SORTER).

    Returns a new DataFrame holding TEAM_ID, TEAM_NAME, GAME_ID, MATCHUP, WL
    and GAME_DATE, plus two derived columns:
      * HOME/AWAY        -- 0 when MATCHUP contains '@', otherwise 1
      * TEAM_2_HOME/AWAY -- 1 - HOME/AWAY
    """
    request = leaguegamelog.LeagueGameLog(
        COUNTER, DIRECTION, LEAGUE, PLAYER_OR_TEAM, season, SEASON_TYPE, SORTER
    )

    # Only the first frame returned by the endpoint is used.
    raw_log = request.get_data_frames()[0]

    wanted = ["TEAM_ID", "TEAM_NAME", "GAME_ID", "MATCHUP", "WL", "GAME_DATE"]
    trimmed = raw_log[wanted].copy()

    def _home_flag(matchup):
        # An '@' in the matchup text marks the row as 0, anything else as 1.
        if '@' in matchup:
            return 0
        return 1

    trimmed['HOME/AWAY'] = trimmed['MATCHUP'].apply(_home_flag)
    trimmed['TEAM_2_HOME/AWAY'] = 1 - trimmed['HOME/AWAY']
    return trimmed

def fetch_multiple_seasons(start_year, end_year):
    """Fetch game logs for several seasons and attach each row's opponent.

    Args:
        start_year: Starting year of the first season to fetch.
        end_year: Starting year of the last season to fetch (inclusive).

    Returns:
        A DataFrame sorted by GAME_ID with one row per (game, team) and the
        columns GAME_ID, GAME_DATE, MATCHUP, TEAM_NAME, TEAM_ID, TEAM_2_NAME,
        TEAM_2_ID, WL, HOME/AWAY, TEAM_2_HOME/AWAY, where the TEAM_2_* columns
        describe the other team in the same game.

    Raises:
        ValueError: If any GAME_ID does not have exactly two rows. Opponents
            are taken from the adjacent row, so a game with a different row
            count would silently mis-pair every following game.
    """
    season_frames = []
    for year in range(start_year, end_year + 1):
        # Season label is "<start year>-<last two digits of next year>", e.g. '2010-11'.
        season = f"{year}-{str(year + 1)[-2:]}"
        season_frames.append(fetch_season_game_logs(season))

    combined_data = (
        pd.concat(season_frames, ignore_index=True)
        .sort_values(by='GAME_ID', ascending=True)
        .reset_index(drop=True)
    )

    # Adjacent-row pairing is only valid when every game contributes exactly
    # two rows; fail loudly instead of producing shifted opponents.
    rows_per_game = combined_data['GAME_ID'].value_counts()
    incomplete = rows_per_game[rows_per_game != 2]
    if not incomplete.empty:
        raise ValueError(
            "Expected exactly two rows per GAME_ID; "
            f"{len(incomplete)} game(s) differ, e.g. {incomplete.index.tolist()[:10]}"
        )

    # After the sort, rows (0, 1), (2, 3), ... belong to the same game.
    # i ^ 1 maps each even position to the next row and each odd position to
    # the previous one, i.e. to the opponent's row.
    partner = combined_data.index.to_numpy() ^ 1
    combined_data['TEAM_2_ID'] = combined_data['TEAM_ID'].to_numpy()[partner]
    combined_data['TEAM_2_NAME'] = combined_data['TEAM_NAME'].to_numpy()[partner]

    columns_order = [
        "GAME_ID", "GAME_DATE", "MATCHUP", "TEAM_NAME", "TEAM_ID",
        "TEAM_2_NAME", "TEAM_2_ID", "WL", "HOME/AWAY", "TEAM_2_HOME/AWAY",
    ]
    return combined_data[columns_order]

# Preview: fetch only the season starting in 2023 (season label '2023-24').
fetch_multiple_seasons(start_year=2023, end_year=2023)

Unnamed: 0,GAME_ID,GAME_DATE,MATCHUP,TEAM_NAME,TEAM_ID,TEAM_2_NAME,TEAM_2_ID,WL,HOME/AWAY,TEAM_2_HOME/AWAY
0,0022300001,2023-11-03,IND vs. CLE,Indiana Pacers,1610612754,Cleveland Cavaliers,1610612739,W,1,0
1,0022300001,2023-11-03,CLE @ IND,Cleveland Cavaliers,1610612739,Indiana Pacers,1610612754,L,0,1
2,0022300002,2023-11-03,NYK @ MIL,New York Knicks,1610612752,Milwaukee Bucks,1610612749,L,0,1
3,0022300002,2023-11-03,MIL vs. NYK,Milwaukee Bucks,1610612749,New York Knicks,1610612752,W,1,0
4,0022300003,2023-11-03,MIA vs. WAS,Miami Heat,1610612748,Washington Wizards,1610612764,W,1,0
...,...,...,...,...,...,...,...,...,...,...
2455,0022301228,2023-12-08,SAC @ PHX,Sacramento Kings,1610612758,Phoenix Suns,1610612756,W,0,1
2456,0022301229,2023-12-07,IND @ MIL,Indiana Pacers,1610612754,Milwaukee Bucks,1610612749,W,0,1
2457,0022301229,2023-12-07,MIL vs. IND,Milwaukee Bucks,1610612749,Indiana Pacers,1610612754,L,1,0
2458,0022301230,2023-12-07,NOP @ LAL,New Orleans Pelicans,1610612740,Los Angeles Lakers,1610612747,L,0,1


# Acquiring Season Stats for each team

In [61]:
from api_helpers.team_stats_helpers import load_dataframe

# Season-level team statistics requested from the project helper.
attribute_list = ["PTS", "FGA", "FGM", "FG_PCT", "OREB", "DREB", "AST", "TOV", "WIN_PCT"]
nba_dataframe = load_dataframe(attribute_list)

# Discard rows whose FGM is 0.
nba_dataframe = nba_dataframe.drop(nba_dataframe[nba_dataframe["FGM"] == 0].index)

# YEAR arrives as text split on '-'; keep the part before the dash as a number
# so it can be compared with the YEAR constant.
nba_dataframe['YEAR'] = pd.to_numeric(nba_dataframe['YEAR'].str.split('-').str[0])
nba_dataframe = nba_dataframe.sort_values(by="YEAR")

# Encode NBA_FINALS_APPEARANCE numerically. The column is reassigned rather
# than modified through a chained ``inplace=True`` call, which pandas
# deprecates and which does not update the frame under Copy-on-Write.
# NOTE(review): "FINALS APPEARANCE" maps to 0.0, the same value as "no
# appearance", while "LEAGUE CHAMPION" maps to 2 — confirm 0.0 (not 1) is intended.
finals_flag = nba_dataframe["NBA_FINALS_APPEARANCE"].fillna(0.0)
finals_flag = finals_flag.replace("FINALS APPEARANCE", 0.0)
finals_flag = finals_flag.replace("LEAGUE CHAMPION", 2)
nba_dataframe["NBA_FINALS_APPEARANCE"] = finals_flag

# Keep only the season selected by YEAR.
nba_dataframe = nba_dataframe[nba_dataframe['YEAR'] == YEAR]
season_stat_dataframe = nba_dataframe.reset_index(drop=True)

season_stat_dataframe



Unnamed: 0,TEAM_ID,YEAR,WIN_PCT,NBA_FINALS_APPEARANCE,FGM,FGA,FG_PCT,OREB,DREB,AST,TOV,PTS
0,1610612737,2020,0.569,0.0,2937,6281,0.468,760,2525,1737,953,8186
1,1610612749,2020,0.639,2.0,3221,6610,0.487,741,2724,1834,995,8649
2,1610612751,2020,0.667,0.0,3106,6289,0.494,640,2559,1929,975,8537
3,1610612745,2020,0.236,0.0,2826,6372,0.444,671,2396,1699,1060,7833
4,1610612763,2020,0.528,0.0,3085,6608,0.467,803,2543,1938,957,8157
5,1610612761,2020,0.375,0.0,2859,6383,0.448,680,2314,1735,952,8011
6,1610612750,2020,0.319,0.0,2932,6546,0.448,757,2376,1846,1027,8073
7,1610612766,2020,0.458,0.0,2875,6324,0.455,762,2389,1933,1069,7881
8,1610612738,2020,0.5,0.0,2985,6401,0.466,765,2421,1689,1012,8109
9,1610612752,2020,0.569,0.0,2839,6225,0.456,696,2554,1541,932,7706


# Combining Dataframes
For each game, listing the stats of both teams for comparison

In [62]:
# Build one row per game carrying both teams' season statistics.
game_data = fetch_multiple_seasons(YEAR, YEAR)
season_team_data = season_stat_dataframe

# Season-stat columns copied onto each game row, listed in the order the
# TEAM_1_*/TEAM_2_* column pairs are appended to game_data.
stat_columns = ["PTS", "FG_PCT", "FGA", "AST", "OREB", "DREB", "TOV", "WIN_PCT"]

# Index the season stats by TEAM_ID once instead of scanning the frame for
# every row and statistic. When a TEAM_ID is repeated the first row wins.
stats_by_team = season_team_data.drop_duplicates(subset="TEAM_ID").set_index("TEAM_ID")

# Every team appearing in a game must have season stats; otherwise the lookup
# would leave silent NaN values in the output.
teams_in_games = pd.concat([game_data["TEAM_ID"], game_data["TEAM_2_ID"]])
missing_teams = teams_in_games[~teams_in_games.isin(stats_by_team.index)].unique().tolist()
if missing_teams:
    raise ValueError(f"No season stats for TEAM_ID(s) {missing_teams} in season {YEAR}")

for stat in stat_columns:
    per_team = stats_by_team[stat]
    game_data[f"TEAM_1_{stat}"] = game_data["TEAM_ID"].map(per_team)
    game_data[f"TEAM_2_{stat}"] = game_data["TEAM_2_ID"].map(per_team)

# game_data holds two rows per game (one per team); keep the first of each pair.
game_data = game_data.iloc[::2].reset_index(drop=True)
game_data





Unnamed: 0,GAME_ID,GAME_DATE,MATCHUP,TEAM_NAME,TEAM_ID,TEAM_2_NAME,TEAM_2_ID,WL,HOME/AWAY,TEAM_2_HOME/AWAY,...,TEAM_1_AST,TEAM_2_AST,TEAM_1_OREB,TEAM_2_OREB,TEAM_1_DREB,TEAM_2_DREB,TEAM_1_TOV,TEAM_2_TOV,TEAM_1_WIN_PCT,TEAM_2_WIN_PCT
0,0022000001,2020-12-22,GSW @ BKN,Golden State Warriors,1610612744,Brooklyn Nets,1610612751,L,0,1,...,1991,1929,574,640,2524,2559,1080,975,0.542,0.667
1,0022000002,2020-12-22,LAL vs. LAC,Los Angeles Lakers,1610612747,LA Clippers,1610612746,L,1,0,...,1775,1756,695,678,2490,2501,1095,949,0.583,0.653
2,0022000003,2020-12-23,MIL @ BOS,Milwaukee Bucks,1610612749,Boston Celtics,1610612738,L,0,1,...,1834,1689,741,765,2724,2421,995,1012,0.639,0.500
3,0022000004,2020-12-23,DAL @ PHX,Dallas Mavericks,1610612742,Phoenix Suns,1610612756,L,0,1,...,1647,1939,657,630,2463,2462,869,902,0.583,0.708
4,0022000005,2020-12-25,NOP @ MIA,New Orleans Pelicans,1610612740,Miami Heat,1610612748,L,0,1,...,1872,1895,845,579,2568,2409,1052,1013,0.431,0.556
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,0022001076,2021-05-16,POR vs. DEN,Portland Trail Blazers,1610612757,Denver Nuggets,1610612743,W,1,0,...,1531,1933,766,758,2441,2442,799,972,0.583,0.653
1076,0022001077,2021-05-16,UTA @ SAC,Utah Jazz,1610612762,Sacramento Kings,1610612758,W,0,1,...,1703,1836,765,674,2709,2307,1023,963,0.722,0.431
1077,0022001078,2021-05-16,SAS vs. PHX,San Antonio Spurs,1610612759,Phoenix Suns,1610612756,L,1,0,...,1759,1939,669,630,2489,2462,821,902,0.458,0.708
1078,0022001079,2021-05-16,IND @ TOR,Indiana Pacers,1610612754,Toronto Raptors,1610612761,W,0,1,...,1973,1735,648,680,2424,2314,975,952,0.472,0.375


# More data processing/redefining teams as winners and losers   

In [63]:
# Final column layout: game identifiers, then team 1's columns, then team 2's.
new_column_order = [
    "GAME_ID", "GAME_DATE", "MATCHUP",
    "TEAM_NAME", "TEAM_ID", "TEAM_2_NAME", "TEAM_2_ID",
    "WL", "HOME/AWAY",
    "TEAM_1_PTS", "TEAM_1_FGA", "TEAM_1_FG_PCT", "TEAM_1_OREB",
    "TEAM_1_DREB", "TEAM_1_AST", "TEAM_1_TOV", "TEAM_1_WIN_PCT",
    "TEAM_2_HOME/AWAY",
    "TEAM_2_PTS", "TEAM_2_FGA", "TEAM_2_FG_PCT", "TEAM_2_OREB",
    "TEAM_2_DREB", "TEAM_2_AST", "TEAM_2_TOV", "TEAM_2_WIN_PCT",
]

# Select and rename in one chained expression: renaming with inplace=True on
# a frame derived by column selection can trigger SettingWithCopyWarning.
sorted_game_data = game_data[new_column_order].rename(
    columns={
        'TEAM_NAME': 'TEAM_1_NAME',
        'TEAM_ID': 'TEAM_1_ID',
        'HOME/AWAY': 'TEAM_1_HOME/AWAY',
        'WL': 'TEAM_1_WIN/LOSS',
    }
)

sorted_game_data


Unnamed: 0,GAME_ID,GAME_DATE,MATCHUP,TEAM_1_NAME,TEAM_1_ID,TEAM_2_NAME,TEAM_2_ID,TEAM_1_WIN/LOSS,TEAM_1_HOME/AWAY,TEAM_1_PTS,...,TEAM_1_WIN_PCT,TEAM_2_HOME/AWAY,TEAM_2_PTS,TEAM_2_FGA,TEAM_2_FG_PCT,TEAM_2_OREB,TEAM_2_DREB,TEAM_2_AST,TEAM_2_TOV,TEAM_2_WIN_PCT
0,0022000001,2020-12-22,GSW @ BKN,Golden State Warriors,1610612744,Brooklyn Nets,1610612751,L,0,8187,...,0.542,1,8537,6289,0.494,640,2559,1929,975,0.667
1,0022000002,2020-12-22,LAL vs. LAC,Los Angeles Lakers,1610612747,LA Clippers,1610612746,L,1,7887,...,0.583,0,8209,6242,0.482,678,2501,1756,949,0.653
2,0022000003,2020-12-23,MIL @ BOS,Milwaukee Bucks,1610612749,Boston Celtics,1610612738,L,0,8649,...,0.639,1,8109,6401,0.466,765,2421,1689,1012,0.500
3,0022000004,2020-12-23,DAL @ PHX,Dallas Mavericks,1610612742,Phoenix Suns,1610612756,L,0,8096,...,0.583,1,8300,6357,0.490,630,2462,1939,902,0.708
4,0022000005,2020-12-25,NOP @ MIA,New Orleans Pelicans,1610612740,Miami Heat,1610612748,L,0,8251,...,0.431,1,7781,6029,0.468,579,2409,1895,1013,0.556
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,0022001076,2021-05-16,POR vs. DEN,Portland Trail Blazers,1610612757,Denver Nuggets,1610612743,W,1,8360,...,0.583,0,8284,6422,0.485,758,2442,1933,972,0.653
1076,0022001077,2021-05-16,UTA @ SAC,Utah Jazz,1610612762,Sacramento Kings,1610612758,W,0,8382,...,0.722,1,8189,6382,0.481,674,2307,1836,963,0.431
1077,0022001078,2021-05-16,SAS vs. PHX,San Antonio Spurs,1610612759,Phoenix Suns,1610612756,L,1,7998,...,0.458,0,8300,6357,0.490,630,2462,1939,902,0.708
1078,0022001079,2021-05-16,IND @ TOR,Indiana Pacers,1610612754,Toronto Raptors,1610612761,W,0,8302,...,0.472,1,8011,6383,0.448,680,2314,1735,952,0.375


# Saving data... One year at a time 


In [64]:
from pathlib import Path

# Write the selected season to its own CSV. The target directory is created
# first because to_csv fails when it does not exist.
output_dir = Path('data/game_by_game_season_stats')
output_dir.mkdir(parents=True, exist_ok=True)
sorted_game_data.to_csv(output_dir / f'game_data_{YEAR}.csv', index=False)