# Getting and filtering data for each game for seasons from 2011-2022 (12 seasons)

In [115]:
from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.endpoints import boxscoretraditionalv2
import pandas as pd

# Season (identified by its starting year) that the later cells filter the
# team stats on, fetch game logs for, and embed in the output CSV filename.
YEAR = 2010

# Arguments forwarded positionally to leaguegamelog.LeagueGameLog by
# fetch_season_game_logs, in this order with the season string placed
# between PLAYER_OR_TEAM and SEASON_TYPE.
COUNTER = 1
DIRECTION = "ASC"
LEAGUE = "00"  # presumably the NBA league id -- TODO confirm against nba_api
PLAYER_OR_TEAM = "T"  # presumably "T" selects team-level logs -- TODO confirm
SEASON_TYPE = "Regular Season"
SORTER = "DATE"

def fetch_season_game_logs(season):
    """Fetch one season's league game log and trim it to per-team game rows.

    Args:
        season: Season string handed to ``LeagueGameLog`` (the caller in this
            file formats it like ``'2010-11'``).

    Returns:
        A DataFrame copy with TEAM_ID, TEAM_NAME, GAME_ID, MATCHUP, WL and
        GAME_DATE, plus ``HOME/AWAY`` (0 when MATCHUP contains ``'@'``,
        otherwise 1) and ``TEAM_2_HOME/AWAY`` (its complement).
    """
    request = leaguegamelog.LeagueGameLog(
        COUNTER, DIRECTION, LEAGUE, PLAYER_OR_TEAM, season, SEASON_TYPE, SORTER
    )

    # Only the first returned frame is used.
    raw_log = request.get_data_frames()[0]

    wanted = ["TEAM_ID", "TEAM_NAME", "GAME_ID", "MATCHUP", "WL", "GAME_DATE"]
    season_log = raw_log[wanted].copy()

    def home_flag(matchup):
        # An '@' in the matchup text marks the row's team as the away side.
        if '@' in matchup:
            return 0
        return 1

    season_log['HOME/AWAY'] = season_log['MATCHUP'].apply(home_flag)
    season_log['TEAM_2_HOME/AWAY'] = 1 - season_log['HOME/AWAY']

    return season_log

def fetch_multiple_seasons(start_year, end_year):
    """Fetch game logs for a range of seasons and pair each row with its opponent.

    Every season from ``start_year`` to ``end_year`` (inclusive) is fetched
    with ``fetch_season_game_logs``, the results are concatenated and sorted
    by GAME_ID, and each team row gains the ID and name of the other team in
    the same game.

    Args:
        start_year: First season's starting year (e.g. 2010 for '2010-11').
        end_year: Last season's starting year, inclusive.

    Returns:
        A DataFrame with two consecutive rows per game (one per team) and the
        columns GAME_ID, GAME_DATE, MATCHUP, TEAM_NAME, TEAM_ID, TEAM_2_NAME,
        TEAM_2_ID, WL, HOME/AWAY, TEAM_2_HOME/AWAY. Games that do not have
        exactly two rows in the fetched logs are dropped, because they cannot
        be paired.

    Raises:
        ValueError: If ``start_year`` is greater than ``end_year``.
    """
    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) must not be greater than end_year ({end_year})"
        )

    # Season strings look like '2010-11': starting year plus the last two
    # digits of the following year.
    season_frames = [
        fetch_season_game_logs(f"{year}-{str(year + 1)[-2:]}")
        for year in range(start_year, end_year + 1)
    ]

    combined_data = pd.concat(season_frames, ignore_index=True)
    combined_data = combined_data.sort_values(by='GAME_ID', ascending=True)

    # The pairing below relies on every game occupying exactly two adjacent
    # rows; a game with any other row count would shift all later pairs.
    rows_per_game = combined_data['GAME_ID'].map(combined_data['GAME_ID'].value_counts())
    combined_data = combined_data[rows_per_game == 2].reset_index(drop=True)

    # Flipping the lowest bit of the row position swaps 0<->1, 2<->3, ...,
    # i.e. it points every row at the other row of its game.
    opponent_position = combined_data.index.to_numpy() ^ 1
    combined_data['TEAM_2_ID'] = combined_data['TEAM_ID'].to_numpy()[opponent_position]
    combined_data['TEAM_2_NAME'] = combined_data['TEAM_NAME'].to_numpy()[opponent_position]

    columns_order = [
        "GAME_ID", "GAME_DATE", "MATCHUP", "TEAM_NAME", "TEAM_ID",
        "TEAM_2_NAME", "TEAM_2_ID", "WL", "HOME/AWAY", "TEAM_2_HOME/AWAY",
    ]
    combined_data = combined_data[columns_order]

    return combined_data

# Run the pipeline for a single season (start and end year both 2023); the
# returned frame is left as the cell's last expression and not stored.
fetch_multiple_seasons(2023,2023)

Unnamed: 0,GAME_ID,GAME_DATE,MATCHUP,TEAM_NAME,TEAM_ID,TEAM_2_NAME,TEAM_2_ID,WL,HOME/AWAY,TEAM_2_HOME/AWAY
0,0022300001,2023-11-03,IND vs. CLE,Indiana Pacers,1610612754,Cleveland Cavaliers,1610612739,W,1,0
1,0022300001,2023-11-03,CLE @ IND,Cleveland Cavaliers,1610612739,Indiana Pacers,1610612754,L,0,1
2,0022300002,2023-11-03,NYK @ MIL,New York Knicks,1610612752,Milwaukee Bucks,1610612749,L,0,1
3,0022300002,2023-11-03,MIL vs. NYK,Milwaukee Bucks,1610612749,New York Knicks,1610612752,W,1,0
4,0022300003,2023-11-03,MIA vs. WAS,Miami Heat,1610612748,Washington Wizards,1610612764,W,1,0
...,...,...,...,...,...,...,...,...,...,...
2455,0022301228,2023-12-08,SAC @ PHX,Sacramento Kings,1610612758,Phoenix Suns,1610612756,W,0,1
2456,0022301229,2023-12-07,IND @ MIL,Indiana Pacers,1610612754,Milwaukee Bucks,1610612749,W,0,1
2457,0022301229,2023-12-07,MIL vs. IND,Milwaukee Bucks,1610612749,Indiana Pacers,1610612754,L,1,0
2458,0022301230,2023-12-07,NOP @ LAL,New Orleans Pelicans,1610612740,Los Angeles Lakers,1610612747,L,0,1


# Acquiring Season Stats for each team

In [116]:
from api_helpers.team_stats_helpers import load_dataframe

# Stat columns requested from the project helper. The order is kept as-is
# because other cells may index into this list by position.
attribute_list = ["PTS","FGA", "FGM", "FG_PCT", "OREB","DREB","AST", "TOV", "WIN_PCT"]
nba_dataframe = load_dataframe(attribute_list)

# Drop rows that report zero made field goals.
nba_dataframe = nba_dataframe.drop(nba_dataframe[nba_dataframe["FGM"] == 0].index)

# YEAR is a string containing a '-' (presumably like '2010-11' -- confirm
# against load_dataframe); keep the part before the dash as a number.
nba_dataframe['YEAR'] = pd.to_numeric(nba_dataframe['YEAR'].str.split('-').str[0])
nba_dataframe = nba_dataframe.sort_values(by="YEAR")

# Build the encoded column and assign it back. Calling fillna/replace with
# inplace=True on a column selection is deprecated chained assignment and
# does not update the frame under pandas Copy-on-Write.
finals = nba_dataframe["NBA_FINALS_APPEARANCE"].fillna(0.0)
# NOTE(review): "FINALS APPEARANCE" is encoded as 0.0, the same value used for
# teams with no entry, while "LEAGUE CHAMPION" becomes 2. This looks like it
# was meant to be 1 -- confirm before relying on this column.
finals = finals.replace("FINALS APPEARANCE", 0.0)
finals = finals.replace("LEAGUE CHAMPION", 2)
nba_dataframe["NBA_FINALS_APPEARANCE"] = finals

# Keep only the season selected by the module-level YEAR constant.
nba_dataframe = nba_dataframe[nba_dataframe['YEAR'] == YEAR]
season_stat_dataframe = nba_dataframe.reset_index(drop=True)

season_stat_dataframe



Unnamed: 0,TEAM_ID,YEAR,WIN_PCT,NBA_FINALS_APPEARANCE,FGM,FGA,FG_PCT,OREB,DREB,AST,TOV,PTS
0,1610612745,2010,0.524,0.0,3170,6975,0.454,962,2549,1955,1110,8685
1,1610612737,2010,0.537,0.0,2971,6429,0.462,762,2460,1802,1118,7790
2,1610612739,2010,0.232,0.0,2886,6647,0.434,856,2449,1720,1166,7827
3,1610612759,2010,0.744,0.0,3148,6628,0.475,829,2603,1836,1101,8502
4,1610612749,2010,0.427,0.0,2814,6544,0.43,862,2480,1545,1103,7534
5,1610612751,2010,0.293,0.0,2918,6638,0.44,909,2440,1723,1152,7722
6,1610612744,2010,0.439,0.0,3251,7047,0.461,955,2370,1847,1198,8477
7,1610612758,2010,0.293,0.0,3134,6979,0.449,1071,2526,1675,1324,8151
8,1610612750,2010,0.207,0.0,3090,7014,0.441,1085,2556,1650,1398,8288
9,1610612762,2010,0.476,0.0,3064,6590,0.465,898,2338,1921,1175,8153


# Combining Dataframes
For each game, listing the stats of both teams for comparison

In [117]:
# Explicit copy so the column assignments below never target a column
# selection of another frame (avoids SettingWithCopyWarning).
game_data = fetch_multiple_seasons(YEAR,YEAR).copy()
season_team_data = season_stat_dataframe

# Season stats indexed by TEAM_ID; when a team appears more than once, the
# first row wins, matching a "first match" lookup.
stats_by_team = season_team_data.drop_duplicates(subset='TEAM_ID').set_index('TEAM_ID')

# Fail with the offending IDs instead of an opaque lookup error when a team in
# the game logs has no season-stat row.
teams_in_games = set(game_data['TEAM_ID']) | set(game_data['TEAM_2_ID'])
teams_without_stats = sorted(teams_in_games - set(stats_by_team.index))
if teams_without_stats:
    raise KeyError(
        "No season stats for TEAM_ID(s): "
        + ", ".join(str(team_id) for team_id in teams_without_stats)
    )

# For every game row, attach the season-level stat of the row's own team
# (TEAM_1_*) and of its opponent (TEAM_2_*). The tuple order fixes the order
# in which the new columns are appended.
for stat in ("PTS", "FG_PCT", "FGA", "AST", "OREB", "DREB", "TOV", "WIN_PCT"):
    game_data[f'TEAM_1_{stat}'] = game_data['TEAM_ID'].map(stats_by_team[stat])
    game_data[f'TEAM_2_{stat}'] = game_data['TEAM_2_ID'].map(stats_by_team[stat])

# Each game occupies two consecutive rows (one per team); keep the first of
# each pair so every game appears once.
game_data = game_data.iloc[::2].reset_index(drop=True)
game_data





Unnamed: 0,GAME_ID,GAME_DATE,MATCHUP,TEAM_NAME,TEAM_ID,TEAM_2_NAME,TEAM_2_ID,WL,HOME/AWAY,TEAM_2_HOME/AWAY,...,TEAM_1_AST,TEAM_2_AST,TEAM_1_OREB,TEAM_2_OREB,TEAM_1_DREB,TEAM_2_DREB,TEAM_1_TOV,TEAM_2_TOV,TEAM_1_WIN_PCT,TEAM_2_WIN_PCT
0,0021000001,2010-10-26,BOS vs. MIA,Boston Celtics,1610612738,Miami Heat,1610612748,W,1,0,...,1921,1639,639,790,2542,2666,1195,1142,0.683,0.707
1,0021000002,2010-10-26,PHX @ POR,Phoenix Suns,1610612756,Portland Trail Blazers,1610612757,L,0,1,...,1945,1736,821,996,2478,2230,1169,1070,0.488,0.585
2,0021000003,2010-10-26,HOU @ LAL,Houston Rockets,1610612745,Los Angeles Lakers,1610612747,L,0,1,...,1955,1801,962,989,2549,2616,1110,1073,0.524,0.695
3,0021000004,2010-10-27,BOS @ CLE,Boston Celtics,1610612738,Cleveland Cavaliers,1610612739,L,0,1,...,1921,1720,639,856,2542,2449,1195,1166,0.683,0.232
4,0021000005,2010-10-27,NJN vs. DET,New Jersey Nets,1610612751,Detroit Pistons,1610612765,W,1,0,...,1723,1730,909,931,2440,2236,1152,1067,0.293,0.366
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,0021001226,2011-04-13,DEN @ UTA,Denver Nuggets,1610612743,Utah Jazz,1610612762,L,0,1,...,1813,1921,791,898,2652,2338,1157,1175,0.610,0.476
1226,0021001227,2011-04-13,POR @ GSW,Portland Trail Blazers,1610612757,Golden State Warriors,1610612744,L,0,1,...,1736,1847,996,955,2230,2370,1070,1198,0.585,0.439
1227,0021001228,2011-04-13,MEM @ LAC,Memphis Grizzlies,1610612763,Los Angeles Clippers,1610612746,L,0,1,...,1691,1813,970,955,2391,2501,1145,1343,0.561,0.390
1228,0021001229,2011-04-13,SAS @ PHX,San Antonio Spurs,1610612759,Phoenix Suns,1610612756,L,0,1,...,1836,1945,829,821,2603,2478,1101,1169,0.744,0.488


# More data processing/redefining teams as winners and losers   

In [118]:
# Group the columns as: game info, team 1 block, team 2 block.
new_column_order = [
    "GAME_ID", "GAME_DATE", "MATCHUP", "TEAM_NAME", "TEAM_ID", "TEAM_2_NAME", "TEAM_2_ID",
    "WL", "HOME/AWAY",
    "TEAM_1_PTS", "TEAM_1_FGA", "TEAM_1_FG_PCT", "TEAM_1_OREB", "TEAM_1_DREB",
    "TEAM_1_AST", "TEAM_1_TOV", "TEAM_1_WIN_PCT",
    "TEAM_2_HOME/AWAY",
    "TEAM_2_PTS", "TEAM_2_FGA", "TEAM_2_FG_PCT", "TEAM_2_OREB", "TEAM_2_DREB",
    "TEAM_2_AST", "TEAM_2_TOV", "TEAM_2_WIN_PCT",
]

# Chain the rename and assign the result: renaming in place on a column
# selection of game_data triggers pandas' SettingWithCopyWarning.
sorted_game_data = game_data[new_column_order].rename(
    columns={
        'TEAM_NAME': 'TEAM_1_NAME',
        'TEAM_ID': 'TEAM_1_ID',
        'HOME/AWAY': 'TEAM_1_HOME/AWAY',
        'WL': 'TEAM_1_WIN/LOSS',
    }
)

sorted_game_data


Unnamed: 0,GAME_ID,GAME_DATE,MATCHUP,TEAM_1_NAME,TEAM_1_ID,TEAM_2_NAME,TEAM_2_ID,TEAM_1_WIN/LOSS,TEAM_1_HOME/AWAY,TEAM_1_PTS,...,TEAM_1_WIN_PCT,TEAM_2_HOME/AWAY,TEAM_2_PTS,TEAM_2_FGA,TEAM_2_FG_PCT,TEAM_2_OREB,TEAM_2_DREB,TEAM_2_AST,TEAM_2_TOV,TEAM_2_WIN_PCT
0,0021000001,2010-10-26,BOS vs. MIA,Boston Celtics,1610612738,Miami Heat,1610612748,W,1,7913,...,0.683,0,8369,6301,0.481,790,2666,1639,1142,0.707
1,0021000002,2010-10-26,PHX @ POR,Phoenix Suns,1610612756,Portland Trail Blazers,1610612757,L,0,8611,...,0.488,1,7896,6599,0.447,996,2230,1736,1070,0.585
2,0021000003,2010-10-26,HOU @ LAL,Houston Rockets,1610612745,Los Angeles Lakers,1610612747,L,0,8685,...,0.524,1,8321,6757,0.463,989,2616,1801,1073,0.695
3,0021000004,2010-10-27,BOS @ CLE,Boston Celtics,1610612738,Cleveland Cavaliers,1610612739,L,0,7913,...,0.683,1,7827,6647,0.434,856,2449,1720,1166,0.232
4,0021000005,2010-10-27,NJN vs. DET,New Jersey Nets,1610612751,Detroit Pistons,1610612765,W,1,7722,...,0.293,0,7951,6647,0.460,931,2236,1730,1067,0.366
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,0021001226,2011-04-13,DEN @ UTA,Denver Nuggets,1610612743,Utah Jazz,1610612762,L,0,8811,...,0.610,1,8153,6590,0.465,898,2338,1921,1175,0.476
1226,0021001227,2011-04-13,POR @ GSW,Portland Trail Blazers,1610612757,Golden State Warriors,1610612744,L,0,7896,...,0.585,1,8477,7047,0.461,955,2370,1847,1198,0.439
1227,0021001228,2011-04-13,MEM @ LAC,Memphis Grizzlies,1610612763,Los Angeles Clippers,1610612746,L,0,8195,...,0.561,1,8089,6594,0.457,955,2501,1813,1343,0.390
1228,0021001229,2011-04-13,SAS @ PHX,San Antonio Spurs,1610612759,Phoenix Suns,1610612756,L,0,8502,...,0.744,1,8611,6844,0.470,821,2478,1945,1169,0.488


# Saving data... One year at a time 


In [119]:
from pathlib import Path

# DataFrame.to_csv does not create missing parent directories, so make sure
# the target folder exists before writing this season's file.
output_dir = Path('data/game_by_game_season_stats')
output_dir.mkdir(parents=True, exist_ok=True)
sorted_game_data.to_csv(output_dir / f'game_data_{YEAR}.csv', index=False)