# Getting and filtering data for each game for seasons from 2011-2022 (12 seasons)

In [19]:
from nba_api.stats.endpoints import leaguegamelog
from nba_api.stats.endpoints import boxscoretraditionalv2
import pandas as pd

COUNTER =  1
DIRECTION = "ASC"
LEAGUE = "00"
PLAYER_OR_TEAM = "T"
SEASON_TYPE = "Regular Season"
SORTER = "DATE"

def fetch_season_game_logs(season):
    # Create LeagueGameLog object instance for the season
    gamelog = leaguegamelog.LeagueGameLog(
    COUNTER, DIRECTION, LEAGUE, PLAYER_OR_TEAM, season, SEASON_TYPE, SORTER
)
    
    # Execute request and fetch data
    data = gamelog.get_data_frames()[0] 
    
    # Filter columns and add Home/Away column
    columns_to_keep = ["TEAM_ID","TEAM_NAME","GAME_ID","MATCHUP","WL", "GAME_DATE"]
    filtered_data = data[columns_to_keep].copy()
    filtered_data['HOME/AWAY'] = filtered_data['MATCHUP'].apply(lambda x: 0 if '@' in x else 1)
    filtered_data['TEAM_2_HOME/AWAY'] = 1 - filtered_data['HOME/AWAY']
    
    return filtered_data

def fetch_multiple_seasons(start_year, end_year):
    all_seasons_data = []  # List to store data for all seasons
    
    # Loop through each season from start_year to end_year (inclusive)
    for year in range(start_year, end_year + 1):
        season = f"{year}-{str(year + 1)[-2:]}"  # Format season string (e.g., '2010-11')
        season_data = fetch_season_game_logs(season)
        all_seasons_data.append(season_data)
    
    # Concatenate all seasons' data into a single DataFrame
    combined_data = pd.concat(all_seasons_data, ignore_index=True)
    combined_data = combined_data.sort_values(by='GAME_ID', ascending=True)
    combined_data = combined_data.reset_index(drop=True)

    team_2_id = []
    team_2_name = []

    # Add new columns for team IDs and names from adjacent rows
    for i in range(len(combined_data)):
        if i % 2 == 0:  # Every even row
            team_2_id.append(combined_data.loc[i+1, 'TEAM_ID'])
            team_2_name.append(combined_data.loc[i+1, 'TEAM_NAME'])
        else:  # Every odd row
            team_2_id.append(combined_data.loc[i-1, 'TEAM_ID'])
            team_2_name.append(combined_data.loc[i-1, 'TEAM_NAME'])
    
    # Add new columns to the DataFrame
    combined_data['TEAM_2_ID'] = team_2_id
    combined_data['TEAM_2_NAME'] = team_2_name
    
    columns_order = ["GAME_ID", "GAME_DATE", "MATCHUP", "TEAM_NAME", "TEAM_ID", "TEAM_2_NAME", "TEAM_2_ID", "WL", "HOME/AWAY", "TEAM_2_HOME/AWAY"]
    combined_data = combined_data[columns_order]
    
    return combined_data

print(fetch_multiple_seasons(2023,2023))

         GAME_ID   GAME_DATE      MATCHUP             TEAM_NAME     TEAM_ID  \
0     0022300001  2023-11-03  IND vs. CLE        Indiana Pacers  1610612754   
1     0022300001  2023-11-03    CLE @ IND   Cleveland Cavaliers  1610612739   
2     0022300002  2023-11-03    NYK @ MIL       New York Knicks  1610612752   
3     0022300002  2023-11-03  MIL vs. NYK       Milwaukee Bucks  1610612749   
4     0022300003  2023-11-03  MIA vs. WAS            Miami Heat  1610612748   
...          ...         ...          ...                   ...         ...   
2455  0022301228  2023-12-08  PHX vs. SAC          Phoenix Suns  1610612756   
2456  0022301229  2023-12-07  MIL vs. IND       Milwaukee Bucks  1610612749   
2457  0022301229  2023-12-07    IND @ MIL        Indiana Pacers  1610612754   
2458  0022301230  2023-12-07    NOP @ LAL  New Orleans Pelicans  1610612740   
2459  0022301230  2023-12-07  LAL vs. NOP    Los Angeles Lakers  1610612747   

               TEAM_2_NAME   TEAM_2_ID WL  HOME/AWA

# Aquiring Season Stats for each team 

In [35]:
from api_helpers.team_stats_helpers import load_dataframe

attribute_list = ["PTS","FGA", "FGM", "FG_PCT", "OREB","DREB","AST", "TOV", "WIN_PCT"]
nba_dataframe = load_dataframe(attribute_list)
nba_dataframe = nba_dataframe.drop(nba_dataframe[nba_dataframe["FGM"] == 0].index)
nba_dataframe.sort_values(by='YEAR')

nba_dataframe['YEAR'] = nba_dataframe['YEAR'].str.split('-').str[0]
nba_dataframe['YEAR'] = pd.to_numeric(nba_dataframe['YEAR'])
nba_dataframe.sort_values(by="YEAR",inplace=True)

nba_dataframe["NBA_FINALS_APPEARANCE"].fillna(0.0, inplace=True)
nba_dataframe["NBA_FINALS_APPEARANCE"].replace("FINALS APPEARANCE", 0.0, inplace=True)
nba_dataframe["NBA_FINALS_APPEARANCE"].replace("LEAGUE CHAMPION", 2, inplace=True)

nba_dataframe = nba_dataframe[nba_dataframe['YEAR'] >= 2023]
season_stat_dataframe = nba_dataframe.reset_index(drop=True)

print(season_stat_dataframe)



       TEAM_ID  YEAR  WIN_PCT  NBA_FINALS_APPEARANCE   FGM   FGA  FG_PCT  \
0   1610612737  2023    0.439                    0.0  3529  7584   0.465   
1   1610612738  2023    0.780                    2.0  3601  7396   0.487   
2   1610612761  2023    0.305                    0.0  3466  7356   0.471   
3   1610612763  2023    0.329                    0.0  3145  7230   0.435   
4   1610612762  2023    0.378                    0.0  3443  7371   0.467   
5   1610612758  2023    0.561                    0.0  3553  7455   0.477   
6   1610612759  2023    0.268                    0.0  3438  7436   0.462   
7   1610612760  2023    0.695                    0.0  3653  7324   0.499   
8   1610612747  2023    0.573                    0.0  3580  7177   0.499   
9   1610612757  2023    0.256                    0.0  3227  7356   0.439   
10  1610612748  2023    0.561                    0.0  3266  7022   0.465   
11  1610612746  2023    0.622                    0.0  3473  7108   0.489   
12  16106127

# Combining Dataframes
For each game, listing the stats of both teams for comparison

In [36]:
game_data = fetch_multiple_seasons(2023,2023)
season_team_data = season_stat_dataframe


team_1_pts = []
team_2_pts = []
team_1_fg_pct = []
team_2_fg_pct = []
team_1_fga = []
team_2_fga = []
team_1_ast = []
team_2_ast = []
team_1_oreb = []
team_2_oreb = []
team_1_dreb = []
team_2_dreb = []
team_1_tov = []
team_2_tov = []
team_1_win_pct = []
team_2_win_pct = []

# As a reminder: attribute_list = ["PTS","FGA", "FGM", "FG_PCT", "OREB","DREB","AST", "TOV", "WIN_PCT"]
# Add new columns for team IDs and names from adjacent rows
for i in range(len(game_data)):
    team_1_pts.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_ID']].index[0], attribute_list[0]])
    team_2_pts.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_2_ID']].index[0], attribute_list[0]])
    team_1_fga.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_ID']].index[0], attribute_list[1]])
    team_2_fga.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_2_ID']].index[0], attribute_list[1]])
    team_1_fg_pct.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_ID']].index[0], attribute_list[3]])
    team_2_fg_pct.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_2_ID']].index[0], attribute_list[3]])
    team_1_oreb.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_ID']].index[0], attribute_list[4]])
    team_2_oreb.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_2_ID']].index[0], attribute_list[4]])
    team_1_dreb.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_ID']].index[0], attribute_list[5]])
    team_2_dreb.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_2_ID']].index[0], attribute_list[5]])
    team_1_ast.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_ID']].index[0], attribute_list[6]])
    team_2_ast.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_2_ID']].index[0], attribute_list[6]])
    team_1_tov.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_ID']].index[0], attribute_list[7]])
    team_2_tov.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_2_ID']].index[0], attribute_list[7]])
    team_1_win_pct.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_ID']].index[0], attribute_list[8]])
    team_2_win_pct.append(season_team_data.loc[season_team_data[season_team_data['TEAM_ID'] == game_data.loc[i, 'TEAM_2_ID']].index[0], attribute_list[8]])


game_data['TEAM_1_PTS'] = team_1_pts
game_data['TEAM_2_PTS'] = team_2_pts
game_data['TEAM_1_FG_PCT'] = team_1_fg_pct
game_data['TEAM_2_FG_PCT'] = team_2_fg_pct
game_data['TEAM_1_FGA'] = team_1_fga
game_data['TEAM_2_FGA'] = team_2_fga
game_data['TEAM_1_AST'] = team_1_ast
game_data['TEAM_2_AST'] = team_2_ast
game_data['TEAM_1_OREB'] = team_1_oreb
game_data['TEAM_2_OREB'] = team_2_oreb
game_data['TEAM_1_DREB'] = team_1_dreb
game_data['TEAM_2_DREB'] = team_2_dreb
game_data['TEAM_1_TOV'] = team_1_pts
game_data['TEAM_2_TOV'] = team_2_pts
game_data['TEAM_1_WIN_PCT'] = team_1_pts
game_data['TEAM_2_WIN_PCT'] = team_2_pts

game_data = game_data.iloc[::2]
game_data.reset_index(drop=True, inplace=True)
print(game_data)





         GAME_ID   GAME_DATE      MATCHUP              TEAM_NAME     TEAM_ID  \
0     0022300001  2023-11-03    CLE @ IND    Cleveland Cavaliers  1610612739   
1     0022300002  2023-11-03  MIL vs. NYK        Milwaukee Bucks  1610612749   
2     0022300003  2023-11-03    WAS @ MIA     Washington Wizards  1610612764   
3     0022300004  2023-11-03    BKN @ CHI          Brooklyn Nets  1610612751   
4     0022300005  2023-11-03    GSW @ OKC  Golden State Warriors  1610612744   
...          ...         ...          ...                    ...         ...   
1225  0022301226  2023-12-08    DAL @ POR       Dallas Mavericks  1610612742   
1226  0022301227  2023-12-08    NYK @ BOS        New York Knicks  1610612752   
1227  0022301228  2023-12-08  PHX vs. SAC           Phoenix Suns  1610612756   
1228  0022301229  2023-12-07  MIL vs. IND        Milwaukee Bucks  1610612749   
1229  0022301230  2023-12-07    NOP @ LAL   New Orleans Pelicans  1610612740   

                 TEAM_2_NAME   TEAM_2_I

# More data processing/redefining teams as winners and losers   

In [37]:
new_column_order = ["GAME_ID", "GAME_DATE", "MATCHUP", "TEAM_NAME", "TEAM_ID", "TEAM_2_NAME", "TEAM_2_ID", "WL", "HOME/AWAY", "TEAM_1_PTS", "TEAM_1_FGA", "TEAM_1_FG_PCT", "TEAM_1_OREB","TEAM_1_DREB", "TEAM_1_AST", "TEAM_1_TOV", "TEAM_1_WIN_PCT", "TEAM_2_HOME/AWAY", "TEAM_2_PTS", "TEAM_2_FGA", "TEAM_2_FG_PCT", "TEAM_2_OREB","TEAM_2_DREB","TEAM_2_AST", "TEAM_2_TOV", "TEAM_2_WIN_PCT"]
sorted_game_data = game_data[new_column_order].copy()
sorted_game_data.rename(columns={'TEAM_NAME': 'TEAM_1_NAME', 'TEAM_ID': 'TEAM_1_ID', 'HOME/AWAY': 'TEAM_1_HOME/AWAY', 'WL': 'TEAM_1_WIN/LOSS'}, inplace=True)

print(game_data)


         GAME_ID   GAME_DATE      MATCHUP              TEAM_NAME     TEAM_ID  \
0     0022300001  2023-11-03    CLE @ IND    Cleveland Cavaliers  1610612739   
1     0022300002  2023-11-03  MIL vs. NYK        Milwaukee Bucks  1610612749   
2     0022300003  2023-11-03    WAS @ MIA     Washington Wizards  1610612764   
3     0022300004  2023-11-03    BKN @ CHI          Brooklyn Nets  1610612751   
4     0022300005  2023-11-03    GSW @ OKC  Golden State Warriors  1610612744   
...          ...         ...          ...                    ...         ...   
1225  0022301226  2023-12-08    DAL @ POR       Dallas Mavericks  1610612742   
1226  0022301227  2023-12-08    NYK @ BOS        New York Knicks  1610612752   
1227  0022301228  2023-12-08  PHX vs. SAC           Phoenix Suns  1610612756   
1228  0022301229  2023-12-07  MIL vs. IND        Milwaukee Bucks  1610612749   
1229  0022301230  2023-12-07    NOP @ LAL   New Orleans Pelicans  1610612740   

                 TEAM_2_NAME   TEAM_2_I

# Saving data... One year at a time 
