In [34]:
from nba_api.stats.endpoints import leaguegamelog, teamgamelog
from nba_api.stats.static import teams
import pandas as pd
import numpy as np
import time
import warnings

# Blanket filter: with no category or module given, this silences every warning
# for the rest of the session, including pandas warnings such as
# SettingWithCopyWarning.
# NOTE(review): consider narrowing this to the specific categories that are noisy.
warnings.filterwarnings("ignore")

In [2]:
# Season identifier passed as the `season` argument to the LeagueGameLog and
# TeamGameLog requests made in the cells below.
season = "2019-20"

In [17]:
teams_df = pd.DataFrame(teams.get_teams())

# Lookup that resolves any team identifier -- full name, abbreviation, or the
# id itself -- to the team id. Keys are inserted column by column in that order.
team_records = teams_df.to_dict("records")
team_to_id = {
    record[key_column]: record["id"]
    for key_column in ("full_name", "abbreviation", "id")
    for record in team_records
}
teams_df.head(5)

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Georgia,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966


In [35]:
# Get the league-wide game log for the configured `season`
game_log = leaguegamelog.LeagueGameLog(season=season).get_data_frames()
assert len(game_log) == 1
games_df = game_log[0]

# Keep only the home-team row of each game (away rows contain "@" in MATCHUP) so
# every game appears once. The explicit .copy() makes the filtered frame
# independent, so the column assignments below do not write into a slice of the
# raw log (which otherwise triggers SettingWithCopyWarning).
games_df = games_df[~games_df.MATCHUP.str.contains("@")].copy()
print(games_df.shape, len(games_df.GAME_ID.unique()))

# Add opponent abbreviation and id columns; home rows read "<HOME> vs. <AWAY>"
games_df["OPP_TEAM_ABBREVIATION"] = games_df["MATCHUP"].str.split(" vs. ").str[1]
games_df["OPP_TEAM_ID"] = games_df["OPP_TEAM_ABBREVIATION"].map(team_to_id)

# Encode WL as 1 for a win and 0 for a loss. The 1/0 keys keep the mapping
# stable if the values are already numeric. Plain column assignment replaces the
# column, so it takes the mapped values' numeric dtype.
games_df["WL"] = games_df["WL"].map({"W": 1, "L": 0, 1: 1, 0: 0})

# Keep only the columns used downstream
games_df = games_df[
    [
        "GAME_ID",
        "GAME_DATE",
        "TEAM_ID",
        "TEAM_ABBREVIATION",
        "OPP_TEAM_ID",
        "OPP_TEAM_ABBREVIATION",
        "PLUS_MINUS",
        "WL",
    ]
]

games_df.head(10)

(1059, 29) 1059


Unnamed: 0,GAME_ID,GAME_DATE,TEAM_ID,TEAM_ABBREVIATION,OPP_TEAM_ID,OPP_TEAM_ABBREVIATION,PLUS_MINUS,WL
2,21900002,2019-10-22,1610612746,LAC,1610612747,LAL,10,1
3,21900001,2019-10-22,1610612761,TOR,1610612740,NOP,8,1
4,21900003,2019-10-23,1610612766,CHA,1610612741,CHI,1,1
5,21900011,2019-10-23,1610612762,UTA,1610612760,OKC,5,1
7,21900009,2019-10-23,1610612742,DAL,1610612764,WAS,8,1
8,21900007,2019-10-23,1610612748,MIA,1610612763,MEM,19,1
11,21900008,2019-10-23,1610612755,PHI,1610612738,BOS,14,1
12,21900010,2019-10-23,1610612759,SAS,1610612752,NYK,9,1
17,21900013,2019-10-23,1610612757,POR,1610612743,DEN,-8,0
20,21900006,2019-10-23,1610612751,BKN,1610612750,MIN,-1,0


In [None]:
# Fetch each team's game log for `season`, keyed by team id.
team_dfs = {}
# Iterate ids and names as pairs instead of looking names up by an enumerate
# counter: `teams_df["full_name"][i]` is a label lookup and only matches the
# row position while the frame has its default 0..n-1 index.
for team_id, team_name in zip(teams_df["id"], teams_df["full_name"]):
    print(f"Getting game log for {team_name}")
    team_log = teamgamelog.TeamGameLog(team_id=team_id, season=season).get_data_frames()
    assert len(team_log) == 1
    team_dfs[team_id] = team_log[0]
    # Pause between consecutive requests.
    time.sleep(0.5)

In [None]:
# Pull a single team's game log (team id 1610612737) to inspect the raw
# columns returned by the TeamGameLog endpoint.
test_request = teamgamelog.TeamGameLog(team_id=1610612737, season=season)
test_frames = test_request.get_data_frames()
test_df = test_frames[0]
test_df

Unnamed: 0,Team_ID,Game_ID,GAME_DATE,MATCHUP,WL,W,L,W_PCT,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1610612737,0021900969,"MAR 11, 2020",ATL vs. NYK,L,20,47,0.299,265,48,...,0.808,15,38,53,26,6,3,17,25,131
1,1610612737,0021900957,"MAR 09, 2020",ATL vs. CHA,W,20,46,0.303,290,53,...,0.593,12,41,53,33,1,5,15,26,143
2,1610612737,0021900943,"MAR 07, 2020",ATL @ MEM,L,19,46,0.292,240,32,...,0.857,14,32,46,23,9,2,14,24,101
3,1610612737,0021900930,"MAR 06, 2020",ATL @ WAS,L,19,45,0.297,240,46,...,0.667,6,30,36,25,9,4,17,25,112
4,1610612737,0021900905,"MAR 02, 2020",ATL vs. MEM,L,19,44,0.302,240,32,...,0.938,16,27,43,20,6,8,17,21,88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,1610612737,0021900066,"OCT 31, 2019",ATL vs. MIA,L,2,3,0.400,240,36,...,0.692,16,34,50,20,5,5,16,15,97
63,1610612737,0021900052,"OCT 29, 2019",ATL @ MIA,L,2,2,0.500,240,35,...,0.667,9,24,33,28,14,7,20,29,97
64,1610612737,0021900043,"OCT 28, 2019",ATL vs. PHI,L,2,1,0.667,240,36,...,0.688,8,37,45,23,12,3,21,25,103
65,1610612737,0021900028,"OCT 26, 2019",ATL vs. ORL,W,2,0,1.000,240,43,...,0.533,9,43,52,22,5,9,18,26,103


In [None]:
def process_team_df(team_df, team_map=None):
    """Normalise a raw team game-log frame in place and return it.

    The frame is expected to carry ``GAME_DATE``, ``WL`` and ``MATCHUP``
    columns. After the call:

    * ``GAME_DATE`` is parsed with ``pd.to_datetime``.
    * ``WL`` is 1 where the value was ``"W"`` and 0 for anything else.
    * ``MATCHUP`` is replaced by its third space-separated token (the opponent
      abbreviation in strings such as ``"ATL vs. NYK"`` or ``"ATL @ MEM"``).
    * ``OPP`` holds ``team_map[<opponent abbreviation>]``.

    Parameters
    ----------
    team_df : pandas.DataFrame
        Raw game log; it is modified in place.
    team_map : mapping, optional
        Maps an opponent abbreviation to the value stored in ``OPP``. When
        omitted, the notebook-level ``team_to_id`` lookup is used.

    Returns
    -------
    pandas.DataFrame
        The same ``team_df`` object that was passed in.

    Raises
    ------
    KeyError
        If an opponent abbreviation is missing from ``team_map``.

    Notes
    -----
    Call this once per raw frame: ``WL`` and ``MATCHUP`` are overwritten, so a
    second call on the same frame no longer sees the raw values.
    """
    if team_map is None:
        # Fall back to the lookup built from the static team list.
        team_map = team_to_id

    # Derive every new column before assigning any of them, so a failed
    # opponent lookup does not leave the frame half-converted.
    game_dates = pd.to_datetime(team_df["GAME_DATE"])
    win_flags = (team_df["WL"] == "W").astype("int64")
    opponents = team_df["MATCHUP"].str.split(" ").str[2]
    opponent_ids = opponents.map(lambda abbreviation: team_map[abbreviation])

    team_df["GAME_DATE"] = game_dates
    team_df["WL"] = win_flags
    team_df["MATCHUP"] = opponents
    team_df["OPP"] = opponent_ids
    return team_df