In [1]:
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import cumestatsteamgames, cumestatsteam, gamerotation
import pandas as pd
import json
import difflib
import time

In [3]:
# Get Past Team Schedules
def getRegularSeasonSchedule(seasonYear,teamID):
    season = str(seasonYear) + "-" + str(seasonYear+1)[-2:]
    teamGames = cumestatsteamgames.CumeStatsTeamGames(league_id = '00',season = season ,
                                                                  season_type_all_star='Regular Season',
                                                                  team_id = teamID).get_normalized_json()

    teamGames = pd.DataFrame(json.loads(teamGames)['CumeStatsTeamGames'])
    teamGames['SEASON'] = season
    return teamGames

In [4]:
# Format Data Frame Columns and Get Home and Away Team IDs

def getGameDate(matchup):
    return matchup.partition(' at')[0][:10]

def getHomeTeam(matchup):
    return matchup.partition(' at')[2]

def getAwayTeam(matchup):
    return matchup.partition(' at')[0][10:]

def getTeamIDFromNickname(nickname):
    return teams.loc[teams['nickname'] == difflib.get_close_matches(nickname,teams['nickname'],1)[0]].values[0][0] 


In [5]:
def getSingleGameMetrics(gameID,homeTeamID,awayTeamID,awayTeamNickname,seasonYear,gameDate):


    def getGameStats(teamID,gameID,seasonYear):
        #season = str(seasonYear) + "-" + str(seasonYear+1)[-2:]
        gameStats = cumestatsteam.CumeStatsTeam(game_ids=gameID,league_id ="00",
                                               season=seasonYear,season_type_all_star="Regular Season",
                                               team_id = teamID).get_normalized_json()

        gameStats = pd.DataFrame(json.loads(gameStats)['TotalTeamStats'])

        return gameStats

    data = getGameStats(homeTeamID,gameID,seasonYear)
    data.at[1,'NICKNAME'] = awayTeamNickname
    data.at[1,'TEAM_ID'] = awayTeamID
    data.at[1,'OFFENSIVE_EFFICIENCY'] = (data.at[1,'FG'] + data.at[1,'AST'])/(data.at[1,'FGA'] - data.at[1,'OFF_REB'] + data.at[1,'AST'] + data.at[1,'TOTAL_TURNOVERS'])
    data.at[1,'SCORING_MARGIN'] = data.at[1,'PTS'] - data.at[0,'PTS']

    data.at[0,'OFFENSIVE_EFFICIENCY'] = (data.at[0,'FG'] + data.at[0,'AST'])/(data.at[0,'FGA'] - data.at[0,'OFF_REB'] + data.at[0,'AST'] + data.at[0,'TOTAL_TURNOVERS'])
    data.at[0,'SCORING_MARGIN'] = data.at[0,'PTS'] - data.at[1,'PTS']

    data['GAME_DATE'] = gameDate

    return data

In [None]:
# Get NBA Team IDs
teams = pd.DataFrame(teams.get_teams())

In [6]:
# Get Schedules for the last 2 seasons + current
scheduleFrame = pd.DataFrame()
years = [2020,2021,2022]

start = time.perf_counter_ns()
for year in years:
    for id in teams['id']:
        time.sleep(1)
        scheduleFrame = scheduleFrame.append(getRegularSeasonSchedule(year,id))
end = time.perf_counter_ns()

mins = ((end - start) / 1e9)/60
print(mins)

3.2057193216666664


In [7]:
scheduleFrame

Unnamed: 0,MATCHUP,GAME_ID,SEASON
0,05/16/2021 Rockets at Hawks,0022001066,2020-21
1,05/13/2021 Magic at Hawks,0022001049,2020-21
2,05/12/2021 Wizards at Hawks,0022001042,2020-21
3,05/10/2021 Wizards at Hawks,0022001026,2020-21
4,05/06/2021 Hawks at Pacers,0022001000,2020-21
...,...,...,...
33,10/28/2022 Hornets at Magic,0022200071,2022-23
34,10/26/2022 Hornets at Knicks,0022200058,2022-23
35,10/23/2022 Hornets at Hawks,0022200038,2022-23
36,10/21/2022 Pelicans at Hornets,0022200017,2022-23


In [8]:
# Identify Home and Away Teams and format date field

start = time.perf_counter_ns()
scheduleFrame['GAME_DATE'] = pd.to_datetime(scheduleFrame['MATCHUP'].map(getGameDate))
scheduleFrame['HOME_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getHomeTeam)
scheduleFrame['HOME_TEAM_ID'] = scheduleFrame['HOME_TEAM_NICKNAME'].map(getTeamIDFromNickname)
scheduleFrame['AWAY_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getAwayTeam)
scheduleFrame['AWAY_TEAM_ID'] = scheduleFrame['AWAY_TEAM_NICKNAME'].map(getTeamIDFromNickname)
scheduleFrame = scheduleFrame.drop_duplicates() # There's a row for both teams, only need 1
scheduleFrame = scheduleFrame.reset_index(drop=True)
end = time.perf_counter_ns()
secs = ((end - start) / 1e9)
print(secs)

14.4533594


In [10]:
getSingleGameMetrics(scheduleFrame.at[1,'GAME_ID'],scheduleFrame.at[1,'HOME_TEAM_ID'],
                     scheduleFrame.at[1,'AWAY_TEAM_ID'],scheduleFrame.at[1,'AWAY_TEAM_NICKNAME'],
                     scheduleFrame.at[1,'SEASON'],scheduleFrame.at[1,'GAME_DATE'])

Unnamed: 0,CITY,NICKNAME,TEAM_ID,W,L,W_HOME,L_HOME,W_ROAD,L_ROAD,TEAM_TURNOVERS,...,STL,TOTAL_TURNOVERS,BLK,PTS,AVG_REB,AVG_PTS,DQ,OFFENSIVE_EFFICIENCY,SCORING_MARGIN,GAME_DATE
0,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,9,7,4,116,58.0,116.0,0,0.555556,23.0,2021-05-13
1,OPPONENTS,Magic,1610612753,0,1,0,0,0,1,0,...,3,14,6,93,62.0,93.0,0,0.517241,-23.0,2021-05-13
