In [1]:
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import cumestatsteamgames, cumestatsteam, gamerotation
import pandas as pd
import json
import difflib
import time

In [2]:
# Get Past Team Schedules
def getRegularSeasonSchedule(seasonYear,teamID):
    """Fetch one team's regular-season game list for a single season.

    Parameters
    ----------
    seasonYear : int
        Year in which the season starts; 2020 yields the label "2020-21".
    teamID
        Team identifier, forwarded unchanged as ``team_id`` to the endpoint.

    Returns
    -------
    pandas.DataFrame
        The ``CumeStatsTeamGames`` records from the endpoint's normalized
        JSON, with an added ``SEASON`` column holding the season label.
    """
    # Label = start year, a dash, and the last two digits of the following year.
    seasonLabel = f"{seasonYear}-{str(seasonYear + 1)[-2:]}"

    endpoint = cumestatsteamgames.CumeStatsTeamGames(
        league_id='00',
        season=seasonLabel,
        season_type_all_star='Regular Season',
        team_id=teamID,
    )
    payload = json.loads(endpoint.get_normalized_json())

    schedule = pd.DataFrame(payload['CumeStatsTeamGames'])
    schedule['SEASON'] = seasonLabel
    return schedule

In [3]:
# Format Data Frame Columns and Get Home and Away Team IDs

def getGameDate(matchup):
    """Return the leading date text of a matchup string.

    Takes everything before the first ' at' and keeps at most its first ten
    characters, e.g. '05/16/2021 Rockets at Hawks' -> '05/16/2021'.
    """
    beforeSeparator, _, _ = matchup.partition(' at')
    return beforeSeparator[:10]

def getHomeTeam(matchup):
    """Return the home team's nickname from a matchup string.

    The home team is the text after the first ' at', e.g.
    '05/16/2021 Rockets at Hawks' -> 'Hawks'. Returns '' when the string
    contains no ' at' separator.
    """
    # The text after ' at' begins with the space that preceded the nickname;
    # strip it so the value is the bare nickname rather than ' Hawks'.
    return matchup.partition(' at')[2].strip()

def getAwayTeam(matchup):
    """Return the away team's nickname from a matchup string.

    The away team sits between the ten-character date prefix and the first
    ' at', e.g. '05/16/2021 Rockets at Hawks' -> 'Rockets'.
    """
    # Slicing off the ten date characters leaves the separating space in
    # front of the nickname; strip it so the value is 'Rockets', not ' Rockets'.
    return matchup.partition(' at')[0][10:].strip()

def getTeamIDFromNickname(nickname):
    """Look up a team id by (approximately matching) nickname.

    Uses the module-level ``teams`` DataFrame, which must have ``nickname``
    and ``id`` columns. The nickname is fuzzy-matched with
    ``difflib.get_close_matches`` and the id of the best match is returned.

    Raises
    ------
    IndexError
        If no nickname in ``teams`` is a close enough match.
    """
    # Nicknames parsed out of matchup strings may carry surrounding whitespace.
    candidates = difflib.get_close_matches(nickname.strip(), teams['nickname'], 1)
    if not candidates:
        # Same exception type as the bare [0] lookup raised, but with a usable message.
        raise IndexError(f"no team nickname closely matches {nickname!r}")

    # Select the id by column name instead of assuming it is the first column.
    return teams.loc[teams['nickname'] == candidates[0], 'id'].iloc[0]


In [4]:
def getSingleGameMetrics(gameID,homeTeamID,awayTeamID,awayTeamNickname,seasonYear,gameDate):
    """Build the box-score frame for one game, with derived metrics.

    Parameters
    ----------
    gameID
        Game identifier, sent as ``game_ids`` and stored in ``GAME_ID``.
    homeTeamID
        Team whose cumulative stats are requested (``team_id``).
    awayTeamID, awayTeamNickname
        Written into ``TEAM_ID`` / ``NICKNAME`` of row 1.
    seasonYear
        Forwarded unchanged as ``season`` and stored in ``SEASON``.
    gameDate
        Stored in ``GAME_DATE`` on every row.

    Returns
    -------
    pandas.DataFrame
        The endpoint's ``TotalTeamStats`` records plus ``OFFENSIVE_EFFICIENCY``
        and ``SCORING_MARGIN`` for rows 0 and 1, and the ``SEASON``,
        ``GAME_DATE`` and ``GAME_ID`` columns.
    """

    def fetchTotals(teamID, gameID, seasonYear):
        """Request the game's team totals and return them as a DataFrame."""
        response = cumestatsteam.CumeStatsTeam(
            game_ids=gameID,
            league_id="00",
            season=seasonYear,
            season_type_all_star="Regular Season",
            team_id=teamID,
        )
        totals = json.loads(response.get_normalized_json())['TotalTeamStats']
        return pd.DataFrame(totals)

    box = fetchTotals(homeTeamID, gameID, seasonYear)

    # Row 1 is relabelled with the away team's identity.
    box.at[1, 'NICKNAME'] = awayTeamNickname
    box.at[1, 'TEAM_ID'] = awayTeamID

    # Row 1 is processed before row 0 so the new columns are created in the
    # same order (and with the same resulting dtypes) as before.
    for row, other in ((1, 0), (0, 1)):
        numerator = box.at[row, 'FG'] + box.at[row, 'AST']
        denominator = (box.at[row, 'FGA'] - box.at[row, 'OFF_REB']
                       + box.at[row, 'AST'] + box.at[row, 'TOTAL_TURNOVERS'])
        box.at[row, 'OFFENSIVE_EFFICIENCY'] = numerator / denominator
        box.at[row, 'SCORING_MARGIN'] = box.at[row, 'PTS'] - box.at[other, 'PTS']

    box['SEASON'] = seasonYear
    box['GAME_DATE'] = gameDate
    box['GAME_ID'] = gameID

    return box

In [5]:
# Get NBA Team IDs
# `teams` is imported as the nba_api static module and rebound to a DataFrame here.
# The guard keeps this cell re-runnable: on a second run `teams` is already the
# DataFrame, which has no get_teams() method.
if not isinstance(teams, pd.DataFrame):
    teams = pd.DataFrame(teams.get_teams())
teams.head(10)

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Atlanta,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966
5,1610612742,Dallas Mavericks,DAL,Mavericks,Dallas,Texas,1980
6,1610612743,Denver Nuggets,DEN,Nuggets,Denver,Colorado,1976
7,1610612744,Golden State Warriors,GSW,Warriors,Golden State,California,1946
8,1610612745,Houston Rockets,HOU,Rockets,Houston,Texas,1967
9,1610612746,Los Angeles Clippers,LAC,Clippers,Los Angeles,California,1970


In [6]:
# Get Schedules for the last 2 seasons + current
years = [2020,2021,2022]

start = time.perf_counter_ns()
# Collect the per-team frames and concatenate once at the end. DataFrame.append
# was removed in pandas 2.0, and appending inside the loop re-copied every
# previously collected row on each iteration.
teamSchedules = []
for year in years:
    for teamID in teams['id']:
        # Pause between requests (presumably to stay under the stats API's rate limit).
        time.sleep(1)
        teamSchedules.append(getRegularSeasonSchedule(year, teamID))
# pd.concat rejects an empty list, so fall back to an empty frame in that case.
scheduleFrame = pd.concat(teamSchedules) if teamSchedules else pd.DataFrame()
end = time.perf_counter_ns()

mins = ((end - start) / 1e9)/60
print(mins)

2.4451183616666667


In [7]:
# Preview the first ten raw schedule rows (MATCHUP, GAME_ID, SEASON) before any parsing.
scheduleFrame.head(10)

Unnamed: 0,MATCHUP,GAME_ID,SEASON
0,05/16/2021 Rockets at Hawks,22001066,2020-21
1,05/13/2021 Magic at Hawks,22001049,2020-21
2,05/12/2021 Wizards at Hawks,22001042,2020-21
3,05/10/2021 Wizards at Hawks,22001026,2020-21
4,05/06/2021 Hawks at Pacers,22001000,2020-21
5,05/05/2021 Suns at Hawks,22000992,2020-21
6,05/03/2021 Trail Blazers at Hawks,22000978,2020-21
7,05/01/2021 Bulls at Hawks,22000959,2020-21
8,04/30/2021 Hawks at 76ers,22000949,2020-21
9,04/28/2021 Hawks at 76ers,22000934,2020-21


In [8]:
# Identify Home and Away Teams and format date field

start = time.perf_counter_ns()
# There's a row for both teams of every game, and only one is needed. Drop the
# duplicates before deriving columns: everything below depends only on MATCHUP,
# so the result is the same and the fuzzy nickname lookup runs once per game
# instead of once per row.
scheduleFrame = scheduleFrame.drop_duplicates().reset_index(drop=True)
scheduleFrame['GAME_DATE'] = pd.to_datetime(scheduleFrame['MATCHUP'].map(getGameDate))
scheduleFrame['HOME_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getHomeTeam)
scheduleFrame['HOME_TEAM_ID'] = scheduleFrame['HOME_TEAM_NICKNAME'].map(getTeamIDFromNickname)
scheduleFrame['AWAY_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getAwayTeam)
scheduleFrame['AWAY_TEAM_ID'] = scheduleFrame['AWAY_TEAM_NICKNAME'].map(getTeamIDFromNickname)
end = time.perf_counter_ns()
secs = ((end - start) / 1e9)
print(secs)

6.0619276


In [9]:
# Try the per-game fetch on schedule row 1; the columns are listed in the
# order of the function's positional parameters.
getSingleGameMetrics(*(
    scheduleFrame.at[1, column]
    for column in ('GAME_ID', 'HOME_TEAM_ID', 'AWAY_TEAM_ID',
                   'AWAY_TEAM_NICKNAME', 'SEASON', 'GAME_DATE')
))

Unnamed: 0,CITY,NICKNAME,TEAM_ID,W,L,W_HOME,L_HOME,W_ROAD,L_ROAD,TEAM_TURNOVERS,...,BLK,PTS,AVG_REB,AVG_PTS,DQ,OFFENSIVE_EFFICIENCY,SCORING_MARGIN,SEASON,GAME_DATE,GAME_ID
0,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,4,116,58.0,116.0,0,0.555556,23.0,2020-21,2021-05-13,22001049
1,OPPONENTS,Magic,1610612753,0,1,0,0,0,1,0,...,6,93,62.0,93.0,0,0.517241,-23.0,2020-21,2021-05-13,22001049


In [10]:
# Empty accumulator for the per-game rows. The fetch cell derives its resume
# position from len(gameLogs), so re-running this cell restarts the download from 0.
gameLogs = pd.DataFrame()

In [14]:
# Resume where the previous run stopped: each fetched game adds two rows to
# gameLogs, so this is 0 when just starting.
i = len(gameLogs) // 2

# Collect the per-game frames in a list and concatenate once. DataFrame.append
# was removed in pandas 2.0, and appending inside the loop re-copied all
# previously fetched rows on every iteration.
fetchedGames = []
try:
    while i < len(scheduleFrame):
        # TODO: retry a failed request (up to 5 times), then log the failed game and continue.
        # NOTE: skipping a game would break the len(gameLogs) // 2 resume calculation above.
        fetchedGames.append(getSingleGameMetrics(scheduleFrame.at[i,'GAME_ID'],scheduleFrame.at[i,'HOME_TEAM_ID'],
                         scheduleFrame.at[i,'AWAY_TEAM_ID'],scheduleFrame.at[i,'AWAY_TEAM_NICKNAME'],
                         scheduleFrame.at[i,'SEASON'],scheduleFrame.at[i,'GAME_DATE']))

        i+=1
finally:
    # Runs on success and on error/interrupt alike, so games fetched before a
    # failure are kept and re-running this cell continues after them.
    if fetchedGames:
        priorLogs = [] if gameLogs.empty else [gameLogs]
        gameLogs = pd.concat(priorLogs + fetchedGames)

gameLogs = gameLogs.reset_index(drop=True)

In [15]:
# Save the fetched game logs to gameLogs.csv in the working directory
# (to_csv writes the DataFrame index as the first column by default).
gameLogs.to_csv('gameLogs.csv')

In [18]:
# Renumber the rows 0..n-1. The fetch cell already ends with this same call,
# so running it again here is a harmless repeat.
gameLogs = gameLogs.reset_index(drop=True)

In [19]:
# Display the assembled game logs (pandas elides the middle rows and columns of a large frame).
gameLogs

Unnamed: 0,CITY,NICKNAME,TEAM_ID,W,L,W_HOME,L_HOME,W_ROAD,L_ROAD,TEAM_TURNOVERS,...,BLK,PTS,AVG_REB,AVG_PTS,DQ,OFFENSIVE_EFFICIENCY,SCORING_MARGIN,SEASON,GAME_DATE,GAME_ID
0,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,8,124,63.0,124.0,0,0.653226,29.0,2020-21,2021-05-16,0022001066
1,OPPONENTS,Rockets,1610612745,0,1,0,0,0,1,1,...,5,95,51.0,95.0,0,0.459016,-29.0,2020-21,2021-05-16,0022001066
2,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,4,116,58.0,116.0,0,0.555556,23.0,2020-21,2021-05-13,0022001049
3,OPPONENTS,Magic,1610612753,0,1,0,0,0,1,0,...,6,93,62.0,93.0,0,0.517241,-23.0,2020-21,2021-05-13,0022001049
4,Atlanta,Hawks,1610612737,1,0,1,0,0,0,0,...,5,120,56.0,120.0,0,0.573913,4.0,2020-21,2021-05-12,0022001042
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5765,OPPONENTS,Wizards,1610612764,1,0,0,0,1,0,1,...,6,108,58.0,108.0,0,0.594340,8.0,2022-23,2022-11-07,0022200144
5766,Washington,Wizards,1610612764,1,0,1,0,0,0,1,...,7,120,55.0,120.0,0,0.614035,21.0,2022-23,2022-10-25,0022200052
5767,OPPONENTS,Pistons,1610612765,0,1,0,0,0,1,0,...,1,99,48.0,99.0,0,0.470588,-21.0,2022-23,2022-10-25,0022200052
5768,Charlotte,Hornets,1610612766,0,1,0,1,0,0,0,...,6,134,65.0,134.0,1,0.552846,-7.0,2022-23,2022-12-14,0022200414
