In [1]:
%config IPCompleter.greedy=True

# Import the dependencies.
import json
import re
import time

import numpy as np
import pandas as pd
import requests

In [2]:
# Endpoint root and HTTP headers for querying stats.nba.com, so that the
# structure of a response can be inspected.
_nba_stats_host = "stats.nba.com"
base_url = f"https://{_nba_stats_host}/stats/"

# Browser-like request headers (Firefox user agent plus the x-nba-stats-* pair).
# NOTE(review): presumably the API expects these — confirm which are required.
headers = {
    "Host": _nba_stats_host,
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) "
        "Gecko/20100101 Firefox/72.0"
    ),
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
    "Connection": "keep-alive",
    "Referer": f"https://{_nba_stats_host}/",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}

# Values interpolated into the endpoint URLs.
season_type = "Playoffs"  # alternative value: "Regular+Season"

# MeasureType values, keyed by their lower-case name.
measure_type = {label.lower(): label for label in ("Base", "Advanced")}

# PerMode values; each key maps to itself.
stat_modes_depth = {
    mode: mode for mode in ("Per100Possessions", "Totals", "Per36", "PerGame")
}

# Season label, team identifier and per-mode used for this run.
season_id, team_id, per_mode = "2019-20", 1610612737, "PerGame"

# Season labels from "1996-97" through "2019-20": the four-digit start year,
# a dash, then the last two digits of the following year (zero-padded, so the
# season starting in 1999 is written "1999-00").
season_list = [
    f"{start_year}-{(start_year + 1) % 100:02d}" for start_year in range(1996, 2020)
]

# Fully formatted request URLs keyed by a short endpoint label (a dict, despite
# the "list_of" name). Every value is an f-string, so base_url, per_mode,
# season_id, season_type and team_id are substituted once, when this statement
# runs; changing those variables afterwards has no effect on the URLs until the
# statement is executed again.
# NOTE(review): only "team_game_logs" interpolates season_type; every other
# entry that sends SeasonType hard-codes Regular+Season — confirm this is intended.
list_of_nbastats_apis = {
    # Interpolates base_url, per_mode, season_id. MeasureType=Base.
    # Despite "totals" in the key, PerMode is whatever per_mode holds.
    "player_general_traditional_totals": f"{base_url}leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight=",
    # Interpolates base_url, per_mode, season_id. MeasureType=Advanced.
    # Despite "totals" in the key, PerMode is whatever per_mode holds.
    "player_general_advanced_totals": f"{base_url}leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight=",
    # Interpolates base_url, per_mode, season_id.
    "player_bios": f"{base_url}leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight=",
    # Interpolates base_url, season_id. Sends PlayerOrTeam=P.
    # TODO: look into the PlayerOrTeam parameter.
    "player_game_logs": f"{base_url}leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam=P&Season={season_id}&SeasonType=Regular+Season&Sorter=DATE",
    # Interpolates base_url, season_id, season_type. Sends PlayerOrTeam=T.
    # TODO: look into the PlayerOrTeam parameter.
    "team_game_logs": f"{base_url}leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam=T&Season={season_id}&SeasonType={season_type}&Sorter=DATE",
    # Interpolates base_url, per_mode, season_id. MeasureType=Base.
    "team_general_traditional_stats": f"{base_url}leaguedashteamstats?DateFrom=&DateTo=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&VsConference=&VsDivision=",
    # Interpolates base_url, per_mode, season_id. MeasureType=Advanced.
    "team_general_advanced_stats": f"{base_url}leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=",
    # Interpolates base_url, per_mode, season_id, team_id.
    "players_by_team": f"{base_url}teamplayerdashboard?DateFrom=&DateTo=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PaceAdjust=N&PerMode={per_mode}&Period=0&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&TeamID={team_id}&VsConference=&VsDivision=",
    # Interpolates base_url, season_id. Sends no SeasonType parameter.
    "player_abbr": f"{base_url}commonallplayers?IsOnlyCurrentSeason=0&LeagueID=00&Season={season_id}",
    # Interpolates base_url, team_id, season_id.
    "team_abbr": f"{base_url}teaminfocommon?LeagueID=00&SeasonType=Regular+Season&TeamID={team_id}&Season={season_id}",
}

# Fetch the team game logs. requests applies no timeout unless one is given, so
# an explicit timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces an HTTP error before the body is parsed as JSON.
team_game_logs_http_response = requests.get(
    url=list_of_nbastats_apis["team_game_logs"], headers=headers, timeout=30
)
team_game_logs_http_response.raise_for_status()
response = team_game_logs_http_response.json()

# Serialise the payload with indentation and print its character count as a
# quick size check (the payload itself is not printed).
pretty_data = json.dumps(response, indent=4)
print(len(pretty_data))

133994


In [3]:
# Column names of the first result set in the payload.
header_result_set = response["resultSets"][0]
stat_header = header_result_set["headers"]
print(stat_header)

['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE']


In [4]:
# Lower-case every column name. str.lower() is applied to each header as a
# whole, rather than lower-casing it character by character and re-joining.
lower_cased_stat_header = [header.lower() for header in stat_header]

# Put the season label in front as the name of the first column.
# NOTE(review): the first column is therefore named after the season value
# itself (e.g. "2019-20"), so its name differs from season to season — confirm
# this is wanted before combining tables across seasons.
final_lower_cased = [f"{season_id}", *lower_cased_stat_header]
print(final_lower_cased)

['2019-20', 'season_id', 'team_id', 'team_abbreviation', 'team_name', 'game_id', 'game_date', 'matchup', 'wl', 'min', 'fgm', 'fga', 'fg_pct', 'fg3m', 'fg3a', 'fg3_pct', 'ftm', 'fta', 'ft_pct', 'oreb', 'dreb', 'reb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'plus_minus', 'video_available']


In [5]:
# Rows of the first result set in the payload.
body_data = response["resultSets"][0]["rowSet"]

# Show only the first few rows; printing every row floods the cell output.
print(body_data[:5])

[['42019', 1610612747, 'LAL', 'Los Angeles Lakers', '0041900406', '2020-10-11', 'LAL @ MIA', 'W', 240, 43, 89, 0.483, 11, 35, 0.314, 9, 14, 0.643, 12, 34, 46, 23, 5, 4, 14, 22, 106, 13, 1], ['42019', 1610612748, 'MIA', 'Miami Heat', '0041900406', '2020-10-11', 'MIA vs. LAL', 'L', 240, 35, 79, 0.443, 10, 28, 0.357, 13, 22, 0.591, 9, 32, 41, 25, 4, 4, 15, 18, 93, -13, 1], ['42019', 1610612748, 'MIA', 'Miami Heat', '0041900405', '2020-10-09', 'MIA @ LAL', 'W', 240, 38, 83, 0.458, 14, 33, 0.424, 21, 22, 0.955, 9, 26, 35, 26, 7, 3, 13, 19, 111, 3, 1], ['42019', 1610612747, 'LAL', 'Los Angeles Lakers', '0041900405', '2020-10-09', 'LAL vs. MIA', 'L', 240, 38, 82, 0.463, 14, 38, 0.368, 18, 21, 0.857, 12, 29, 41, 21, 10, 5, 15, 21, 108, -3, 1], ['42019', 1610612747, 'LAL', 'Los Angeles Lakers', '0041900404', '2020-10-06', 'LAL @ MIA', 'W', 240, 35, 79, 0.443, 14, 39, 0.359, 18, 21, 0.857, 10, 32, 42, 25, 5, 4, 15, 14, 102, 6, 1], ['42019', 1610612748, 'MIA', 'Miami Heat', '0041900404', '2020-10

In [6]:
# Build the table rows: each API row prefixed with the season label, which is
# the key needed to join and sort by season.
# The whole row is unpacked instead of listing indices 0-28 by hand, so the
# number of values per row keeps matching the header list if the API adds or
# removes a column.
team_game_logs_table = [[f"{season_id}", *row] for row in body_data]

# Show only the first few rows; printing the whole table floods the cell output.
print(team_game_logs_table[:5])

[['2019-20', '42019', 1610612747, 'LAL', 'Los Angeles Lakers', '0041900406', '2020-10-11', 'LAL @ MIA', 'W', 240, 43, 89, 0.483, 11, 35, 0.314, 9, 14, 0.643, 12, 34, 46, 23, 5, 4, 14, 22, 106, 13, 1], ['2019-20', '42019', 1610612748, 'MIA', 'Miami Heat', '0041900406', '2020-10-11', 'MIA vs. LAL', 'L', 240, 35, 79, 0.443, 10, 28, 0.357, 13, 22, 0.591, 9, 32, 41, 25, 4, 4, 15, 18, 93, -13, 1], ['2019-20', '42019', 1610612748, 'MIA', 'Miami Heat', '0041900405', '2020-10-09', 'MIA @ LAL', 'W', 240, 38, 83, 0.458, 14, 33, 0.424, 21, 22, 0.955, 9, 26, 35, 26, 7, 3, 13, 19, 111, 3, 1], ['2019-20', '42019', 1610612747, 'LAL', 'Los Angeles Lakers', '0041900405', '2020-10-09', 'LAL vs. MIA', 'L', 240, 38, 82, 0.463, 14, 38, 0.368, 18, 21, 0.857, 12, 29, 41, 21, 10, 5, 15, 21, 108, -3, 1], ['2019-20', '42019', 1610612747, 'LAL', 'Los Angeles Lakers', '0041900404', '2020-10-06', 'LAL @ MIA', 'W', 240, 35, 79, 0.443, 14, 39, 0.359, 18, 21, 0.857, 10, 32, 42, 25, 5, 4, 15, 14, 102, 6, 1], ['2019-20'

In [7]:
# Assemble the rows and the lower-cased column names into a DataFrame, then
# display the first five rows.
team_game_logs_df = pd.DataFrame(team_game_logs_table, columns=final_lower_cased)
team_game_logs_df.head(n=5)

Unnamed: 0,2019-20,season_id,team_id,team_abbreviation,team_name,game_id,game_date,matchup,wl,min,...,dreb,reb,ast,stl,blk,tov,pf,pts,plus_minus,video_available
0,2019-20,42019,1610612747,LAL,Los Angeles Lakers,41900406,2020-10-11,LAL @ MIA,W,240,...,34,46,23,5,4,14,22,106,13,1
1,2019-20,42019,1610612748,MIA,Miami Heat,41900406,2020-10-11,MIA vs. LAL,L,240,...,32,41,25,4,4,15,18,93,-13,1
2,2019-20,42019,1610612748,MIA,Miami Heat,41900405,2020-10-09,MIA @ LAL,W,240,...,26,35,26,7,3,13,19,111,3,1
3,2019-20,42019,1610612747,LAL,Los Angeles Lakers,41900405,2020-10-09,LAL vs. MIA,L,240,...,29,41,21,10,5,15,21,108,-3,1
4,2019-20,42019,1610612747,LAL,Los Angeles Lakers,41900404,2020-10-06,LAL @ MIA,W,240,...,32,42,25,5,4,15,14,102,6,1


In [8]:
# Save to CSV. The file name is derived from season_id so that it follows the
# season that was actually requested instead of a hard-coded "2019-20".
# NOTE(review): the name does not include season_type, although the
# team_game_logs URL depends on it — confirm that Playoffs and Regular Season
# exports are not meant to share this path.
team_game_logs_df.to_csv(
    f"../../data/interim/nba-stats-team_game_logs-{season_id}.csv", index=False
)

In [10]:
# Collect the value at position 4 of every row (GAME_ID in the printed header
# list), then report how many distinct ids there are and what they are.
game_id_list = [game_row[4] for game_row in body_data]

unique_game_ids = set(game_id_list)
print(len(unique_game_ids))
print(unique_game_ids)

83
{'0041900165', '0041900105', '0041900401', '0041900214', '0041900163', '0041900313', '0041900174', '0041900134', '0041900112', '0041900237', '0041900205', '0041900145', '0041900305', '0041900144', '0041900406', '0041900175', '0041900162', '0041900151', '0041900164', '0041900405', '0041900171', '0041900312', '0041900404', '0041900153', '0041900104', '0041900177', '0041900217', '0041900176', '0041900131', '0041900306', '0041900402', '0041900123', '0041900202', '0041900216', '0041900154', '0041900235', '0041900231', '0041900311', '0041900161', '0041900304', '0041900143', '0041900232', '0041900111', '0041900167', '0041900152', '0041900301', '0041900102', '0041900303', '0041900203', '0041900103', '0041900155', '0041900172', '0041900114', '0041900173', '0041900213', '0041900121', '0041900314', '0041900315', '0041900223', '0041900204', '0041900224', '0041900133', '0041900156', '0041900221', '0041900132', '0041900142', '0041900222', '0041900212', '0041900211', '0041900236', '0041900302', '0