In [26]:
%config IPCompleter.greedy=True

# Import the dependencies.
import json
import re
import time

import numpy as np
import pandas as pd
import requests

In [27]:
# The request files to verify its structure.
base_url = "https://stats.nba.com/stats/"

headers = {
    "Host": "stats.nba.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
    "Connection": "keep-alive",
    "Referer": "https://stats.nba.com/",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}

season_type = "Playoffs"
# Regular+Season

measure_type = {"base": "Base", "advanced": "Advanced"}

stat_modes_depth = {
    "Per100Possessions": "Per100Possessions",
    "Totals": "Totals",
    "Per36": "Per36",
    "PerGame": "PerGame",
}

season_id = "2019-20"
team_id = 1610612737
per_mode = "PerGame"

season_list = [
    "1996-97",
    "1997-98",
    "1998-99",
    "1999-00",
    "2000-01",
    "2001-02",
    "2002-03",
    "2003-04",
    "2004-05",
    "2005-06",
    "2006-07",
    "2007-08",
    "2008-09",
    "2009-10",
    "2010-11",
    "2011-12",
    "2012-13",
    "2013-14",
    "2014-15",
    "2015-16",
    "2016-17",
    "2017-18",
    "2018-19",
    "2019-20",
]

list_of_nbastats_apis = {
    # base_url, per-mode, season
    "player_general_traditional_totals": f"{base_url}leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight=",
    # base_url, per-mode, season
    "player_general_advanced_totals": f"{base_url}leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight=",
    # base_url, per-mode, season
    "player_bios": f"{base_url}leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight=",
    # base_url, season, but look into PlayerOrTeam
    "player_game_logs": f"{base_url}leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam=P&Season={season_id}&SeasonType=Regular+Season&Sorter=DATE",
    # base_url, season, but look into PlayerOrTeam
    "team_game_logs": f"{base_url}leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam=T&Season={season_id}&SeasonType={season_type}&Sorter=DATE",
    # base_url, per-mode, season
    "team_general_traditional_stats": f"{base_url}leaguedashteamstats?DateFrom=&DateTo=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&VsConference=&VsDivision=",
    # base_url, per-mode, season
    "team_general_advanced_stats": f"{base_url}leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=",
    # base_url, per-mode, season, team
    "players_by_team": f"{base_url}teamplayerdashboard?DateFrom=&DateTo=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PaceAdjust=N&PerMode={per_mode}&Period=0&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&TeamID={team_id}&VsConference=&VsDivision=",
    # base_url, season
    "player_abbr": f"{base_url}commonallplayers?IsOnlyCurrentSeason=0&LeagueID=00&Season={season_id}",
    # base_url, team, season
    "team_abbr": f"{base_url}teaminfocommon?LeagueID=00&SeasonType=Regular+Season&TeamID={team_id}&Season={season_id}",
}

response = requests.get(
    url=list_of_nbastats_apis["player_game_logs"], headers=headers
).json()

# pretty printing data
pretty_data = json.dumps(response, indent=4)
print(len(pretty_data))

19631654


In [28]:
# pulling the headers we want to experiment with
stat_header = response["resultSets"][0]["headers"]
print(stat_header)

['SEASON_ID', 'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'PLUS_MINUS', 'FANTASY_PTS', 'VIDEO_AVAILABLE']


In [29]:
# lower case all the columns names
lower_cased_stat_header = [[header.lower() for header in item] for item in stat_header]
final_lower_cased = ["".join(header) for header in lower_cased_stat_header]
final_lower_cased.insert(0, f"{season_id}")
print(final_lower_cased)

['2019-20', 'season_id', 'player_id', 'player_name', 'team_id', 'team_abbreviation', 'team_name', 'game_id', 'game_date', 'matchup', 'wl', 'min', 'fgm', 'fga', 'fg_pct', 'fg3m', 'fg3a', 'fg3_pct', 'ftm', 'fta', 'ft_pct', 'oreb', 'dreb', 'reb', 'ast', 'stl', 'blk', 'tov', 'pf', 'pts', 'plus_minus', 'fantasy_pts', 'video_available']


In [30]:
# pulling just the data we want
body_data = response["resultSets"][0]["rowSet"]
print(body_data)

[['22019', 203943, 'Noah Vonleh', 1610612743, 'DEN', 'Denver Nuggets', '0021901318', '2020-08-14', 'DEN @ TOR', 'L', 7, 1, 2, 0.5, 0, 0, None, 0, 0, None, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 3.5, 1], ['22019', 1626169, 'Stanley Johnson', 1610612761, 'TOR', 'Toronto Raptors', '0021901318', '2020-08-14', 'TOR vs. DEN', 'W', 29, 9, 16, 0.563, 4, 8, 0.5, 1, 3, 0.333, 0, 4, 4, 6, 1, 1, 5, 5, 23, 5, 37.8, 1], ['22019', 1629076, 'Tyler Cook', 1610612743, 'DEN', 'Denver Nuggets', '0021901318', '2020-08-14', 'DEN @ TOR', 'L', 16, 1, 2, 0.5, 0, 0, None, 2, 2, 1.0, 2, 2, 4, 0, 2, 0, 2, 1, 4, -3, 12.8, 1], ['22019', 203584, 'Troy Daniels', 1610612743, 'DEN', 'Denver Nuggets', '0021901318', '2020-08-14', 'DEN @ TOR', 'L', 28, 4, 11, 0.364, 2, 8, 0.25, 0, 0, None, 0, 3, 3, 1, 1, 0, 3, 3, 10, -2, 15.1, 1], ['22019', 1627750, 'Jamal Murray', 1610612743, 'DEN', 'Denver Nuggets', '0021901318', '2020-08-14', 'DEN @ TOR', 'L', 10, 4, 5, 0.8, 3, 3, 1.0, 0, 0, None, 0, 0, 0, 1, 1, 0, 0, 2, 11, -6, 15.5, 1], ['220

In [31]:
# looping over data to insert into table
player_game_logs_table = []
for row in body_data:
    player_game_logs_table.append(
        [
            f"{season_id}",  # this is key, need this to join and sort by seasons
            row[0],
            row[1],
            row[2],
            row[3],
            row[4],
            row[5],
            row[6],
            row[7],
            row[8],
            row[9],
            row[10],
            row[11],
            row[12],
            row[13],
            row[14],
            row[15],
            row[16],
            row[17],
            row[18],
            row[19],
            row[20],
            row[21],
            row[22],
            row[23],
            row[24],
            row[25],
            row[26],
            row[27],
            row[28],
            row[29],
            row[30],
            row[31],
        ]
    )

print(player_game_logs_table)

[['2019-20', '22019', 203943, 'Noah Vonleh', 1610612743, 'DEN', 'Denver Nuggets', '0021901318', '2020-08-14', 'DEN @ TOR', 'L', 7, 1, 2, 0.5, 0, 0, None, 0, 0, None, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 3.5, 1], ['2019-20', '22019', 1626169, 'Stanley Johnson', 1610612761, 'TOR', 'Toronto Raptors', '0021901318', '2020-08-14', 'TOR vs. DEN', 'W', 29, 9, 16, 0.563, 4, 8, 0.5, 1, 3, 0.333, 0, 4, 4, 6, 1, 1, 5, 5, 23, 5, 37.8, 1], ['2019-20', '22019', 1629076, 'Tyler Cook', 1610612743, 'DEN', 'Denver Nuggets', '0021901318', '2020-08-14', 'DEN @ TOR', 'L', 16, 1, 2, 0.5, 0, 0, None, 2, 2, 1.0, 2, 2, 4, 0, 2, 0, 2, 1, 4, -3, 12.8, 1], ['2019-20', '22019', 203584, 'Troy Daniels', 1610612743, 'DEN', 'Denver Nuggets', '0021901318', '2020-08-14', 'DEN @ TOR', 'L', 28, 4, 11, 0.364, 2, 8, 0.25, 0, 0, None, 0, 3, 3, 1, 1, 0, 3, 3, 10, -2, 15.1, 1], ['2019-20', '22019', 1627750, 'Jamal Murray', 1610612743, 'DEN', 'Denver Nuggets', '0021901318', '2020-08-14', 'DEN @ TOR', 'L', 10, 4, 5, 0.8, 3, 3, 1.0, 0, 0

In [32]:
player_game_logs_df = pd.DataFrame(
    data=player_game_logs_table, columns=final_lower_cased
)
player_game_logs_df.head()

Unnamed: 0,2019-20,season_id,player_id,player_name,team_id,team_abbreviation,team_name,game_id,game_date,matchup,...,reb,ast,stl,blk,tov,pf,pts,plus_minus,fantasy_pts,video_available
0,2019-20,22019,203943,Noah Vonleh,1610612743,DEN,Denver Nuggets,21901318,2020-08-14,DEN @ TOR,...,0,1,0,0,0,0,2,0,3.5,1
1,2019-20,22019,1626169,Stanley Johnson,1610612761,TOR,Toronto Raptors,21901318,2020-08-14,TOR vs. DEN,...,4,6,1,1,5,5,23,5,37.8,1
2,2019-20,22019,1629076,Tyler Cook,1610612743,DEN,Denver Nuggets,21901318,2020-08-14,DEN @ TOR,...,4,0,2,0,2,1,4,-3,12.8,1
3,2019-20,22019,203584,Troy Daniels,1610612743,DEN,Denver Nuggets,21901318,2020-08-14,DEN @ TOR,...,3,1,1,0,3,3,10,-2,15.1,1
4,2019-20,22019,1627750,Jamal Murray,1610612743,DEN,Denver Nuggets,21901318,2020-08-14,DEN @ TOR,...,0,1,1,0,0,2,11,-6,15.5,1


In [33]:
# Save to CSV
player_game_logs_df.to_csv(
    "../../data/interim/nba-stats-player_game_logs-2019-20.csv", index=False
)

In [34]:
game_id_list = []
for x in body_data:
    game_id_list.append(x[4])

print(len(set(game_id_list)))

30
