In [1]:
%config IPCompleter.greedy=True

# Import the dependencies.
import json
import re
import time

import numpy as np
import pandas as pd
import requests

In [2]:
# The request files to verify its structure.
base_url = "https://stats.nba.com/stats/"

headers = {
    "Host": "stats.nba.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate, br",
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
    "Connection": "keep-alive",
    "Referer": "https://stats.nba.com/",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}

measure_type = {"base": "Base", "advanced": "Advanced"}

stat_modes_depth = {
    "Per100Possessions": "Per100Possessions",
    "Totals": "Totals",
    "Per36": "Per36",
    "PerGame": "PerGame",
}

season_id = "2019-20"
team_id = 1610612737
per_mode = "PerGame"

season_list = [
    "1996-97",
    "1997-98",
    "1998-99",
    "1999-00",
    "2000-01",
    "2001-02",
    "2002-03",
    "2003-04",
    "2004-05",
    "2005-06",
    "2006-07",
    "2007-08",
    "2008-09",
    "2009-10",
    "2010-11",
    "2011-12",
    "2012-13",
    "2013-14",
    "2014-15",
    "2015-16",
    "2016-17",
    "2017-18",
    "2018-19",
    "2019-20",
]

list_of_nbastats_apis = {
    # base_url, per-mode, season
    "player_general_traditional_totals": f"{base_url}leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight=",
    # base_url, per-mode, season
    "player_general_advanced_totals": f"{base_url}leaguedashplayerstats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=&Weight=",
    # base_url, per-mode, season
    "player_bios": f"{base_url}leaguedashplayerbiostats?College=&Conference=&Country=&DateFrom=&DateTo=&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight=",
    # base_url, season, but look into PlayerOrTeam
    "player_game_logs": f"{base_url}leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam=P&Season={season_id}&SeasonType=Regular+Season&Sorter=DATE",
    # base_url, season, but look into PlayerOrTeam
    "team_game_logs": f"{base_url}leaguegamelog?Counter=1000&DateFrom=&DateTo=&Direction=DESC&LeagueID=00&PlayerOrTeam=T&Season={season_id}&SeasonType=Regular+Season&Sorter=DATE",
    # base_url, per-mode, season
    "team_general_traditional_stats": f"{base_url}leaguedashteamstats?DateFrom=&DateTo=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&VsConference=&VsDivision=",
    # base_url, per-mode, season
    "team_general_advanced_stats": f"{base_url}leaguedashteamstats?Conference=&DateFrom=&DateTo=&Division=&GameScope=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode={per_mode}&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&StarterBench=&TeamID=0&TwoWay=0&VsConference=&VsDivision=",
    # base_url, per-mode, season, team
    "players_by_team": f"{base_url}teamplayerdashboard?DateFrom=&DateTo=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PaceAdjust=N&PerMode={per_mode}&Period=0&PlusMinus=N&Rank=N&Season={season_id}&SeasonSegment=&SeasonType=Regular+Season&TeamID={team_id}&VsConference=&VsDivision=",
    # base_url, season
    "player_abbr": f"{base_url}commonallplayers?IsOnlyCurrentSeason=0&LeagueID=00&Season={season_id}",
    # base_url, team, season
    "team_abbr": f"{base_url}teaminfocommon?LeagueID=00&SeasonType=Regular+Season&TeamID={team_id}&Season={season_id}",
}

# I want to just download the CSV version of Player's General Traditional stats
# https://www.nba.com/stats/players/traditional/?sort=PTS&dir=-1&Season=2019-20&SeasonType=Regular%20Season

response = requests.get(
    url=list_of_nbastats_apis["player_general_traditional_totals"], headers=headers
).json()

# pretty printing data
pretty_data = json.dumps(response, indent=4)
print(len(pretty_data))

900305


In [2]:
# pulling the headers we want to experiment with
stat_header = response["resultSets"][0]["headers"]
print(stat_header)

NameError: name 'response' is not defined

In [1]:
# lower case all the columns names
lower_cased_stat_header = [[header.lower()] for item in stat_header for header in item]
print(lower_cased_stat_header)

NameError: name 'stat_header' is not defined

In [4]:
# pulling just the data we want
body_data = response["resultSets"][0]["rowSet"]
print(len(body_data))

529


In [5]:
# looping over data to insert into table
player_general_traditional_totals_table = []
for row in body_data:
    player_general_traditional_totals_table.append(
        [
            "2019-20",  # this is key, need this to join and sort by seasons
            row[0],
            row[1],
            row[2],
            row[3],
            row[4],
            row[5],
            row[6],
            row[7],
            row[8],
            row[9],
            row[10],
            row[11],
            row[12],
            row[13],
            row[14],
            row[15],
            row[16],
            row[17],
            row[18],
            row[19],
            row[20],
            row[21],
            row[22],
            row[23],
            row[24],
            row[25],
            row[26],
            row[27],
            row[28],
            row[29],
            row[30],
            row[31],
            row[32],
            row[33],
            # gp_rank=row[34],
            # w_rank=row[35],
            # l_rank=row[36],
            # w_pct_rank=row[37],
            # min_rank=row[38],
            # fgm_rank=row[39],
            # fga_rank=row[40],
            # fg_pct_rank=row[41],
            # fg3m_rank=row[42],
            # fg3a_rank=row[43],
            # fg3_pct_rank=row[44],
            # ftm_rank=row[45],
            # fta_rank=row[46],
            # ft_pct_rank=row[47],
            # oreb_rank=row[48],
            # dreb_rank=row[49],
            # reb_rank=row[50],
            # ast_rank=row[51],
            # tov_rank=row[52],
            # stl_rank=row[53],
            # blk_rank=row[54],
            # blka_rank=row[55],
            # pf_rank=row[56],
            # pfd_rank=row[57],
            # pts_rank=row[58],
            # plus_minus_rank=row[59],
            # nba_fantasy_points_rank=row[60],
            # dd2_rank=row[61],
            # td3_rank=row[62],
            row[63],
            row[64],
        ]
    )

print(player_general_traditional_totals_table)

[['2019-20', 203932, 'Aaron Gordon', 1610612753, 'ORL', 24.0, 62, 30, 32, 0.484, 32.5, 5.4, 12.4, 0.437, 1.2, 3.8, 0.308, 2.4, 3.6, 0.674, 1.7, 5.9, 7.7, 3.7, 1.6, 0.8, 0.6, 0.7, 2.0, 3.3, 14.4, -1.1, 31.9, 20, 1, 5, '203932,1610612753'], ['2019-20', 1628988, 'Aaron Holiday', 1610612754, 'IND', 23.0, 66, 42, 24, 0.636, 24.5, 3.5, 8.5, 0.414, 1.3, 3.3, 0.394, 1.1, 1.3, 0.851, 0.3, 2.0, 2.4, 3.4, 1.3, 0.8, 0.2, 0.4, 1.8, 1.7, 9.5, 1.7, 19.3, 3, 0, 5, '1628988,1610612754'], ['2019-20', 1627846, 'Abdel Nader', 1610612760, 'OKC', 26.0, 55, 37, 18, 0.673, 15.8, 2.2, 4.8, 0.468, 0.9, 2.3, 0.375, 0.9, 1.2, 0.773, 0.3, 1.6, 1.8, 0.7, 0.8, 0.4, 0.4, 0.2, 1.4, 0.9, 6.3, -1.5, 11.1, 0, 0, 5, '1627846,1610612760'], ['2019-20', 1629690, 'Adam Mokoka', 1610612741, 'CHI', 21.0, 11, 3, 8, 0.273, 10.2, 1.1, 2.5, 0.429, 0.5, 1.4, 0.4, 0.2, 0.4, 0.5, 0.6, 0.3, 0.9, 0.4, 0.2, 0.4, 0.0, 0.4, 1.5, 0.4, 2.9, 4.5, 5.5, 0, 0, 5, '1629690,1610612741'], ['2019-20', 1629678, 'Admiral Schofield', 1610612764, 'WAS',

In [6]:
# Converting a List to DataFrame

# List of columns
columns = [
    "season_id",
    "nba_stats_player_id",
    "player_name",
    "team_id",
    "team_abbreviation",
    "age",
    "gp",
    "w",
    "l",
    "w_pct",
    "min",
    "fgm",
    "fga",
    "fg_pct",
    "fg3m",
    "fg3a",
    "fg3_pct",
    "ftm",
    "fta",
    "ft_pct",
    "oreb",
    "dreb",
    "reb",
    "ast",
    "tov",
    "stl",
    "blk",
    "blka",
    "pf",
    "pfd",
    "pts",
    "plus_minus",
    "nba_fantasy_points",
    "dd2",
    "td3",
    "cfid",
    "cfparams",
]

player_general_traditional_totals_df = pd.DataFrame(
    data=player_general_traditional_totals_table, columns=columns
)
player_general_traditional_totals_df.head()

Unnamed: 0,season_id,nba_stats_player_id,player_name,team_id,team_abbreviation,age,gp,w,l,w_pct,...,blka,pf,pfd,pts,plus_minus,nba_fantasy_points,dd2,td3,cfid,cfparams
0,2019-20,203932,Aaron Gordon,1610612753,ORL,24.0,62,30,32,0.484,...,0.7,2.0,3.3,14.4,-1.1,31.9,20,1,5,2039321610612753
1,2019-20,1628988,Aaron Holiday,1610612754,IND,23.0,66,42,24,0.636,...,0.4,1.8,1.7,9.5,1.7,19.3,3,0,5,16289881610612754
2,2019-20,1627846,Abdel Nader,1610612760,OKC,26.0,55,37,18,0.673,...,0.2,1.4,0.9,6.3,-1.5,11.1,0,0,5,16278461610612760
3,2019-20,1629690,Adam Mokoka,1610612741,CHI,21.0,11,3,8,0.273,...,0.4,1.5,0.4,2.9,4.5,5.5,0,0,5,16296901610612741
4,2019-20,1629678,Admiral Schofield,1610612764,WAS,23.0,33,9,24,0.273,...,0.1,1.5,0.5,3.0,-1.7,6.3,0,0,5,16296781610612764


In [7]:
# Save to CSV
player_general_traditional_totals_df.to_csv(
    "../../data/interim/nba-stats-general-traditional-2019-20.csv", index=False
)

In [8]:
# I want to just download the CSV version of Player's General Traditional stats
# https://www.nba.com/stats/players/traditional/?sort=PTS&dir=-1&Season=2019-20&SeasonType=Regular%20Season

adv_response = requests.get(
    url=list_of_nbastats_apis["player_general_advanced_totals"], headers=headers
).json()

# pretty printing data
pretty_data = json.dumps(adv_response, indent=4)
print(len(pretty_data))

1107957


In [9]:
# pulling the headers we want to experiment with
stat_header = adv_response["resultSets"][0]["headers"]
print(stat_header)
print(len(stat_header))

['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE', 'GP', 'W', 'L', 'W_PCT', 'MIN', 'E_OFF_RATING', 'OFF_RATING', 'sp_work_OFF_RATING', 'E_DEF_RATING', 'DEF_RATING', 'sp_work_DEF_RATING', 'E_NET_RATING', 'NET_RATING', 'sp_work_NET_RATING', 'AST_PCT', 'AST_TO', 'AST_RATIO', 'OREB_PCT', 'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT', 'E_TOV_PCT', 'EFG_PCT', 'TS_PCT', 'USG_PCT', 'E_USG_PCT', 'E_PACE', 'PACE', 'PACE_PER40', 'sp_work_PACE', 'PIE', 'POSS', 'FGM', 'FGA', 'FGM_PG', 'FGA_PG', 'FG_PCT', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'E_OFF_RATING_RANK', 'OFF_RATING_RANK', 'sp_work_OFF_RATING_RANK', 'E_DEF_RATING_RANK', 'DEF_RATING_RANK', 'sp_work_DEF_RATING_RANK', 'E_NET_RATING_RANK', 'NET_RATING_RANK', 'sp_work_NET_RATING_RANK', 'AST_PCT_RANK', 'AST_TO_RANK', 'AST_RATIO_RANK', 'OREB_PCT_RANK', 'DREB_PCT_RANK', 'REB_PCT_RANK', 'TM_TOV_PCT_RANK', 'E_TOV_PCT_RANK', 'EFG_PCT_RANK', 'TS_PCT_RANK', 'USG_PCT_RANK', 'E_USG_PCT_RANK', 'E_PACE_RANK', 'PACE_RANK', 'sp_work_

In [10]:
# pulling just the data we want
body_data = adv_response["resultSets"][0]["rowSet"]
one_row = body_data[0]
print(body_data[0])

[203932, 'Aaron Gordon', 1610612753, 'ORL', 24.0, 62, 30, 32, 0.484, 32.5, 107.5, 108.6, 108.6, 108.8, 109.7, 109.7, -1.3, -1.2, -1.2, 0.165, 2.28, 19.1, 0.05, 0.181, 0.114, 8.4, 8.4, 0.484, 0.516, 0.205, 0.208, 100.62, 99.72, 83.1, 99.72, 0.104, 4182, 335, 767, 5.4, 12.4, 0.437, 123, 152, 397, 247, 48, 265, 237, 237, 300, 334, 334, 269, 273, 273, 147, 104, 157, 163, 106, 126, 133, 132, 369, 362, 148, 156, 404, 451, 451, 150, 75, 68, 90, 81, 281, 5, '203932,1610612753']


In [11]:
# looping over data to insert into table
player_general_advanced_totals_table = []
for row in body_data:
    player_general_advanced_totals_table.append(
        ["2019-20",  # this is key, need this to join and sort by seasons
        row[0],
        row[1],
        row[2],
        row[3],
        row[4],
        row[5],
        row[6],
        row[7],
        row[8],
        row[9],
        row[10],
        row[11],
        #sp_work_OFF_RATING=row[12],
        row[13],
        row[14],
        #sp_work_DEF_RATING=row[15],
        row[16],
        row[17],
        #sp_work_NET_RATING=row[18],
        row[19],
        row[20],
        row[21],
        row[22],
        row[23],
        row[24],
        row[25],
        row[26],
        row[27],
        row[28],
        row[29],
        row[30],
        row[31],
        row[32],
        row[33],
        #sp_work_PACE=row[34],
        row[35],
        row[36],
        row[37],
        row[38],
        row[39],
        row[40],
        row[41],
        row[42],
        row[43],
        row[44],
        row[45],
        row[46],
        row[47],
        row[48],
        #sp_work_OFF_RATING_RANK=row[49],
        row[50],
        row[51],
        #sp_work_DEF_RATING_RANK=row[52],
        row[53],
        row[54],
        #sp_work_NET_RATING_RANK=row[55],
        row[56],
        row[57],
        row[58],
        row[59],
        row[60],
        row[61],
        row[62],
        row[63],
        row[64],
        row[65],
        row[66],
        row[67],
        row[68],
        row[69],
        #sp_work_PACE_RANK=row[70],
        row[71],
        row[72],
        row[73],
        row[74],
        row[75],
        row[76],
        row[77],
        row[78],
        ]
    )

print(player_general_advanced_totals_table)

[['2019-20', 203932, 'Aaron Gordon', 1610612753, 'ORL', 24.0, 62, 30, 32, 0.484, 32.5, 107.5, 108.6, 108.8, 109.7, -1.3, -1.2, 0.165, 2.28, 19.1, 0.05, 0.181, 0.114, 8.4, 8.4, 0.484, 0.516, 0.205, 0.208, 100.62, 99.72, 83.1, 0.104, 4182, 335, 767, 5.4, 12.4, 0.437, 123, 152, 397, 247, 48, 265, 237, 300, 334, 269, 273, 147, 104, 157, 163, 106, 126, 133, 132, 369, 362, 148, 156, 404, 451, 150, 75, 68, 90, 81, 281, 5, '203932,1610612753'], ['2019-20', 1628988, 'Aaron Holiday', 1610612754, 'IND', 23.0, 66, 42, 24, 0.636, 24.5, 109.4, 109.1, 105.0, 106.9, 4.5, 2.2, 0.188, 2.56, 24.6, 0.013, 0.077, 0.046, 9.6, 9.6, 0.491, 0.521, 0.182, 0.187, 101.07, 100.31, 83.59, 0.078, 3398, 233, 563, 3.5, 8.5, 0.414, 70, 51, 299, 103, 181, 173, 215, 145, 203, 122, 175, 116, 66, 79, 459, 453, 463, 204, 204, 345, 347, 216, 221, 374, 416, 306, 150, 128, 186, 160, 358, 5, '1628988,1610612754'], ['2019-20', 1627846, 'Abdel Nader', 1610612760, 'OKC', 26.0, 55, 37, 18, 0.673, 15.8, 100.7, 100.9, 105.3, 105.2, -

In [12]:
# Converting a List to DataFrame

# List of columns
columns = [
    "season_id",
    "nba_stats_player_id",
    "player_name",
    "team_id",
    "team_abbreviation",
    "age",
    "gp",
    "w",
    "l",
    "w_pct",
    "min",
    "E_OFF_RATING",
    "OFF_RATING",
    "E_DEF_RATING",
    "DEF_RATING",
    "E_NET_RATING",
    "NET_RATING",
    "AST_PCT",
    "AST_TO",
    "AST_RATIO",
    "OREB_PCT",
    "DREB_PCT",
    "REB_PCT",
    "TM_TOV_PCT",
    "E_TOV_PCT",
    "EFG_PCT",
    "TS_PCT",
    "USG_PCT",
    "E_USG_PCT",
    "E_PACE",
    "PACE",
    "PACE_PER40",
    "PIE",
    "POSS",
    "FGM",
    "FGA",
    "FGM_PG",
    "FGA_PG",
    "FG_PCT",
    "GP_RANK",
    "W_RANK",
    "L_RANK",
    "W_PCT_RANK",
    "MIN_RANK",
    "E_OFF_RATING_RANK",
    "OFF_RATING_RANK",
    "E_DEF_RATING_RANK",
    "DEF_RATING_RANK",
    "E_NET_RATING_RANK",
    "NET_RATING_RANK",
    "AST_PCT_RANK",
    "AST_TO_RANK",
    "AST_RATIO_RANK",
    "OREB_PCT_RANK",
    "DREB_PCT_RANK",
    "REB_PCT_RANK",
    "TM_TOV_PCT_RANK",
    "E_TOV_PCT_RANK",
    "EFG_PCT_RANK",
    "TS_PCT_RANK",
    "USG_PCT_RANK",
    "E_USG_PCT_RANK",
    "E_PACE_RANK",
    "PACE_RANK",
    "PIE_RANK",
    "FGM_RANK",
    "FGA_RANK",
    "FGM_PG_RANK",
    "FGA_PG_RANK",
    "FG_PCT_RANK",
    "CFID",
    "CFPARAMS",
]

player_general_advanced_totals_df = pd.DataFrame(
    data=player_general_advanced_totals_table, columns=columns
)
player_general_advanced_totals_df.head()

Unnamed: 0,season_id,nba_stats_player_id,player_name,team_id,team_abbreviation,age,gp,w,l,w_pct,...,E_PACE_RANK,PACE_RANK,PIE_RANK,FGM_RANK,FGA_RANK,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK,CFID,CFPARAMS
0,2019-20,203932,Aaron Gordon,1610612753,ORL,24.0,62,30,32,0.484,...,404,451,150,75,68,90,81,281,5,2039321610612753
1,2019-20,1628988,Aaron Holiday,1610612754,IND,23.0,66,42,24,0.636,...,374,416,306,150,128,186,160,358,5,16289881610612754
2,2019-20,1627846,Abdel Nader,1610612760,OKC,26.0,55,37,18,0.673,...,317,317,349,266,274,308,321,173,5,16278461610612760
3,2019-20,1629690,Adam Mokoka,1610612741,CHI,21.0,11,3,8,0.273,...,464,413,504,448,453,441,447,306,5,16296901610612741
4,2019-20,1629678,Admiral Schofield,1610612764,WAS,23.0,33,9,24,0.273,...,230,225,484,393,377,444,435,436,5,16296781610612764


In [13]:
print(player_general_advanced_totals_df.loc[:,'player_name'].to_string(index=False))

            Aaron Gordon
           Aaron Holiday
             Abdel Nader
             Adam Mokoka
       Admiral Schofield
              Al Horford
         Al-Farouq Aminu
              Alec Burks
          Alen Smailagic
             Alex Caruso
                Alex Len
        Alfonzo McKinnie
           Alize Johnson
            Allen Crabbe
           Allonzo Trier
         Amile Jefferson
             Amir Coffey
          Andre Drummond
          Andre Iguodala
          Andre Roberson
          Andrew Wiggins
         Anfernee Simons
              Ante Zizic
           Anthony Davis
        Anthony Tolliver
      Antonius Cleveland
        Anzejs Pasecniks
             Aron Baynes
           Austin Rivers
           Avery Bradley
              BJ Johnson
             Bam Adebayo
            Ben McLemore
             Ben Simmons
         Bismack Biyombo
           Blake Griffin
        Boban Marjanovic
            Bobby Portis
       Bogdan Bogdanovic
        Bojan Bogdanovic


In [14]:
# Save to CSV
player_general_advanced_totals_df.to_csv(
    "../../data/interim/nba-stats-general-advanced-2019-20.csv", index=False
)

PermissionError: [Errno 13] Permission denied: '../../data/interim/nba-stats-general-advanced-2019-20.csv'