In [2]:
pip install nba_api

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from nba_api.stats.endpoints import leaguedashplayerstats
from collections import defaultdict
import time

In [4]:
# HTTP headers handed to every LeagueDashPlayerStats request in get_data
# (via `headers=customHeaders`). The User-Agent value is a desktop Chrome string.
customHeaders = {
    # Connection-level headers
    "Host": "stats.nba.com",
    "Connection": "keep-alive",
    "Cache-Control": "max-age=0",
    "Upgrade-Insecure-Requests": "1",
    # Browser identification
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/61.0.3163.100 Safari/537.36"
    ),
    # Content negotiation
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Referer": "https://stats.nba.com/",
    # stats.nba.com specific headers
    "x-nba-stats-origin": "stats",
    "x-nba-stats-token": "true",
}

In [5]:
# Season identifiers to download; each list is passed to get_data().
# `seasons` feeds the first frame (df), `seasons2` the second one (df2).
seasons = ["2021-22"]
seasons2 = ["2022-23"]

In [6]:
# Scrapes stats.nba.com to get season average statistics using nba-api,
# creates Pandas dataframe from statistics
def get_data(season_list):
    """Download per-game player statistics for each season into one DataFrame.

    For every season in ``season_list`` two ``LeagueDashPlayerStats`` requests
    are issued, first with measure type ``'Base'`` and then ``'Advanced'``.
    The rows of both responses are merged per ``PLAYER_ID`` (on a key clash the
    'Advanced' value overwrites the 'Base' one) and a ``SEASON`` column is
    inserted as the first column.

    Parameters
    ----------
    season_list : iterable of str
        Season identifiers, forwarded unchanged as the ``season`` argument of
        the endpoint (e.g. ``['2021-22']``).

    Returns
    -------
    pandas.DataFrame
        The per-season frames stacked in input order with a fresh 0..n-1
        index; an empty DataFrame when ``season_list`` is empty.
    """
    season_frames = []

    for season in season_list:
        # PLAYER_ID -> merged stats dict for this season
        merged_rows = defaultdict(dict)

        # 'Base' is requested before 'Advanced' so that the key order and the
        # overwrite order of the merged dicts stay the same as before.
        for measure_type in ('Base', 'Advanced'):
            # Pause before every single request, so fetching several seasons
            # does not send back-to-back calls.
            time.sleep(1)
            response = leaguedashplayerstats.LeagueDashPlayerStats(
                per_mode_detailed='PerGame',
                season=season,
                measure_type_detailed_defense=measure_type,
                headers=customHeaders,
                timeout=120)
            rows = response.get_normalized_dict()['LeagueDashPlayerStats']
            for row in rows:
                merged_rows[row['PLAYER_ID']].update(row)

        season_df = pd.DataFrame(list(merged_rows.values()))

        # Add season information
        season_df.insert(0, 'SEASON', season)
        season_frames.append(season_df)

    # Concatenate once at the end: avoids re-copying the accumulated frame on
    # every iteration and never feeds an empty placeholder frame to concat.
    if not season_frames:
        return pd.DataFrame()
    return pd.concat(season_frames, ignore_index=True)


In [7]:
# Download the per-game statistics for the season(s) listed in `seasons`
df = get_data(seasons)

In [None]:
# Find the columns that do not appear on the NBA official website
# Exact column names to discard
names_to_drop = {
    "SEASON", "TEAM_ID", "CFPARAMS", "CFID", "PACE_PER40", "PLAYER_ID",
    "NICKNAME", "W_PCT", "FGM", "FGA", "BLKA", "PFD",
    "NBA_FANTASY_PTS", "WNBA_FANTASY_PTS",
    "sp_work_OFF_RATING", "sp_work_DEF_RATING",
    "sp_work_NET_RATING", "sp_work_PACE",
}

# Positions (into df.columns) of every column to discard: any name containing
# "RANK", any name starting with "E_", and the exact names listed above.
# One combined condition records each position only once, even when a name
# matches several rules (e.g. an "E_..." column that also contains "RANK").
indexes_to_drop = [
    position
    for position, column in enumerate(df.columns)
    if "RANK" in column or column.startswith("E_") or column in names_to_drop
]


In [None]:
# Remove, in place, the columns whose positions were collected in
# `indexes_to_drop` (positions are translated back to labels via df.columns)
df.drop(columns=df.columns[indexes_to_drop], inplace=True)

In [None]:
# Add the new column
# (constant placeholder: after the merge with df2 this copy shows up as
# CONFIRMED_x and is dropped in favour of the df2 flag)
df['CONFIRMED'] = 0

In [None]:
# Display the cleaned frame built from `seasons`
df

In [None]:
# Save dataframe in order to perform R analysis
# (semicolon-separated, row index not written)
df.to_csv('stat1.csv', sep=";", index=False)

In [None]:
# Download the per-game statistics for the season(s) listed in `seasons2`
df2 = get_data(seasons2)

In [None]:
# Find the columns that do not appear on the NBA official website
# Exact column names to discard from df2: identifiers/bookkeeping columns ...
names_to_drop = {
    "SEASON", "TEAM_ID", "CFPARAMS", "CFID", "PACE_PER40", "PLAYER_ID",
    "NICKNAME", "W_PCT", "FGM", "FGA", "BLKA", "PFD",
    "NBA_FANTASY_PTS", "WNBA_FANTASY_PTS",
    "sp_work_OFF_RATING", "sp_work_DEF_RATING",
    "sp_work_NET_RATING", "sp_work_PACE",
    # ... plus the statistic columns themselves.
    # NOTE(review): presumably this is meant to leave only PLAYER_NAME (the
    # key used by the later merge) in df2 -- confirm against the real columns.
    "TEAM_ABBREVIATION", "AGE", "GP", "W", "L", "MIN",
    "FG_PCT", "FG3M", "FG3A", "FG3_PCT", "FTM", "FTA", "FT_PCT",
    "OREB", "DREB", "REB", "AST", "TOV", "STL", "BLK", "PF", "PTS",
    "PLUS_MINUS", "DD2", "TD3",
    "OFF_RATING", "DEF_RATING", "NET_RATING",
    "AST_PCT", "AST_TO", "AST_RATIO",
    "OREB_PCT", "DREB_PCT", "REB_PCT", "TM_TOV_PCT",
    # "EFG_PC" is kept from the original list; it looks like a misspelling of
    # "EFG_PCT" and is harmless if no such column exists -- TODO confirm.
    "EFG_PCT", "EFG_PC", "USG_PCT", "PACE", "PIE", "POSS",
    "FGM_PG", "FGA_PG", "TS_PCT",
}

# Positions (into df2.columns) of every column to discard: any name containing
# "RANK", any name starting with "E_", and the exact names listed above.
# One combined condition records each position only once, even when a name
# matches several rules (e.g. an "E_..." column that also contains "RANK").
indexes_to_drop = [
    position
    for position, column in enumerate(df2.columns)
    if "RANK" in column or column.startswith("E_") or column in names_to_drop
]


In [None]:
# Remove, in place, the columns whose positions were collected in
# `indexes_to_drop` (positions are translated back to labels via df2.columns)
df2.drop(columns=df2.columns[indexes_to_drop], inplace=True)

In [None]:
# Add the new column
# 1 flags every player present in df2; after the left merge onto df, players
# without a match get NaN in this column, which is then filled with 0
df2['CONFIRMED'] = 1

In [None]:
# Display the reduced frame built from `seasons2`
df2

In [None]:
# Save dataframe in order to perform R analysis
# (semicolon-separated, row index not written)
df2.to_csv('stat2.csv', sep=";", index=False)

In [None]:
# Left-join df2 onto df by player name: every row of df is kept, and the
# CONFIRMED columns of the two frames come out as CONFIRMED_x / CONFIRMED_y.
# NOTE(review): the key is the display name, not an id -- two players sharing
# a name would be matched to each other; verify this cannot happen.
tabella = df.merge(df2, how='left', on='PLAYER_NAME')

In [None]:
# Build the final CONFIRMED flag from the merge result:
#   1. players with no match in df2 have NaN in CONFIRMED_y -> set to 0
#   2. discard the placeholder flag that came from df (CONFIRMED_x)
#   3. give the remaining flag its plain name
tabella = (
    tabella
    .assign(CONFIRMED_y=lambda frame: frame['CONFIRMED_y'].fillna(0))
    .drop(columns='CONFIRMED_x')
    .rename(columns={'CONFIRMED_y': 'CONFIRMED'})
)

In [None]:
# Display the merged table with the final CONFIRMED flag
tabella

In [29]:
# NOTE(review): this cell repeats the merge of an earlier cell verbatim.
# Left-join df2 onto df by player name: every row of df is kept, and the
# CONFIRMED columns of the two frames come out as CONFIRMED_x / CONFIRMED_y.
tabella = df.merge(df2, how='left', on='PLAYER_NAME')

In [30]:
# Build the final CONFIRMED flag from the merge result:
#   1. players with no match in df2 have NaN in CONFIRMED_y -> set to 0
#   2. discard the placeholder flag that came from df (CONFIRMED_x)
#   3. give the remaining flag its plain name
tabella = (
    tabella
    .assign(CONFIRMED_y=lambda frame: frame['CONFIRMED_y'].fillna(0))
    .drop(columns='CONFIRMED_x')
    .rename(columns={'CONFIRMED_y': 'CONFIRMED'})
)

In [31]:
# Display the merged table with the final CONFIRMED flag
tabella

Unnamed: 0,PLAYER_NAME,TEAM_ABBREVIATION,AGE,GP,W,L,MIN,FG_PCT,FG3M,FG3A,...,TM_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,PACE,PIE,POSS,FGM_PG,FGA_PG,CONFRIMED
0,Aaron Gordon,DEN,26.0,75,46,29,31.7,0.520,1.2,3.5,...,10.6,0.573,0.602,0.194,99.00,0.096,4908,5.8,11.1,1.0
1,Aaron Henry,PHI,22.0,6,6,0,2.8,0.200,0.0,0.2,...,28.6,0.200,0.200,0.149,105.99,-0.079,38,0.2,0.8,0.0
2,Aaron Holiday,PHX,25.0,63,34,29,16.2,0.447,0.6,1.6,...,11.4,0.504,0.544,0.182,99.57,0.088,2113,2.4,5.4,1.0
3,Aaron Nesmith,BOS,22.0,52,32,20,11.0,0.396,0.6,2.2,...,12.7,0.481,0.507,0.164,100.58,0.048,1196,1.4,3.5,1.0
4,Aaron Wiggins,OKC,23.0,50,13,37,24.2,0.463,0.8,2.8,...,10.9,0.525,0.556,0.149,101.60,0.062,2546,3.1,6.7,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
600,Zach LaVine,CHI,27.0,67,37,30,34.7,0.476,2.8,7.1,...,9.4,0.554,0.605,0.284,99.36,0.129,4827,8.4,17.7,1.0
601,Zavier Simpson,OKC,25.0,4,1,3,43.5,0.365,0.3,2.0,...,10.6,0.375,0.406,0.158,97.75,0.059,354,4.8,13.0,0.0
602,Zeke Nnaji,DEN,21.0,41,24,17,17.0,0.516,0.9,2.0,...,9.0,0.616,0.629,0.146,99.25,0.078,1443,2.3,4.5,1.0
603,Ziaire Williams,MEM,20.0,62,41,21,21.7,0.450,1.2,3.9,...,7.4,0.540,0.559,0.144,103.22,0.055,2896,3.1,6.8,0.0


In [32]:
# Save dataframe in order to perform R analysis
# (semicolon-separated, row index not written)
tabella.to_csv('tabella.csv', sep=";", index=False)