In [19]:
import numpy as np
import pandas as pd
import re
import requests
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('seaborn')
%matplotlib inline
colors = sns.color_palette('Set1', 9)

HEADERS = {
            'user-agent': ('Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'), 
            'Dnt': ('1'),
            'Accept-Encoding': ('gzip, deflate, sdch'),
            'Accept-Language': ('en'),
            'origin': ('http://stats.nba.com')
            }

In [20]:
# Build p_master: one row per player with PERSON_ID and DISPLAY_FIRST_LAST,
# taken from the `commonallplayers` endpoint.
P_URL = 'http://stats.nba.com/stats/{}'

season = 2017
endpoint = 'commonallplayers'

# Season label in the 'YYYY-YY' form, e.g. 2017 -> '2017-18'.
seasonstr = str(season) + '-' + str(season + 1)[2:]

response = requests.get(P_URL.format(endpoint),
                        params={'LeagueID': '00',
                                'Season': seasonstr,
                                'IsOnlyCurrentSeason': 0},
                        headers=HEADERS,
                        # Without a timeout a stalled connection blocks the
                        # kernel indefinitely.
                        timeout=30)
# Surface HTTP errors here instead of as an opaque JSON decode failure below.
response.raise_for_status()
# Named `payload` (not `json`) so the stdlib module name is not shadowed.
payload = response.json()

headers = payload['resultSets'][0]['headers']
values = payload['resultSets'][0]['rowSet']

df = pd.DataFrame(values, columns=headers)

# Only the id and the display name are kept; later cells join on PERSON_ID.
cols = ['PERSON_ID', 'DISPLAY_FIRST_LAST']

p_master = df[cols].copy()

In [21]:
# Build so_master: shot attempts by court zone for each (PLAYER_ID, season),
# fetched from the `leaguedashplayershotlocations` endpoint.
S_URL = 'http://stats.nba.com/stats/{}'

# Not read anywhere in this cell; kept so the set of names the cell defines
# is unchanged.
categories_dict ={'Overall':['D_FGA'],
                  'Less Than 6Ft':['FGA_LT_06'],
                  'Less Than 10Ft':['FGA_LT_10'],
                  'Greater Than 15Ft':['FGA_GT_15'],
                  '3 Pointers':['FG3A']
             }

seasons = [2015, 2016, 2017]
endpoint = 'leaguedashplayershotlocations'
categories = ['By Zone']
# Output column names, assigned positionally to the repeated 'FGA' columns of
# the response. NOTE(review): the zone order is assumed to match the API's
# column order -- confirm against a live response.
zones = ['o_RA',
         'o_Non_RA',
         'o_Mid',
         'o_L_Corner',
         'o_R_Corner',
         'o_Above_Break'
        ]

so_master = None
season_frames = []
for season in seasons:
    seasondf = None
    # Season label in the 'YYYY-YY' form, e.g. 2015 -> '2015-16'.
    seasonstr = str(season) + '-' + str(season + 1)[2:]
    for category in categories:
        try:
            response = requests.get(S_URL.format(endpoint),
                                    params={'LeagueID': '00',
                                            'DistanceRange': category,
                                            'SeasonType': 'Regular Season',
                                            'MeasureType': 'Base',
                                            'PerMode': 'Totals',
                                            'Season': seasonstr,
                                            'PORound': '0',
                                            'Outcome': '',
                                            'Location': '',
                                            'Month': '0',
                                            'SeasonSegment': '',
                                            'DateFrom': '',
                                            'DateTo': '',
                                            'OpponentTeamID': '0',
                                            'VsConference': '',
                                            'VsDivision': '',
                                            'TeamID': '0',
                                            'Conference': '',
                                            'Division': '',
                                            'LastNGames': '0',
                                            'GameScope': '',
                                            'PlayerExperience': '',
                                            'PlayerPosition': '',
                                            'StarterBench': '',
                                            'DraftYear': '',
                                            'DraftPick': '',
                                            'College': '',
                                            'Country': '',
                                            'Height': '',
                                            'Weight': '',
                                            'PlusMinus': 'N',
                                            'Rank': 'N',
                                            'GameSegment': '',
                                            'PaceAdjust': 'N',
                                            'Period': '0',
                                            },
                                    headers=HEADERS,
                                    # A stalled connection would otherwise
                                    # hang the kernel indefinitely.
                                    timeout=30)
            response.raise_for_status()
            payload = response.json()
            # Unlike the other endpoints used in this notebook, 'resultSets'
            # is indexed by key here and the flat column list sits in the
            # second header entry.
            headers = payload['resultSets']['headers'][1]['columnNames']
            values = payload['resultSets']['rowSet']
            df = pd.DataFrame(values, columns=headers)
            df['season'] = season

            # 'FGA' occurs several times in the header list, so df['FGA'] is
            # a frame; its i-th column is stored under the i-th zone name.
            for i, z in enumerate(zones):
                df[z] = df['FGA'].iloc[:, i]

            cols = ['PLAYER_ID', 'season'] + zones

            if seasondf is None:
                seasondf = df[cols].copy()
            else:
                seasondf = pd.merge(df[cols], seasondf, how='outer',
                                    on=['PLAYER_ID', 'season'])
        except Exception as exc:
            # Stay best-effort, but report what was skipped. Catching
            # Exception (not a bare except) keeps Ctrl-C / kernel interrupt
            # working.
            print('Skipping {} / {}: {!r}'.format(seasonstr, category, exc))
    if seasondf is None:
        print('No shot-location data retrieved for {}; season left out.'.format(seasonstr))
    else:
        season_frames.append(seasondf)

if not season_frames:
    raise RuntimeError(
        'No shot-location data could be retrieved for seasons {}'.format(seasons))
so_master = pd.concat(season_frames, ignore_index=True)

# Row-wise sum that skips NaN: with `+`, a missing value in one corner would
# make the total NaN, and the groupby sum below would then count it as 0.
so_master['o_Corner'] = so_master[['o_L_Corner', 'o_R_Corner']].sum(axis=1)
so_master = so_master.drop(['o_L_Corner', 'o_R_Corner'], axis=1)
# Collapse any repeated (PLAYER_ID, season) rows into one.
so_master = so_master.groupby(['PLAYER_ID', 'season']).sum().reset_index()

In [22]:
# Build sd_master (defended shot attempts by distance band) from the
# `leaguedashptdefend` endpoint, then join it with so_master into s_master.
S_URL = 'http://stats.nba.com/stats/{}'

# DefenseCategory request value -> column(s) kept from that response.
categories_dict ={'Overall':['D_FGA'],
                  'Less Than 6Ft':['FGA_LT_06'],
                  'Less Than 10Ft':['FGA_LT_10'],
                  'Greater Than 15Ft':['FGA_GT_15'],
                  '3 Pointers':['FG3A']
             }

seasons = [2015, 2016, 2017]
endpoint = 'leaguedashptdefend'

sd_master = None
season_frames = []
for season in seasons:
    seasondf = None
    # Season label in the 'YYYY-YY' form, e.g. 2015 -> '2015-16'.
    seasonstr = str(season) + '-' + str(season + 1)[2:]
    for category in categories_dict.keys():
        try:
            response = requests.get(S_URL.format(endpoint),
                                    params={'LeagueID': '00',
                                            'DefenseCategory': category,
                                            'SeasonType': 'Regular Season',
                                            'PlayerOrTeam': 'Player',
                                            'PerMode': 'Totals',
                                            'Season': seasonstr,
                                            'PORound': '0',
                                            'Outcome': '',
                                            'Location': '',
                                            'Month': '0',
                                            'SeasonSegment': '',
                                            'DateFrom': '',
                                            'DateTo': '',
                                            'OpponentTeamID': '0',
                                            'VsConference': '',
                                            'VsDivision': '',
                                            'TeamID': '0',
                                            'Conference': '',
                                            'Division': '',
                                            'LastNGames': '0',
                                            'GameScope': '',
                                            'PlayerExperience': '',
                                            'PlayerPosition': '',
                                            'StarterBench': '',
                                            'DraftYear': '',
                                            'DraftPick': '',
                                            'College': '',
                                            'Country': '',
                                            'Height': '',
                                            'Weight': ''
                                            },
                                    headers=HEADERS,
                                    # A stalled connection would otherwise
                                    # hang the kernel indefinitely.
                                    timeout=30)
            response.raise_for_status()
            payload = response.json()
            headers = payload['resultSets'][0]['headers']
            values = payload['resultSets'][0]['rowSet']
            df = pd.DataFrame(values, columns=headers)
            df['season'] = season
            cols = ['CLOSE_DEF_PERSON_ID', 'season'] + categories_dict[category]
            if seasondf is None:
                seasondf = df[cols].copy()
            else:
                seasondf = pd.merge(df[cols], seasondf, how='outer',
                                    on=['CLOSE_DEF_PERSON_ID', 'season'])
        except Exception as exc:
            # Stay best-effort, but report what was skipped. Catching
            # Exception (not a bare except) keeps Ctrl-C / kernel interrupt
            # working.
            print('Skipping {} / {}: {!r}'.format(seasonstr, category, exc))
    if seasondf is None:
        print('No defensive data retrieved for {}; season left out.'.format(seasonstr))
    else:
        season_frames.append(seasondf)

if not season_frames:
    raise RuntimeError(
        'No defensive data could be retrieved for seasons {}'.format(seasons))
sd_master = pd.concat(season_frames, ignore_index=True)

# Every category feeds the band arithmetic below, so fail with a clear message
# if one of them never came back instead of with a bare KeyError.
needed_cols = [c for names in categories_dict.values() for c in names]
missing_cols = [c for c in needed_cols if c not in sd_master.columns]
if missing_cols:
    raise RuntimeError(
        'Defensive categories missing for every season: {}'.format(missing_cols))

# <6 = FGA_LT_06   (listed for reference; no column is created for it below)
# 6-10 = FGA_LT_10 - FGA_LT_06
# 10-15 = D_FGA - FGA_GT_15 - FGA_LT_10
# 15-3PT = FGA_GT_15 - FG3A
# >3PT = FG3A
# NOTE(review): a player absent from one category has NaN there after the
# outer merge; the differences below are then NaN and the groupby sum turns
# them into 0.
sd_master['d_6_10'] = sd_master['FGA_LT_10'] - sd_master['FGA_LT_06']
sd_master['d_10_15'] = sd_master['D_FGA'] - sd_master['FGA_LT_10'] - sd_master['FGA_GT_15']
sd_master['d_15_3PT'] = sd_master['FGA_GT_15'] - sd_master['FG3A']
sd_master['d_3PT'] = sd_master['FG3A']
sd_master = sd_master[['CLOSE_DEF_PERSON_ID', 'season', 'd_6_10',
                       'd_10_15', 'd_15_3PT', 'd_3PT']].copy()
sd_master = sd_master.groupby(['CLOSE_DEF_PERSON_ID', 'season']).sum().reset_index()

# Keep only players present in both the offensive and the defensive table.
s_master = pd.merge(so_master, sd_master, how='inner', left_on=['PLAYER_ID', 'season'],
                    right_on=['CLOSE_DEF_PERSON_ID', 'season'])
s_master = s_master.drop(['CLOSE_DEF_PERSON_ID'], axis=1)
# Every zone/band column, i.e. everything except the two join keys.
shot_cols = [x for x in s_master.columns if x not in ['PLAYER_ID', 'season']]

In [25]:
# Build pt_master: possessions per play type (offensive and defensive) for
# each player, position and season, then attach player names (p_master) and
# shot data (s_master).
PT_URL = 'https://stats-prod.nba.com/wp-json/statscms/v1/synergy/player'

categories = ['Transition', 'Isolation', 'PRBallHandler', 'PRRollMan', 'Postup',
              'Spotup', 'Handoff', 'Cut', 'OffScreen', 'OffRebound', 'Misc'
             ]
seasons = [2015,2016, 2017]
sides = ['offensive', 'defensive']

# NOTE(review): 'season' is never assigned in this cell, so it has to be a
# field of each API record -- confirm, including that its dtype matches the
# integer 'season' in s_master used by the final merge.
key_cols = ['PlayerIDSID', 'P', 'season']

pt_master = None
season_frames = []
for season in seasons:
    seasondf = None
    for side in sides:
        for category in categories:
            try:
                response = requests.get(PT_URL,
                                        params={'season': season,
                                                'names': side,
                                                'category': category,
                                                'seasonType': 'Reg',
                                                'limit': 500
                                                },
                                        headers=HEADERS,
                                        # A stalled connection would otherwise
                                        # hang the kernel indefinitely.
                                        timeout=30)
                response.raise_for_status()
                payload = response.json()

                values = payload['results']
                if not values:
                    raise ValueError('empty result list')
                headers = list(values[0].keys())

                df = pd.DataFrame(values, columns=headers)

                # e.g. 'o_Transition_Poss' / 'd_Transition_Poss'
                posscol = side[0] + '_' + category + '_' + 'Poss'
                df[posscol] = df['Poss']
                cols = key_cols + [posscol]
                # Sum per key BEFORE merging: if a key occurs more than once
                # in a category, an outer merge on that key multiplies rows,
                # and the groupby sum at the end would count them repeatedly.
                category_df = df[cols].groupby(key_cols)[posscol].sum().reset_index()
                if seasondf is None:
                    seasondf = category_df
                else:
                    seasondf = pd.merge(category_df, seasondf, how='outer',
                                        on=key_cols)
            except Exception as exc:
                # Stay best-effort, but report what was skipped. Catching
                # Exception (not a bare except) keeps Ctrl-C / kernel
                # interrupt working.
                print('Skipping {} / {} / {}: {!r}'.format(season, side, category, exc))
    if seasondf is None:
        print('No play-type data retrieved for {}; season left out.'.format(season))
    else:
        season_frames.append(seasondf)

if not season_frames:
    raise RuntimeError(
        'No play-type data could be retrieved for seasons {}'.format(seasons))
pt_master = pd.concat(season_frames, ignore_index=True)

# Normalise position labels so equivalent spellings share one key. The
# patterns contain no regex metacharacters, so they match literally whether
# or not pandas interprets them as regular expressions.
pt_master['P'] = pt_master['P'].str.replace('F--G', 'G-F')
pt_master['P'] = pt_master['P'].str.replace('F-G', 'G-F')
pt_master['P'] = pt_master['P'].str.replace('C-F', 'F-C')
# Rows with an empty position label are dropped.
pt_master = pt_master.loc[pt_master['P'] != '']
# Merge rows whose keys became identical after the label normalisation.
pt_master = pt_master.groupby(['PlayerIDSID', 'P', 'season']).sum().reset_index()
pt_master = pd.merge(pt_master, p_master, how='inner', left_on='PlayerIDSID',
                     right_on='PERSON_ID')
pt_master = pd.merge(pt_master, s_master, how='inner', left_on=['PlayerIDSID', 'season'],
                     right_on=['PLAYER_ID', 'season'])

In [26]:
# Build t_master: selected player-tracking columns per (PLAYER_ID, season)
# from the `leaguedashptstats` endpoint, one request per measure type.
T_URL = 'http://stats.nba.com/stats/{}'

# PtMeasureType request value -> columns kept from that response.
# 'MIN' (used later as the per-minute denominator) is taken from 'Drives'.
categories_dict = {'Drives':['DRIVES', 'DRIVE_PASSES', 'MIN'],
                   'CatchShoot':['CATCH_SHOOT_FGA', 'CATCH_SHOOT_FG3A'],
                   'Passing':['PASSES_MADE', 'PASSES_RECEIVED'],
                   'Possessions':['AVG_SEC_PER_TOUCH', 'AVG_DRIB_PER_TOUCH',
                                  'ELBOW_TOUCHES', 'POST_TOUCHES', 'PAINT_TOUCHES', 'TOUCHES'],
                   'PullUpShot':['PULL_UP_FGA', 'PULL_UP_FG3A'],
                   'Rebounding':['OREB_CHANCES', 'AVG_OREB_DIST',
                                 'DREB_CHANCES', 'AVG_DREB_DIST'],
                   'Defense':['DEF_RIM_FGA']
                  }
seasons = [2015, 2016, 2017]
endpoint = 'leaguedashptstats'

t_master = None
season_frames = []
for season in seasons:
    seasondf = None
    # Season label in the 'YYYY-YY' form, e.g. 2015 -> '2015-16'.
    seasonstr = str(season) + '-' + str(season + 1)[2:]
    for category in categories_dict.keys():
        try:
            response = requests.get(T_URL.format(endpoint),
                                    params={'LeagueID': '00',
                                            'PtMeasureType': category,
                                            'SeasonType': 'Regular Season',
                                            'PlayerOrTeam': 'Player',
                                            'PerMode': 'Totals',
                                            'Season': seasonstr,
                                            'PORound': '0',
                                            'Outcome': '',
                                            'Location': '',
                                            'Month': '0',
                                            'SeasonSegment': '',
                                            'DateFrom': '',
                                            'DateTo': '',
                                            'OpponentTeamID': '0',
                                            'VsConference': '',
                                            'VsDivision': '',
                                            'TeamID': '0',
                                            'Conference': '',
                                            'Division': '',
                                            'LastNGames': '0',
                                            'GameScope': '',
                                            'PlayerExperience': '',
                                            'PlayerPosition': '',
                                            'StarterBench': '',
                                            'DraftYear': '',
                                            'DraftPick': '',
                                            'College': '',
                                            'Country': '',
                                            'Height': '',
                                            'Weight': ''
                                            },
                                    headers=HEADERS,
                                    # A stalled connection would otherwise
                                    # hang the kernel indefinitely.
                                    timeout=30)
            response.raise_for_status()
            payload = response.json()
            headers = payload['resultSets'][0]['headers']
            values = payload['resultSets'][0]['rowSet']
            df = pd.DataFrame(values, columns=headers)
            df['season'] = season
            cols = ['PLAYER_ID', 'season'] + categories_dict[category]
            if seasondf is None:
                seasondf = df[cols].copy()
            else:
                seasondf = pd.merge(df[cols], seasondf, how='outer',
                                    on=['PLAYER_ID', 'season'])
        except Exception as exc:
            # Stay best-effort, but report what was skipped. Catching
            # Exception (not a bare except) keeps Ctrl-C / kernel interrupt
            # working.
            print('Skipping {} / {}: {!r}'.format(seasonstr, category, exc))
    if seasondf is None:
        print('No tracking data retrieved for {}; season left out.'.format(seasonstr))
    else:
        season_frames.append(seasondf)

if not season_frames:
    raise RuntimeError(
        'No tracking data could be retrieved for seasons {}'.format(seasons))
t_master = pd.concat(season_frames, ignore_index=True)

In [27]:
# Combine tracking stats (t_master) with play-type / shot data (pt_master)
# and turn the raw totals into per-minute features.
df = pd.merge(t_master, pt_master, how='inner', on=['PLAYER_ID', 'season'])

poss_cols = [x for x in df.columns if x.endswith('Poss')]

# Replace null with 0
df = df.fillna(0)

# Get number of 2 point attempts: total attempts minus 3 point attempts.
# The total is dropped afterwards because the 2PA/3PA pair replaces it.
for col in ['CATCH_SHOOT', 'PULL_UP']:
    twopa = '{}_FG2A'
    thrpa = '{}_FG3A'
    fga = '{}_FGA'
    df[twopa.format(col)] = df[fga.format(col)] - df[thrpa.format(col)]
    df = df.drop(fga.format(col), axis=1)

# Get percentage of drives that end with a pass
df['DRIVE_PASS_PCT'] = df['DRIVE_PASSES']/df['DRIVES']
df = df.drop('DRIVE_PASSES', axis=1)

# Get per minute values of most columns
count_cols = ['DEF_RIM_FGA',
              'OREB_CHANCES',
              'DREB_CHANCES',
              'PULL_UP_FG2A',
              'PULL_UP_FG3A',
              'ELBOW_TOUCHES',
              'POST_TOUCHES',
              'PAINT_TOUCHES',
              'TOUCHES',
              'PASSES_MADE',
              'PASSES_RECEIVED',
              'CATCH_SHOOT_FG2A',
              'CATCH_SHOOT_FG3A',
              'DRIVES']
per_minute_cols = poss_cols + count_cols + shot_cols
df[per_minute_cols] = df[per_minute_cols].div(df['MIN'], axis=0)

# Prefix the defensive tracking columns with 'd_' ...
dcol_rename = {col:'d_' + col for col in ['AVG_DREB_DIST', 'DREB_CHANCES', 'DEF_RIM_FGA']}
df = df.rename(columns=dcol_rename)

# ... and every remaining feature column that has no 'd_'/'o_' prefix yet
# with 'o_'. Identifier columns and MIN keep their names.
info_cols = ['PLAYER_ID', 'season', 'PlayerIDSID', 'DISPLAY_FIRST_LAST',
             'PERSON_ID', 'P', 'MIN']
ocol_rename = {x:'o_' + x for x in df.columns if
               (x not in info_cols) and not (x[:2] in ['d_', 'o_'])}
df = df.rename(columns=ocol_rename)

# Division by zero (DRIVES == 0 or MIN == 0) yields NaN or +/-inf. fillna
# only handles NaN, so map the infinities to NaN first.
df = df.replace([np.inf, -np.inf], np.nan)
df = df.fillna(0)

# Keep only top 240 players for each year (8 from each team)
TOP_N_PER_SEASON = 240
# drop=True discards the old row labels; without it they become an extra
# 'index' column that would be written to the CSV export.
df = (df.sort_values('MIN', ascending=False)
        .groupby('season')
        .head(TOP_N_PER_SEASON)
        .reset_index(drop=True))

In [33]:
# Write the final feature table to disk; the row index is not written.
df.to_csv(path_or_buf='Player_Data.csv', index=False)