# Setup

In [2]:
import pandas as pd
import numpy as np
from nba_api.stats.static import players
from nba_api.stats.static import teams
from functools import partial

In [3]:
from nba_api.stats.endpoints import LeagueDashPlayerStats

In [4]:
# Import scikit-learn modules

# Global Variables

In [5]:
seasons = ['2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19']
curr_season = '2019-20'
all_teams = teams.get_teams()
all_players = players.get_players()
active_players = players.get_active_players()

# Data Gathering

## Four Factor Data Gathering (Team data)

### OFFENSE
1. Effective field goal percentage
2. Turnovers per possession
3. Offensive rebounding percentage
4. Free throw rate

### DEFENSE
1. Opponent's EFG
2. Defensive turnovers caused per possession
3. Defensive rebounding rate
4. Opponent's free throw rate

2013-17: training
2018-19: test
current season: prediction

In [20]:
from nba_api.stats.endpoints import LeagueDashTeamStats

In [21]:
def get_team_ff(out_df, in_season):
    df = LeagueDashTeamStats(season=in_season,
                                measure_type_detailed_defense= 'Four Factors').get_data_frames()[0]

    df['season'] = in_season
    out_df = pd.concat([out_df, df])
    return out_df

In [14]:
team_ff_df = pd.DataFrame()

for season in seasons:
    team_ff_df = get_team_ff(team_ff_df, season)

In [107]:
team_ff_df.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,EFG_PCT,FTA_RATE,TM_TOV_PCT,...,FTA_RATE_RANK,TM_TOV_PCT_RANK,OREB_PCT_RANK,OPP_EFG_PCT_RANK,OPP_FTA_RATE_RANK,OPP_TOV_PCT_RANK,OPP_OREB_PCT_RANK,CFID,CFPARAMS,season
0,1610612737,Atlanta Hawks,82,38,44,0.463,3966.0,0.515,0.266,0.159,...,21,21,28,19,9,9,13,10,Atlanta Hawks,2013-14
1,1610612738,Boston Celtics,82,25,57,0.305,3946.0,0.477,0.248,0.163,...,25,28,7,16,19,18,16,10,Boston Celtics,2013-14
2,1610612751,Brooklyn Nets,82,44,38,0.537,3976.0,0.514,0.313,0.155,...,5,16,27,18,25,2,26,10,Brooklyn Nets,2013-14
3,1610612766,Charlotte Bobcats,82,43,39,0.524,3981.0,0.481,0.297,0.131,...,10,1,24,9,2,28,1,10,Charlotte Bobcats,2013-14
4,1610612741,Chicago Bulls,82,48,34,0.585,3986.0,0.471,0.29,0.162,...,14,27,3,2,4,13,12,10,Chicago Bulls,2013-14


In [15]:
curr_team_ff = pd.DataFrame()
curr_team_ff = get_team_ff(curr_team_ff, curr_season)

In [108]:
curr_team_ff.head()

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,EFG_PCT,FTA_RATE,TM_TOV_PCT,...,FTA_RATE_RANK,TM_TOV_PCT_RANK,OREB_PCT_RANK,OPP_EFG_PCT_RANK,OPP_FTA_RATE_RANK,OPP_TOV_PCT_RANK,OPP_OREB_PCT_RANK,CFID,CFPARAMS,season
0,1610612737,Atlanta Hawks,56,15,41,0.268,2713.0,0.512,0.257,0.159,...,15,29,14,24,29,13,28,10,Atlanta Hawks,2019-20
1,1610612738,Boston Celtics,54,38,16,0.704,2607.0,0.532,0.256,0.137,...,17,9,10,5,24,5,12,10,Boston Celtics,2019-20
2,1610612751,Brooklyn Nets,53,25,28,0.472,2574.0,0.516,0.264,0.152,...,11,25,9,4,8,26,11,10,Brooklyn Nets,2019-20
3,1610612766,Charlotte Hornets,54,18,36,0.333,2612.0,0.503,0.249,0.152,...,22,24,8,27,1,11,30,10,Charlotte Hornets,2019-20
4,1610612741,Chicago Bulls,55,19,36,0.345,2655.0,0.511,0.236,0.152,...,25,26,21,26,30,1,21,10,Chicago Bulls,2019-20


In [17]:
ff_history_filename = '2013-19_team_ff.csv'
ff_curr_filename = '2019-20_team_ff.csv'

team_ff_df.to_csv(ff_history_filename)
curr_team_ff.to_csv(ff_curr_filename)

## Advanced Per 100 Player Stats

In [6]:
def get_adv_stats(out_df, in_season):
    """
    This function gathers advanced player data starting from the 2013-14 season
    to the 2018-19 season and combines it with the master advanced stats dataframe.
    """
    df = LeagueDashPlayerStats(season = in_season,
                                per_mode_detailed='Per100Possessions',
                                measure_type_detailed_defense='Advanced').get_data_frames()[0]
    df['season'] = in_season
    #print(df.head())
    out_df = pd.concat([out_df, df])
    #print(out_df.head())
    return out_df

In [7]:
# Create advanced stats dataframe for all seasons
players_adv_df = pd.DataFrame()

for season in seasons: 
    players_adv_df = get_adv_stats(players_adv_df, season)

In [8]:
players_adv_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,...,sp_work_PACE_RANK,PIE_RANK,FGM_RANK,FGA_RANK,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK,CFID,CFPARAMS,season
0,201985,AJ Price,1610612750,MIN,27.0,28,15,13,0.536,3.5,...,25,220,409,410,435,440,336,5,2019851610612750,2013-14
1,201166,Aaron Brooks,1610612743,DEN,29.0,72,42,30,0.583,21.6,...,78,274,172,151,195,165,360,5,2011661610612743,2013-14
2,201189,Aaron Gray,1610612758,SAC,29.0,37,12,25,0.324,9.6,...,157,423,395,400,431,438,217,5,2011891610612758,2013-14
3,203519,Adonis Thomas,1610612755,PHI,21.0,6,2,4,0.333,6.3,...,234,433,440,446,397,406,272,5,2035191610612755,2013-14
4,1733,Al Harrington,1610612764,WAS,34.0,34,20,14,0.588,15.0,...,312,371,305,298,257,229,371,5,17331610612764,2013-14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,203897,Zach LaVine,1610612741,CHI,24.0,63,16,47,0.254,34.5,...,421,78,32,34,20,16,185,5,2038971610612741,2018-19
526,1629155,Zach Lofton,1610612765,DET,26.0,1,1,0,1.000,3.8,...,252,527,519,522,519,515,519,5,16291551610612765,2018-19
527,2585,Zaza Pachulia,1610612765,DET,35.0,68,35,33,0.515,12.9,...,427,239,336,332,435,440,260,5,25851610612765,2018-19
528,1629015,Zhaire Smith,1610612755,PHI,20.0,6,2,4,0.333,18.4,...,188,437,454,454,305,278,365,5,16290151610612755,2018-19


In [22]:
players_adv_df.columns

Index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE', 'GP',
       'W', 'L', 'W_PCT', 'MIN', 'E_OFF_RATING', 'OFF_RATING',
       'sp_work_OFF_RATING', 'E_DEF_RATING', 'DEF_RATING',
       'sp_work_DEF_RATING', 'E_NET_RATING', 'NET_RATING',
       'sp_work_NET_RATING', 'AST_PCT', 'AST_TO', 'AST_RATIO', 'OREB_PCT',
       'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT', 'E_TOV_PCT', 'EFG_PCT', 'TS_PCT',
       'USG_PCT', 'E_USG_PCT', 'E_PACE', 'PACE', 'PACE_PER40', 'sp_work_PACE',
       'PIE', 'POSS', 'FGM', 'FGA', 'FGM_PG', 'FGA_PG', 'FG_PCT', 'GP_RANK',
       'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'E_OFF_RATING_RANK',
       'OFF_RATING_RANK', 'sp_work_OFF_RATING_RANK', 'E_DEF_RATING_RANK',
       'DEF_RATING_RANK', 'sp_work_DEF_RATING_RANK', 'E_NET_RATING_RANK',
       'NET_RATING_RANK', 'sp_work_NET_RATING_RANK', 'AST_PCT_RANK',
       'AST_TO_RANK', 'AST_RATIO_RANK', 'OREB_PCT_RANK', 'DREB_PCT_RANK',
       'REB_PCT_RANK', 'TM_TOV_PCT_RANK', 'E_TOV_PCT_RANK', 'EFG_

In [77]:
# output to csv file
# adv_filename = '13_19_players_adv_stats.csv'
# players_adv_df.to_csv(adv_filename)

## Misc Per 100 Stats

In [9]:
def get_misc_stats(out_df, in_season):
    """
    This function gathers misc player data from one season and
    combines it with the master advanced stats dataframe.
    """
    df = LeagueDashPlayerStats(season = in_season,
                                per_mode_detailed='Per100Possessions',
                                measure_type_detailed_defense='Misc').get_data_frames()[0]
    df['season'] = in_season
    #print(df.head())
    out_df = pd.concat([out_df, df])
    #print(out_df.head())
    return out_df

In [10]:
# Create misc stats dataframe for all seasons
players_misc_df = pd.DataFrame()

for season in seasons: 
    players_misc_df = get_misc_stats(players_misc_df, season)

In [11]:
list(players_misc_df.columns)

['PLAYER_ID',
 'PLAYER_NAME',
 'TEAM_ID',
 'TEAM_ABBREVIATION',
 'AGE',
 'GP',
 'W',
 'L',
 'W_PCT',
 'MIN',
 'PTS_OFF_TOV',
 'PTS_2ND_CHANCE',
 'PTS_FB',
 'PTS_PAINT',
 'OPP_PTS_OFF_TOV',
 'OPP_PTS_2ND_CHANCE',
 'OPP_PTS_FB',
 'OPP_PTS_PAINT',
 'BLK',
 'BLKA',
 'PF',
 'PFD',
 'NBA_FANTASY_PTS',
 'GP_RANK',
 'W_RANK',
 'L_RANK',
 'W_PCT_RANK',
 'MIN_RANK',
 'PTS_OFF_TOV_RANK',
 'PTS_2ND_CHANCE_RANK',
 'PTS_FB_RANK',
 'PTS_PAINT_RANK',
 'OPP_PTS_OFF_TOV_RANK',
 'OPP_PTS_2ND_CHANCE_RANK',
 'OPP_PTS_FB_RANK',
 'OPP_PTS_PAINT_RANK',
 'BLK_RANK',
 'BLKA_RANK',
 'PF_RANK',
 'PFD_RANK',
 'NBA_FANTASY_PTS_RANK',
 'CFID',
 'CFPARAMS',
 'season']

In [27]:
players_misc_df.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,...,OPP_PTS_FB_RANK,OPP_PTS_PAINT_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,NBA_FANTASY_PTS_RANK,CFID,CFPARAMS,season
0,201985,AJ Price,1610612750,MIN,27.0,28,15,13,0.536,47.1,...,291,472,436,1,22,453,354,5,2019851610612750,2013-14
1,201166,Aaron Brooks,1610612743,DEN,29.0,72,42,30,0.583,48.2,...,285,184,310,332,254,281,268,5,2011661610612743,2013-14
2,201189,Aaron Gray,1610612758,SAC,29.0,37,12,25,0.324,49.4,...,478,89,154,396,465,391,326,5,2011891610612758,2013-14
3,203519,Adonis Thomas,1610612755,PHI,21.0,6,2,4,0.333,51.5,...,12,348,436,387,454,345,420,5,2035191610612755,2013-14
4,1733,Al Harrington,1610612764,WAS,34.0,34,20,14,0.588,50.5,...,244,152,436,284,429,202,307,5,17331610612764,2013-14


In [174]:
players_adv_df.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,...,sp_work_PACE_RANK,PIE_RANK,FGM_RANK,FGA_RANK,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK,CFID,CFPARAMS,season
0,201985,AJ Price,1610612750,MIN,27.0,28,15,13,0.536,3.5,...,25,220,409,410,435,440,336,5,2019851610612750,2013-14
1,201166,Aaron Brooks,1610612743,DEN,29.0,72,42,30,0.583,21.6,...,78,274,172,151,195,165,360,5,2011661610612743,2013-14
2,201189,Aaron Gray,1610612758,SAC,29.0,37,12,25,0.324,9.6,...,157,423,395,400,431,438,217,5,2011891610612758,2013-14
3,203519,Adonis Thomas,1610612755,PHI,21.0,6,2,4,0.333,6.3,...,234,433,440,446,397,406,272,5,2035191610612755,2013-14
4,1733,Al Harrington,1610612764,WAS,34.0,34,20,14,0.588,15.0,...,312,371,305,298,257,229,371,5,17331610612764,2013-14


In [192]:
# pick out points on fast break, second chance points, points in the paint, fouls drawn, block fg attempts
filtered_misc_df = players_misc_df[['PLAYER_ID','season', 'PTS_FB', 'PTS_PAINT', 'PFD', 'BLKA']]

# combine with adv df
combined_players_df = players_adv_df.merge(filtered_misc_df, on=['PLAYER_ID', 'season'], how='left')

In [193]:
combined_players_df.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,...,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK,CFID,CFPARAMS,season,PTS_FB,PTS_PAINT,PFD,BLKA
0,201985,AJ Price,1610612750,MIN,27.0,28,15,13,0.536,3.5,...,435,440,336,5,2019851610612750,2013-14,2.4,3.8,1.4,0.0
1,201166,Aaron Brooks,1610612743,DEN,29.0,72,42,30,0.583,21.6,...,195,165,360,5,2011661610612743,2013-14,2.2,6.5,3.3,1.1
2,201189,Aaron Gray,1610612758,SAC,29.0,37,12,25,0.324,9.6,...,431,438,217,5,2011891610612758,2013-14,0.3,6.7,2.2,1.4
3,203519,Adonis Thomas,1610612755,PHI,21.0,6,2,4,0.333,6.3,...,397,406,272,5,2035191610612755,2013-14,8.2,8.2,2.7,1.4
4,1733,Al Harrington,1610612764,WAS,34.0,34,20,14,0.588,15.0,...,257,229,371,5,17331610612764,2013-14,2.2,8.3,4.0,1.0


## Play type stats

In [132]:
playtypes = ['Isolation', 'Post Up', 'Pick & Roll Ball Handler', 'Pick & Roll Roll Man']

In [133]:
from nba_api.stats.endpoints import SynergyPlayTypes
# Min 10 min/game and 10 poss per play to qualify

# test1 = SynergyPlayTypes(season=curr_season,
#                             per_mode_simple='PerGame',
#                             play_type_nullable='Isolation',
#                             type_grouping_nullable='Offensive').get_data_frames()[0]

In [134]:
def playtype_data(playtype, in_season, out_df):
    df = SynergyPlayTypes(season = in_season,
                            per_mode_simple='PerGame',
                            play_type_nullable=playtype,
                            type_grouping_nullable='Offensive').get_data_frames()[0]

    df['season'] = in_season
    out_df = pd.concat([out_df, df])
    return out_df

In [99]:
aggregated_playtypes_df = pd.DataFrame()

# creates too many requests

# for play in playtypes:
#     for season in seasons:
#         aggregated_playtypes_df = playtype_data(play, season, aggregated_playtypes_df)

## Scoring adv stats

In [24]:
def scoring_adv_stats(out_df, in_season):
    df = LeagueDashPlayerStats(season = in_season,
                                measure_type_detailed_defense='Scoring').get_data_frames()[0]

    df['season'] = in_season
    out_df = pd.concat([out_df, df])
    return out_df

In [48]:
# Empty scoring stats df
players_scoring_df = pd.DataFrame()

for season in seasons:
    players_scoring_df = scoring_adv_stats(players_scoring_df, season)

In [49]:
list(players_scoring_df.columns)

['PLAYER_ID',
 'PLAYER_NAME',
 'TEAM_ID',
 'TEAM_ABBREVIATION',
 'AGE',
 'GP',
 'W',
 'L',
 'W_PCT',
 'MIN',
 'PCT_FGA_2PT',
 'PCT_FGA_3PT',
 'PCT_PTS_2PT',
 'PCT_PTS_2PT_MR',
 'PCT_PTS_3PT',
 'PCT_PTS_FB',
 'PCT_PTS_FT',
 'PCT_PTS_OFF_TOV',
 'PCT_PTS_PAINT',
 'PCT_AST_2PM',
 'PCT_UAST_2PM',
 'PCT_AST_3PM',
 'PCT_UAST_3PM',
 'PCT_AST_FGM',
 'PCT_UAST_FGM',
 'GP_RANK',
 'W_RANK',
 'L_RANK',
 'W_PCT_RANK',
 'MIN_RANK',
 'PCT_FGA_2PT_RANK',
 'PCT_FGA_3PT_RANK',
 'PCT_PTS_2PT_RANK',
 'PCT_PTS_2PT_MR_RANK',
 'PCT_PTS_3PT_RANK',
 'PCT_PTS_FB_RANK',
 'PCT_PTS_FT_RANK',
 'PCT_PTS_OFF_TOV_RANK',
 'PCT_PTS_PAINT_RANK',
 'PCT_AST_2PM_RANK',
 'PCT_UAST_2PM_RANK',
 'PCT_AST_3PM_RANK',
 'PCT_UAST_3PM_RANK',
 'PCT_AST_FGM_RANK',
 'PCT_UAST_FGM_RANK',
 'CFID',
 'CFPARAMS',
 'season']

In [79]:
players_scoring_df[players_scoring_df.columns[10:25]]

Unnamed: 0,PCT_FGA_2PT,PCT_FGA_3PT,PCT_PTS_2PT,PCT_PTS_2PT_MR,PCT_PTS_3PT,PCT_PTS_FB,PCT_PTS_FT,PCT_PTS_OFF_TOV,PCT_PTS_PAINT,PCT_AST_2PM,PCT_UAST_2PM,PCT_AST_3PM,PCT_UAST_3PM,PCT_AST_FGM,PCT_UAST_FGM
0,0.522,0.478,0.591,0.409,0.409,0.114,0.000,0.114,0.182,0.231,0.769,0.833,0.167,0.421,0.579
1,0.573,0.427,0.425,0.099,0.447,0.109,0.129,0.138,0.326,0.139,0.861,0.698,0.302,0.369,0.631
2,0.984,0.016,0.831,0.092,0.000,0.031,0.169,0.108,0.738,0.667,0.333,0.000,0.000,0.667,0.333
3,0.643,0.357,0.714,0.286,0.214,0.429,0.071,0.357,0.429,0.200,0.800,1.000,0.000,0.333,0.667
4,0.517,0.483,0.427,0.053,0.453,0.098,0.120,0.164,0.373,0.646,0.354,1.000,0.000,0.793,0.207
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,0.717,0.283,0.550,0.098,0.241,0.139,0.209,0.162,0.452,0.332,0.668,0.525,0.475,0.375,0.625
526,0.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
527,0.979,0.021,0.637,0.127,0.000,0.060,0.363,0.131,0.509,0.624,0.376,0.000,0.000,0.624,0.376
528,0.529,0.471,0.400,0.050,0.450,0.050,0.150,0.200,0.350,0.500,0.500,0.667,0.333,0.571,0.429


In [194]:
filtered_scoring_df = pd.concat([players_scoring_df.PLAYER_ID, players_scoring_df.season, 
                        players_scoring_df[players_scoring_df.columns[10:25]]], axis=1)

combined_players_df = combined_players_df.merge(filtered_scoring_df, on=['PLAYER_ID','season'], how='left')

In [195]:
filtered_scoring_df.head()

Unnamed: 0,PLAYER_ID,season,PCT_FGA_2PT,PCT_FGA_3PT,PCT_PTS_2PT,PCT_PTS_2PT_MR,PCT_PTS_3PT,PCT_PTS_FB,PCT_PTS_FT,PCT_PTS_OFF_TOV,PCT_PTS_PAINT,PCT_AST_2PM,PCT_UAST_2PM,PCT_AST_3PM,PCT_UAST_3PM,PCT_AST_FGM,PCT_UAST_FGM
0,201985,2013-14,0.522,0.478,0.591,0.409,0.409,0.114,0.0,0.114,0.182,0.231,0.769,0.833,0.167,0.421,0.579
1,201166,2013-14,0.573,0.427,0.425,0.099,0.447,0.109,0.129,0.138,0.326,0.139,0.861,0.698,0.302,0.369,0.631
2,201189,2013-14,0.984,0.016,0.831,0.092,0.0,0.031,0.169,0.108,0.738,0.667,0.333,0.0,0.0,0.667,0.333
3,203519,2013-14,0.643,0.357,0.714,0.286,0.214,0.429,0.071,0.357,0.429,0.2,0.8,1.0,0.0,0.333,0.667
4,1733,2013-14,0.517,0.483,0.427,0.053,0.453,0.098,0.12,0.164,0.373,0.646,0.354,1.0,0.0,0.793,0.207


In [196]:
# Filter unique stats and combine with big df

combined_players_df.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,...,PCT_PTS_FB,PCT_PTS_FT,PCT_PTS_OFF_TOV,PCT_PTS_PAINT,PCT_AST_2PM,PCT_UAST_2PM,PCT_AST_3PM,PCT_UAST_3PM,PCT_AST_FGM,PCT_UAST_FGM
0,201985,AJ Price,1610612750,MIN,27.0,28,15,13,0.536,3.5,...,0.114,0.0,0.114,0.182,0.231,0.769,0.833,0.167,0.421,0.579
1,201166,Aaron Brooks,1610612743,DEN,29.0,72,42,30,0.583,21.6,...,0.109,0.129,0.138,0.326,0.139,0.861,0.698,0.302,0.369,0.631
2,201189,Aaron Gray,1610612758,SAC,29.0,37,12,25,0.324,9.6,...,0.031,0.169,0.108,0.738,0.667,0.333,0.0,0.0,0.667,0.333
3,203519,Adonis Thomas,1610612755,PHI,21.0,6,2,4,0.333,6.3,...,0.429,0.071,0.357,0.429,0.2,0.8,1.0,0.0,0.333,0.667
4,1733,Al Harrington,1610612764,WAS,34.0,34,20,14,0.588,15.0,...,0.098,0.12,0.164,0.373,0.646,0.354,1.0,0.0,0.793,0.207


In [197]:
combined_players_stats_filename = '2013-19_players_assorted_stats.csv'
combined_players_df.to_csv(combined_players_stats_filename)

## Shooting stats

Good for offensive clustering
Note: DateFrame retrieval is bugged right now.

In [1]:
# from nba_api.stats.endpoints import LeagueDashPlayerShotLocations

# shot_locs = partial(LeagueDashPlayerShotLocations, distance_range='By Zone', measure_type_simple='Base',
#                         per_mode_detailed='PerGame')

# shot_locs_season = shot_locs(season=curr_season)

In [108]:
#shot_locs_season.get_data_frames()
# bug here; report to api issues

ValueError: 2 columns passed, passed data had 26 columns

## Defensive dashboards

In [12]:
from nba_api.stats.endpoints import LeagueDashPtDefend

In [13]:
# Defending > 15 ft
def get_perimeter_defs(in_season, out_df):
    df = LeagueDashPtDefend(defense_category='3 Pointers', 
                                        per_mode_simple='PerGame',
                                        season=in_season).get_data_frames()[0]
    df['season'] = in_season
    out_df = pd.concat([out_df, df])
    return out_df

In [15]:
# create empty data frame
perimeter_def_df = pd.DataFrame()
# loop through seasons
for season in seasons:
    perimeter_def_df = get_perimeter_defs(season, perimeter_def_df)

In [16]:
perimeter_def_df.head()

Unnamed: 0,CLOSE_DEF_PERSON_ID,PLAYER_NAME,PLAYER_LAST_TEAM_ID,PLAYER_LAST_TEAM_ABBREVIATION,PLAYER_POSITION,AGE,GP,G,FREQ,FG3M,FG3A,FG3_PCT,NS_FG3_PCT,PLUSMINUS,season
0,201950,Jrue Holiday,1610612740,NOP,G,24.0,34,34,0.466,1.88,5.24,0.36,0.356,0.004,2013-14
1,202689,Kemba Walker,1610612766,CHA,G,24.0,72,71,0.364,2.0,4.94,0.404,0.351,0.053,2013-14
2,202681,Kyrie Irving,1610612739,CLE,G,22.0,71,70,0.451,1.83,4.87,0.376,0.36,0.016,2013-14
3,201935,James Harden,1610612745,HOU,G,24.0,73,71,0.394,1.68,4.81,0.35,0.354,-0.004,2013-14
4,203079,Dion Waiters,1610612739,CLE,G,22.0,70,70,0.493,1.67,4.54,0.368,0.359,0.009,2013-14


In [17]:
# Change freq label
perimeter_def_df.rename(columns={'FREQ': '3P_FREQ'}, inplace=True)

In [18]:
# Change +/- label
perimeter_def_df.rename(columns={'PLUSMINUS': '3P_PLUSMINUS'}, inplace=True)

In [19]:
# Defending within 6 ft
# Defending > 15 ft
def get_interior_defs(in_season, out_df):
    df = LeagueDashPtDefend(defense_category='Less Than 6Ft', 
                                        per_mode_simple='PerGame',
                                        season=in_season).get_data_frames()[0]
    df['season'] = in_season
    out_df = pd.concat([out_df, df])
    return out_df

In [20]:
# create empty data frame
interior_def_df = pd.DataFrame()
# loop through seasons
for season in seasons:
    interior_def_df = get_interior_defs(season, interior_def_df)

In [21]:
interior_def_df.head()

Unnamed: 0,CLOSE_DEF_PERSON_ID,PLAYER_NAME,PLAYER_LAST_TEAM_ID,PLAYER_LAST_TEAM_ABBREVIATION,PLAYER_POSITION,AGE,GP,G,FREQ,FGM_LT_06,FGA_LT_06,LT_06_PCT,NS_LT_06_PCT,PLUSMINUS,season
0,201577,Robin Lopez,1610612757,POR,C,26.0,82,82,0.488,3.66,7.83,0.467,0.594,-0.127,2013-14
1,201586,Serge Ibaka,1610612760,OKC,F-C,24.0,81,81,0.489,3.74,7.67,0.488,0.599,-0.111,2013-14
2,2200,Pau Gasol,1610612747,LAL,C-F,33.0,60,60,0.518,4.53,7.58,0.598,0.603,-0.005,2013-14
3,201599,DeAndre Jordan,1610612746,LAC,C,25.0,81,81,0.452,4.23,7.56,0.56,0.601,-0.04,2013-14
4,101162,Marcin Gortat,1610612764,WAS,C,30.0,80,79,0.494,4.08,7.55,0.54,0.597,-0.057,2013-14


In [22]:
# change freq labels
interior_def_df.rename(columns={'FREQ': '6FT_FREQ'}, inplace=True)

In [23]:
interior_def_df.rename(columns={'PLUSMINUS': '6FT_PLUSMINUS'}, inplace=True)

In [24]:
interior_def_df.head()

Unnamed: 0,CLOSE_DEF_PERSON_ID,PLAYER_NAME,PLAYER_LAST_TEAM_ID,PLAYER_LAST_TEAM_ABBREVIATION,PLAYER_POSITION,AGE,GP,G,6FT_FREQ,FGM_LT_06,FGA_LT_06,LT_06_PCT,NS_LT_06_PCT,6FT_PLUSMINUS,season
0,201577,Robin Lopez,1610612757,POR,C,26.0,82,82,0.488,3.66,7.83,0.467,0.594,-0.127,2013-14
1,201586,Serge Ibaka,1610612760,OKC,F-C,24.0,81,81,0.489,3.74,7.67,0.488,0.599,-0.111,2013-14
2,2200,Pau Gasol,1610612747,LAL,C-F,33.0,60,60,0.518,4.53,7.58,0.598,0.603,-0.005,2013-14
3,201599,DeAndre Jordan,1610612746,LAC,C,25.0,81,81,0.452,4.23,7.56,0.56,0.601,-0.04,2013-14
4,101162,Marcin Gortat,1610612764,WAS,C,30.0,80,79,0.494,4.08,7.55,0.54,0.597,-0.057,2013-14


In [25]:
perimeter_def_df[perimeter_def_df.columns[9:14]]

Unnamed: 0,FG3M,FG3A,FG3_PCT,NS_FG3_PCT,3P_PLUSMINUS
0,1.88,5.24,0.360,0.356,0.004
1,2.00,4.94,0.404,0.351,0.053
2,1.83,4.87,0.376,0.360,0.016
3,1.68,4.81,0.350,0.354,-0.004
4,1.67,4.54,0.368,0.359,0.009
...,...,...,...,...,...
512,0.00,0.50,0.000,0.397,-0.397
513,0.00,0.50,0.000,0.296,-0.296
514,0.18,0.47,0.375,0.362,0.013
515,0.20,0.40,0.500,0.199,0.301


In [26]:
perimeter_def_df.tail()

Unnamed: 0,CLOSE_DEF_PERSON_ID,PLAYER_NAME,PLAYER_LAST_TEAM_ID,PLAYER_LAST_TEAM_ABBREVIATION,PLAYER_POSITION,AGE,GP,G,3P_FREQ,FG3M,FG3A,FG3_PCT,NS_FG3_PCT,3P_PLUSMINUS,season
512,201281,Andre Ingram,1610612747,LAL,G,33.0,2,1,0.333,0.0,0.5,0.0,0.397,-0.397,2018-19
513,1628505,Troy Caupain,1610612753,ORL,G,23.0,4,2,0.286,0.0,0.5,0.0,0.296,-0.296,2018-19
514,203101,Miles Plumlee,1610612737,ATL,C,30.0,17,7,0.1,0.18,0.47,0.375,0.362,0.013,2018-19
515,1629058,Dzanan Musa,1610612751,BKN,G-F,20.0,5,2,0.222,0.2,0.4,0.5,0.199,0.301,2018-19
516,1628463,Tyler Cavanaugh,1610612762,UTA,F,25.0,4,1,0.143,0.0,0.25,0.0,0.417,-0.417,2018-19


In [27]:
filtered_perimeter_def_df = pd.concat([perimeter_def_df.PLAYER_NAME, perimeter_def_df.season, perimeter_def_df[perimeter_def_df.columns[8:14]]], axis=1)

In [28]:
filtered_perimeter_def_df

Unnamed: 0,PLAYER_NAME,season,3P_FREQ,FG3M,FG3A,FG3_PCT,NS_FG3_PCT,3P_PLUSMINUS
0,Jrue Holiday,2013-14,0.466,1.88,5.24,0.360,0.356,0.004
1,Kemba Walker,2013-14,0.364,2.00,4.94,0.404,0.351,0.053
2,Kyrie Irving,2013-14,0.451,1.83,4.87,0.376,0.360,0.016
3,James Harden,2013-14,0.394,1.68,4.81,0.350,0.354,-0.004
4,Dion Waiters,2013-14,0.493,1.67,4.54,0.368,0.359,0.009
...,...,...,...,...,...,...,...,...
512,Andre Ingram,2018-19,0.333,0.00,0.50,0.000,0.397,-0.397
513,Troy Caupain,2018-19,0.286,0.00,0.50,0.000,0.296,-0.296
514,Miles Plumlee,2018-19,0.100,0.18,0.47,0.375,0.362,0.013
515,Dzanan Musa,2018-19,0.222,0.20,0.40,0.500,0.199,0.301


In [29]:
# Combine appropriate statistics
combined_def_df = interior_def_df.merge(filtered_perimeter_def_df, on=['PLAYER_NAME', 'season'])

In [30]:
len(combined_def_df)

2933

In [31]:
combined_def_df.columns

Index(['CLOSE_DEF_PERSON_ID', 'PLAYER_NAME', 'PLAYER_LAST_TEAM_ID',
       'PLAYER_LAST_TEAM_ABBREVIATION', 'PLAYER_POSITION', 'AGE', 'GP', 'G',
       '6FT_FREQ', 'FGM_LT_06', 'FGA_LT_06', 'LT_06_PCT', 'NS_LT_06_PCT',
       '6FT_PLUSMINUS', 'season', '3P_FREQ', 'FG3M', 'FG3A', 'FG3_PCT',
       'NS_FG3_PCT', '3P_PLUSMINUS'],
      dtype='object')

In [34]:
len(players_adv_df)

3006

In [35]:
len(combined_def_df)

2933

In [40]:
players_adv_df[['PLAYER_ID', 'DREB_PCT']]

Unnamed: 0,PLAYER_ID,DREB_PCT
0,201985,0.075
1,201166,0.061
2,201189,0.199
3,203519,0.079
4,1733,0.138
...,...,...
525,203897,0.113
526,1629155,0.000
527,2585,0.184
528,1629015,0.087


In [41]:
# Add current defensive rebounding % to defensive stats
combined_def_df = combined_def_df.merge(players_adv_df[['PLAYER_ID','PLAYER_NAME', 'DREB_PCT']], 
                                            on=['PLAYER_ID', 'PLAYER_NAME'], how='left', co)

# Add blocks and blocks attempted
# combined_def_df['BLK'] = players_misc_df['BLK']
# combined_def_df['BLKA'] = players_misc_df['BLKA']

KeyError: 'PLAYER_ID'

In [204]:
def_filename = '2013-19_shot_defenses.csv'
combined_def_df.to_csv(def_filename)

# Gather data for current season

## Offensive statistics

In [13]:
# create empty dataframe for combined statistics
curr_off_stats = pd.DataFrame()

# working dataframes
curr_adv_stats = pd.DataFrame()
curr_misc_stats = pd.DataFrame()
curr_scoring_stats = pd.DataFrame()

curr_interior_def_stats = pd.DataFrame()
curr_peri_def_stats = pd.DataFrame()

In [28]:
curr_adv_stats = get_adv_stats(out_df=curr_adv_stats, in_season=curr_season)
curr_misc_stats = get_misc_stats(out_df=curr_misc_stats, in_season=curr_season)
curr_scoring_stats = scoring_adv_stats(out_df=curr_scoring_stats, in_season=curr_season)

In [35]:
curr_filtered_scoring = pd.concat([curr_scoring_stats.PLAYER_ID, curr_scoring_stats.season, 
                        curr_scoring_stats[curr_scoring_stats.columns[10:25]]], axis=1)

In [38]:
# combine offensive dataframes

curr_filtered_misc = curr_misc_stats[['PLAYER_ID','season', 'PTS_FB', 'PTS_PAINT', 'PFD', 'BLKA']]
curr_filtered_scoring = pd.concat([curr_scoring_stats.PLAYER_ID, curr_scoring_stats.season, 
                        curr_scoring_stats[curr_scoring_stats.columns[10:25]]], axis=1)

# merge dataframes
curr_off_stats = curr_adv_stats.merge(curr_filtered_misc, on=['PLAYER_ID', 'season'], how='left')
curr_off_stats = curr_off_stats.merge(curr_filtered_scoring, on=['PLAYER_ID', 'season'], how='left')

## Defensive statistics

In [33]:
curr_interior_def_stats = get_interior_defs(in_season=curr_season, out_df=curr_interior_def_stats)
curr_peri_def_stats = get_perimeter_defs(in_season=curr_season, out_df = curr_peri_def_stats)

In [39]:
curr_filtered_perimeter_def_df = pd.concat([curr_peri_def_stats.PLAYER_NAME, curr_peri_def_stats.season, 
                                            curr_peri_def_stats[curr_peri_def_stats.columns[8:14]]], axis=1)

In [40]:
# combine defensive dataframes
curr_def_stats = pd.DataFrame()

curr_def_stats = curr_interior_def_stats.merge(curr_filtered_perimeter_def_df, on=['PLAYER_NAME', 'season'], how='left')

In [None]:
# Add blocks and def % stats
# Add current defensive rebounding % to defensive stats
curr_def_stats['DREB_PCT'] = curr_off_stats['DREB_PCT']

# Add blocks and blocks attempted
curr_def_stats['BLK'] = curr_off_stats['BLK']
curr_def_stats['BLKA'] = curr_off_stats['BLKA']

In [41]:
# Export to csv files
curr_off_filename = '2019-20_offensive_stats.csv'
curr_def_filename = '2019-20_defensive_stats.csv'

curr_off_stats.to_csv(curr_off_filename)
curr_def_stats.to_csv(curr_def_filename)