# Libraries

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import os
import time

# Endpoints from NBA API
from nba_api.stats.endpoints import playbyplayv2
from nba_api.stats.endpoints import gamerotation
from nba_api.stats.endpoints import shotchartdetail
from nba_api.stats.endpoints import teamplayerdashboard
from nba_api.stats.endpoints import winprobabilitypbp

# Static Imports from NBA API
from nba_api.stats.static import players
from nba_api.stats.static import teams

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# User Libraries
import acquire_rm

# Helpful Stuff
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)



#### Loading a shot detail for a game by player 

Acquiring a dataframe with all shots:

In [2]:
df_shots = acquire_rm.all_21_22_shots()

Taking a sample game to analyze - need team_id, player_id and a game_id:

In [4]:
team_id = teams.find_team_by_abbreviation('SAS')['id'] # For SA Spurs

In [5]:
player_id = players.find_players_by_full_name("Keldon Johnson")[0]['id'] # For Keldon Johnson

In [6]:
game_id = '0022100010' # First game of year Spurs vs. Magic

Now create a base dataframe object from win probability:

In [7]:
df_base = winprobabilitypbp.WinProbabilityPBP(game_id).get_data_frames()[0]

In [9]:
df_base = acquire_rm.get_absolute_time(df_base)

In [10]:
df_base

Unnamed: 0,GAME_ID,EVENT_NUM,HOME_PCT,VISITOR_PCT,HOME_PTS,VISITOR_PTS,HOME_SCORE_MARGIN,PERIOD,SECONDS_REMAINING,HOME_POSS_IND,HOME_G,DESCRIPTION,LOCATION,PCTIMESTRING,ISVISIBLE,abs_time
0,0022100010,2.0,0.58808,0.41192,0,0,0,1,720.0,,,Start of 1st Period (8:43 PM EST),,12:00,1.0,0.0
1,0022100010,4.0,0.61352,0.38648,0,0,0,1,720.0,1.0,,Jump Ball Poeltl vs. Bamba: Tip to Johnson,h,12:00,1.0,0.0
2,0022100010,,0.61350,0.38650,0,0,0,1,719.0,1.0,,,,,,1.0
3,0022100010,,0.61348,0.38652,0,0,0,1,718.0,1.0,,,,,,2.0
4,0022100010,,0.61346,0.38654,0,0,0,1,717.0,1.0,,,,,,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3056,0022100010,,,,123,97,26,4,4.0,1.0,,,,,,2876.0
3057,0022100010,,,,123,97,26,4,3.0,1.0,,,,,,2877.0
3058,0022100010,,,,123,97,26,4,2.0,1.0,,,,,,2878.0
3059,0022100010,,,,123,97,26,4,1.0,1.0,,,,,,2879.0


Let's use an edge case to test the absolute time column.

In [None]:
df_timer = winprobabilitypbp.WinProbabilityPBP('0022100291').get_data_frames()[0]

In [None]:
df_timer['abs_time'] = np.where(df_timer.PERIOD <5, ((df_timer.PERIOD - 1) * 720 + (720 - df_timer.SECONDS_REMAINING)), (2880 + (df_timer.PERIOD - 5) * 300 + (300 - df_timer.SECONDS_REMAINING)))

In [None]:
df_timer[3000:3010]

# Back to the original df creation - add in PLAYER ROTATION

#### We have a single games win probability df with absolutely gametime and can now merge in with PLAYER ROTATION

In [11]:
df_rotation = gamerotation.GameRotation(game_id).get_data_frames()

In [14]:
df_rotation[0]

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_CITY,TEAM_NAME,PERSON_ID,PLAYER_FIRST,PLAYER_LAST,IN_TIME_REAL,OUT_TIME_REAL,PLAYER_PTS,PT_DIFF,USG_PCT
0,22100010,1610612753,Orlando,Magic,201577,Robin,Lopez,9190.0,11910.0,2,-6.0,0.25
1,22100010,1610612753,Orlando,Magic,201577,Robin,Lopez,16430.0,21600.0,2,-13.0,0.158
2,22100010,1610612753,Orlando,Magic,203082,Terrence,Ross,2550.0,8560.0,3,-11.0,0.12
3,22100010,1610612753,Orlando,Magic,203082,Terrence,Ross,9830.0,14400.0,4,-7.0,0.211
4,22100010,1610612753,Orlando,Magic,203082,Terrence,Ross,18170.0,24000.0,8,-14.0,0.227
5,22100010,1610612753,Orlando,Magic,1628964,Mo,Bamba,0.0,3930.0,5,0.0,0.188
6,22100010,1610612753,Orlando,Magic,1628964,Mo,Bamba,7200.0,9190.0,3,1.0,0.167
7,22100010,1610612753,Orlando,Magic,1628964,Mo,Bamba,13200.0,16430.0,3,3.0,0.214
8,22100010,1610612753,Orlando,Magic,1628964,Mo,Bamba,21600.0,28800.0,7,-3.0,0.222
9,22100010,1610612753,Orlando,Magic,1628976,Wendell,Carter Jr.,0.0,2950.0,4,6.0,0.25


##### Ok, we we have to feed in if the player is home vs away.
Or, we can have it go through each dataset to find the player id.  If it's not in the first, go to the second.

In [15]:
for i in range(2):
    for player in df_rotation[i].PERSON_ID:
        if player == player_id:
            df_player_roto = df_rotation[i][df_rotation[i].PERSON_ID == player]

In [16]:
df_player_roto

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_CITY,TEAM_NAME,PERSON_ID,PLAYER_FIRST,PLAYER_LAST,IN_TIME_REAL,OUT_TIME_REAL,PLAYER_PTS,PT_DIFF,USG_PCT
28,22100010,1610612759,San Antonio,Spurs,1629640,Keldon,Johnson,0.0,5010.0,6,-2.0,0.238
29,22100010,1610612759,San Antonio,Spurs,1629640,Keldon,Johnson,8560.0,13220.0,7,9.0,0.32
30,22100010,1610612759,San Antonio,Spurs,1629640,Keldon,Johnson,14400.0,18170.0,2,0.0,0.308
31,22100010,1610612759,San Antonio,Spurs,1629640,Keldon,Johnson,22710.0,25860.0,0,-3.0,0.0


#### Nice!  We now have a way to pull in the rotation df

In [17]:
df_player_roto['abs_in_time'] = df_player_roto.IN_TIME_REAL/10

In [18]:
df_player_roto['abs_out_time'] = df_player_roto.OUT_TIME_REAL/10

Need to reset index

In [19]:
df_player_roto.reset_index(drop = 'True')

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_CITY,TEAM_NAME,PERSON_ID,PLAYER_FIRST,PLAYER_LAST,IN_TIME_REAL,OUT_TIME_REAL,PLAYER_PTS,PT_DIFF,USG_PCT,abs_in_time,abs_out_time
0,22100010,1610612759,San Antonio,Spurs,1629640,Keldon,Johnson,0.0,5010.0,6,-2.0,0.238,0.0,501.0
1,22100010,1610612759,San Antonio,Spurs,1629640,Keldon,Johnson,8560.0,13220.0,7,9.0,0.32,856.0,1322.0
2,22100010,1610612759,San Antonio,Spurs,1629640,Keldon,Johnson,14400.0,18170.0,2,0.0,0.308,1440.0,1817.0
3,22100010,1610612759,San Antonio,Spurs,1629640,Keldon,Johnson,22710.0,25860.0,0,-3.0,0.0,2271.0,2586.0


In [20]:
df_player_roto_times = df_player_roto[['abs_in_time','abs_out_time']].reset_index(drop = 'True')

In [21]:
df_player_roto_times

Unnamed: 0,abs_in_time,abs_out_time
0,0.0,501.0
1,856.0,1322.0
2,1440.0,1817.0
3,2271.0,2586.0


In [22]:
zipped = list(zip(df_player_roto_times.abs_in_time, df_player_roto_times.abs_out_time))

In [23]:
zipped

[(0.0, 501.0), (856.0, 1322.0), (1440.0, 1817.0), (2271.0, 2586.0)]

In [25]:
# Let me create a holder dataframe as I pull slices off from the base
df_player_game = pd.DataFrame()
for tuplez in zipped:
    df_slice = df_base[(df_base.abs_time >= tuplez[0]) & (df_base.abs_time <= tuplez[1])]
    df_player_game = pd.concat([df_player_game, df_slice])
df_player_game

Unnamed: 0,GAME_ID,EVENT_NUM,HOME_PCT,VISITOR_PCT,HOME_PTS,VISITOR_PTS,HOME_SCORE_MARGIN,PERIOD,SECONDS_REMAINING,HOME_POSS_IND,HOME_G,DESCRIPTION,LOCATION,PCTIMESTRING,ISVISIBLE,abs_time
0,0022100010,2.0,0.58808,0.41192,0,0,0,1,720.0,,,Start of 1st Period (8:43 PM EST),,12:00,1.0,0.0
1,0022100010,4.0,0.61352,0.38648,0,0,0,1,720.0,1.0,,Jump Ball Poeltl vs. Bamba: Tip to Johnson,h,12:00,1.0,0.0
2,0022100010,,0.61350,0.38650,0,0,0,1,719.0,1.0,,,,,,1.0
3,0022100010,,0.61348,0.38652,0,0,0,1,718.0,1.0,,,,,,2.0
4,0022100010,,0.61346,0.38654,0,0,0,1,717.0,1.0,,,,,,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2749,0022100010,632.0,,,109,84,25,4,294.0,1.0,,SPURS Rebound,h,4:54,1.0,2586.0
2750,0022100010,633.0,,,109,84,25,4,294.0,1.0,,Out-of-Bounds,,4:54,0.0,2586.0
2751,0022100010,634.0,,,109,84,25,4,294.0,1.0,,SUB: Bates-Diop FOR Johnson,h,4:54,1.0,2586.0
2752,0022100010,635.0,,,109,84,25,4,294.0,1.0,,SUB: Primo FOR White,h,4:54,1.0,2586.0


## Now I have to add in shot details...

In [31]:
df_shots = shotchartdetail.ShotChartDetail(team_id, player_id, season_type_all_star='Regular Season',
                                                        season_nullable='2021-22',
                                                        context_measure_simple = 'FGA').get_data_frames()[0]

In [33]:
df_shots['abs_time'] = np.where(df_shots.PERIOD < 5,
                                (df_shots.PERIOD - 1) * 720 + (720 - (60 * df_shots.MINUTES_REMAINING) - (df_shots.SECONDS_REMAINING)),
                                2880 + ((df_shots.PERIOD - 5) * 300) + (300 - (60 * df_shots.MINUTES_REMAINING) - (df_shots.SECONDS_REMAINING)))

In [34]:
df_shots

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,EVENT_TYPE,ACTION_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM,abs_time
0,Shot Chart Detail,0022100010,20,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,10,27,Made Shot,Pullup Jump shot,2PT Field Goal,Mid-Range,Left Side(L),16-24 ft.,16,-134,97,1,1,20211020,SAS,ORL,93
1,Shot Chart Detail,0022100010,70,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,6,52,Made Shot,Reverse Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,-2,-4,1,1,20211020,SAS,ORL,308
2,Shot Chart Detail,0022100010,81,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,5,56,Missed Shot,Driving Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-20,6,1,0,20211020,SAS,ORL,364
3,Shot Chart Detail,0022100010,212,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,9,11,Missed Shot,Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-23,7,1,0,20211020,SAS,ORL,889
4,Shot Chart Detail,0022100010,234,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,8,29,Missed Shot,Driving Hook Shot,2PT Field Goal,In The Paint (Non-RA),Center(C),Less Than 8 ft.,7,-44,59,1,0,20211020,SAS,ORL,931
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1006,Shot Chart Detail,0022101219,410,1629640,Keldon Johnson,1610612759,San Antonio Spurs,3,5,15,Missed Shot,Pullup Jump shot,2PT Field Goal,In The Paint (Non-RA),Center(C),Less Than 8 ft.,7,58,49,1,0,20220410,DAL,SAS,1845
1007,Shot Chart Detail,0022101219,493,1629640,Keldon Johnson,1610612759,San Antonio Spurs,4,11,46,Made Shot,Jump Shot,2PT Field Goal,In The Paint (Non-RA),Center(C),8-16 ft.,8,34,83,1,1,20220410,DAL,SAS,2174
1008,Shot Chart Detail,0022101219,535,1629640,Keldon Johnson,1610612759,San Antonio Spurs,4,8,11,Made Shot,Pullup Jump shot,2PT Field Goal,In The Paint (Non-RA),Center(C),Less Than 8 ft.,7,-53,55,1,1,20220410,DAL,SAS,2389
1009,Shot Chart Detail,0022101219,563,1629640,Keldon Johnson,1610612759,San Antonio Spurs,4,6,6,Made Shot,Pullup Jump shot,3PT Field Goal,Above the Break 3,Center(C),24+ ft.,25,-27,253,1,1,20220410,DAL,SAS,2514


#### Can we merge these now?...

Need to pull the specific game from shots:

In [35]:
df_game_shots = df_shots[df_shots.GAME_ID == game_id]

In [36]:
df_game_shots

Unnamed: 0,GRID_TYPE,GAME_ID,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD,MINUTES_REMAINING,SECONDS_REMAINING,EVENT_TYPE,ACTION_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM,abs_time
0,Shot Chart Detail,22100010,20,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,10,27,Made Shot,Pullup Jump shot,2PT Field Goal,Mid-Range,Left Side(L),16-24 ft.,16,-134,97,1,1,20211020,SAS,ORL,93
1,Shot Chart Detail,22100010,70,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,6,52,Made Shot,Reverse Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,-2,-4,1,1,20211020,SAS,ORL,308
2,Shot Chart Detail,22100010,81,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,5,56,Missed Shot,Driving Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-20,6,1,0,20211020,SAS,ORL,364
3,Shot Chart Detail,22100010,212,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,9,11,Missed Shot,Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-23,7,1,0,20211020,SAS,ORL,889
4,Shot Chart Detail,22100010,234,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,8,29,Missed Shot,Driving Hook Shot,2PT Field Goal,In The Paint (Non-RA),Center(C),Less Than 8 ft.,7,-44,59,1,0,20211020,SAS,ORL,931
5,Shot Chart Detail,22100010,269,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,6,13,Made Shot,Tip Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,0,0,1,1,20211020,SAS,ORL,1067
6,Shot Chart Detail,22100010,267,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,6,13,Missed Shot,Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,9,2,1,0,20211020,SAS,ORL,1067
7,Shot Chart Detail,22100010,330,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,2,48,Made Shot,Driving Floating Jump Shot,2PT Field Goal,In The Paint (Non-RA),Center(C),Less Than 8 ft.,6,2,65,1,1,20211020,SAS,ORL,1272
8,Shot Chart Detail,22100010,337,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,2,16,Made Shot,Pullup Jump shot,2PT Field Goal,Mid-Range,Left Side(L),8-16 ft.,15,-157,17,1,1,20211020,SAS,ORL,1304
9,Shot Chart Detail,22100010,414,1629640,Keldon Johnson,1610612759,San Antonio Spurs,3,9,45,Made Shot,Cutting Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,23,6,1,1,20211020,SAS,ORL,1575


In [37]:
df_almostthere = df_player_game.merge(df_game_shots, how = 'inner', on = 'abs_time')

Need to add in total playtime for each point in almost there, time since last rest.  Then need to identify if they are home or visitors and then use that to give margin.
Need to add in cluster!
Clean Up

In [38]:
df_almostthere

Unnamed: 0,GAME_ID_x,EVENT_NUM,HOME_PCT,VISITOR_PCT,HOME_PTS,VISITOR_PTS,HOME_SCORE_MARGIN,PERIOD_x,SECONDS_REMAINING_x,HOME_POSS_IND,HOME_G,DESCRIPTION,LOCATION,PCTIMESTRING,ISVISIBLE,abs_time,GRID_TYPE,GAME_ID_y,GAME_EVENT_ID,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_NAME,PERIOD_y,MINUTES_REMAINING,SECONDS_REMAINING_y,EVENT_TYPE,ACTION_TYPE,SHOT_TYPE,SHOT_ZONE_BASIC,SHOT_ZONE_AREA,SHOT_ZONE_RANGE,SHOT_DISTANCE,LOC_X,LOC_Y,SHOT_ATTEMPTED_FLAG,SHOT_MADE_FLAG,GAME_DATE,HTM,VTM
0,22100010,20.0,0.55487,0.44513,5,7,-2,1,627.0,0.0,,Johnson 17' Pullup Jump Shot (3 PTS),h,10:27,1.0,93.0,Shot Chart Detail,22100010,20,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,10,27,Made Shot,Pullup Jump shot,2PT Field Goal,Mid-Range,Left Side(L),16-24 ft.,16,-134,97,1,1,20211020,SAS,ORL
1,22100010,70.0,0.50397,0.49603,13,17,-4,1,412.0,0.0,,Johnson Reverse Dunk (6 PTS) (White 1 AST),h,6:52,1.0,308.0,Shot Chart Detail,22100010,70,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,6,52,Made Shot,Reverse Dunk Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,-2,-4,1,1,20211020,SAS,ORL
2,22100010,81.0,0.52545,0.47455,13,17,-4,1,356.0,1.0,,MISS Johnson 2' Driving Layup,h,5:56,1.0,364.0,Shot Chart Detail,22100010,81,1629640,Keldon Johnson,1610612759,San Antonio Spurs,1,5,56,Missed Shot,Driving Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-20,6,1,0,20211020,SAS,ORL
3,22100010,212.0,0.67537,0.32463,39,36,3,2,551.0,1.0,,MISS Johnson 2' Layup#@#Bamba BLOCK (2 BLK),h,9:11,1.0,889.0,Shot Chart Detail,22100010,212,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,9,11,Missed Shot,Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,2,-23,7,1,0,20211020,SAS,ORL
4,22100010,234.0,0.67639,0.32361,39,36,3,2,509.0,1.0,,MISS Johnson 7' Driving Hook Shot,h,8:29,1.0,931.0,Shot Chart Detail,22100010,234,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,8,29,Missed Shot,Driving Hook Shot,2PT Field Goal,In The Paint (Non-RA),Center(C),Less Than 8 ft.,7,-44,59,1,0,20211020,SAS,ORL
5,22100010,267.0,0.73307,0.26693,43,38,5,2,373.0,1.0,,MISS Johnson 1' Layup,h,6:13,1.0,1067.0,Shot Chart Detail,22100010,269,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,6,13,Made Shot,Tip Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,0,0,1,1,20211020,SAS,ORL
6,22100010,267.0,0.73307,0.26693,43,38,5,2,373.0,1.0,,MISS Johnson 1' Layup,h,6:13,1.0,1067.0,Shot Chart Detail,22100010,267,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,6,13,Missed Shot,Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,9,2,1,0,20211020,SAS,ORL
7,22100010,268.0,0.73307,0.26693,43,38,5,2,373.0,1.0,,Johnson REBOUND (Off:2 Def:1),h,6:13,1.0,1067.0,Shot Chart Detail,22100010,269,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,6,13,Made Shot,Tip Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,0,0,1,1,20211020,SAS,ORL
8,22100010,268.0,0.73307,0.26693,43,38,5,2,373.0,1.0,,Johnson REBOUND (Off:2 Def:1),h,6:13,1.0,1067.0,Shot Chart Detail,22100010,267,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,6,13,Missed Shot,Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,9,2,1,0,20211020,SAS,ORL
9,22100010,269.0,0.76172,0.23828,45,38,7,2,373.0,0.0,,Johnson Tip Layup Shot (8 PTS),h,6:13,1.0,1067.0,Shot Chart Detail,22100010,269,1629640,Keldon Johnson,1610612759,San Antonio Spurs,2,6,13,Made Shot,Tip Layup Shot,2PT Field Goal,Restricted Area,Center(C),Less Than 8 ft.,0,0,0,1,1,20211020,SAS,ORL


# Step 1 - Create df with player_id | team_id

### We need to create a data structure that holds a list of all player_ids and their team_id for players in the 2021-2022 regular season.  Note that this needs to have players who were on multiple teams during the season.

#### team_id List

In [None]:
df_teams = pd.DataFrame(teams.get_teams())
team_id_list = list(df_teams.id)

In [None]:
#team_id_list

#### Save the player_id/team_id combo to a csv

In [None]:
filename = 'team_player_ids.csv'
if os.path.isfile(filename):
    team_player_ids =  pd.read_csv(filename, index_col=0)
else:
    players_list = []
    for team in team_list:
        df_tpd = teamplayerdashboard.TeamPlayerDashboard(team,
                                                         season = '2021-22').get_data_frames()
        player_list = list(df_tpd[1].PLAYER_ID)
        for player in player_list:
            row = [team,player]
            players_list.append(row)
    team_player_ids = pd.DataFrame(players_list, columns = ['team_id','player_id'])
    team_player_ids.to_csv('team_player_ids.csv')

In [None]:
team_player_ids.info()
team_player_ids.head()

Out of curiosity, check to see how many players were on multiple teams:

In [None]:
team_player_ids.player_id.value_counts()[team_player_ids.player_id.value_counts() > 1]

97 players player for multiple teams:
- 1 player played for four teams (202328)
- 11 players played for 3 teams
- 85 players played for 2 teams

In [None]:
for record in players.get_players():
    if record['id'] == 202328:
        print(record)

#### ^Checks out - Greg Monroe played for 4 teams in 2022 (technically traded 4 times, but two were the same team)

# Step 2 - Use id list from above to get a shot chart.

Single player, regular season, 3pt shots in last 1-minute of play.

In [None]:
a = shotchartdetail.ShotChartDetail(team_id = 1610612737,
                                player_id = 203992,
                                season_type_all_star='Regular Season',
                                season_nullable='2021-22',
                                clutch_time_nullable='Last 1 Minute',
                                context_measure_simple = 'FG3A').get_data_frames()[0]

In [None]:
sns.relplot(data = a, x='LOC_X', y='LOC_Y', hue = 'SHOT_MADE_FLAG')

#### Create a list of lists, each sublist is [team_id, player_id]

In [None]:
players_list = team_player_ids.values.tolist()

In [None]:
filename2 = 'all_last_season_shots.csv'
if os.path.isfile(filename2):
    df =  pd.read_csv(filename2, index_col=0)
else:
    df = pd.DataFrame()
    index = 0
    for player in players_list:
        print(f'\rFetching index {index} of 714', end='')
        df_pl = shotchartdetail.ShotChartDetail(team_id = player[0],
                                                        player_id = player[1],
                                                        season_type_all_star='Regular Season',
                                                        season_nullable='2021-22',
                                                        context_measure_simple = 'FGA').get_data_frames()
        time.sleep(.5)
        index += 1
        df = pd.concat([df, df_pl[0]])

In [None]:
df.to_csv('all_last_season_shots.csv')

In [None]:
df[df.SHOT_TYPE == '3PT Field Goal']

In [None]:
pd.set_option('display.max_columns', None)

In [None]:
df

In [None]:
plt.figure(figsize = (6,9))
sns.scatterplot(data = df, x = 'LOC_X', y = 'LOC_Y', hue = 'SHOT_MADE_FLAG')
plt.show()

Team 3pt shooting, by attempt:

In [None]:
df.TEAM_NAME.value_counts()

# Step 3 - Let's make some clusters!

For the location feature, we are examining both grids, the defauly shot zones used, or creating our own shot zones.  For the latter, we want to see if simple location clustering of all shots can better categorize location on the court.

In [None]:
df

In [None]:
from sklearn.cluster import KMeans

In [None]:
X = df[['LOC_X','LOC_Y']]

In [None]:
with plt.style.context('seaborn-whitegrid'):
    plt.figure(figsize=(9, 6))
    pd.Series({k: KMeans(k).fit(X).inertia_ for k in range(2, 12)}).plot(marker='x')
    plt.xticks(range(2, 12))
    plt.xlabel('k')
    plt.ylabel('inertia')
    plt.title('Change in inertia as k increases')

In [None]:
kmeans = KMeans(n_clusters=8)
kmeans.fit(X)

clusters = kmeans.predict(X)

In [None]:
df_cluster_test = df.copy()

In [None]:
df_cluster_test['cluster'] = clusters

#### Restricting off a bunch of 

In [None]:
plt.figure(figsize = (16,16))
sns.scatterplot(data = df_cluster_test, x = 'LOC_X', y = 'LOC_Y', hue = 'cluster', cmap = 'Spectral')
plt.show()

In [None]:
df_cluster_test

---

# Univariate Analysis

In [None]:
df.info()

##### Use your iqr and std dev functions!!!

In [None]:
plt.figure(figsize = (12,6))
sns.histplot(data = df[df.SHOT_DISTANCE < 40], x = 'SHOT_DISTANCE')
plt.show()

In [None]:
df = df.astype({'SHOT_DISTANCE':'float'})

In [None]:
df.SHOT_DISTANCE.quantile(.25)

In [None]:
df.SHOT_DISTANCE.quantile(.75)

IQR = 2, IQR * 1.5 = 29 feet

In [None]:
df_igq3 = df[df.SHOT_DISTANCE <= 29]

In [None]:
plt.figure(figsize = (16,16))
sns.scatterplot(data = df_igq3, x = 'LOC_X', y = 'LOC_Y', hue = 'SHOT_MADE_FLAG')
plt.show()

In [None]:
df_igq3.shape

In [None]:
df_igq3.shape[0]/df.shape[0]

In [None]:
X = df_igq3[['LOC_X','LOC_Y']]

In [None]:
kmeans = KMeans(n_clusters=7)
kmeans.fit(X)

clusters = kmeans.predict(X)

In [None]:
df_igq3['cluster'] = clusters

In [None]:
with plt.style.context('seaborn-whitegrid'):
    plt.figure(figsize=(9, 6))
    pd.Series({k: KMeans(k).fit(X).inertia_ for k in range(2, 12)}).plot(marker='x')
    plt.xticks(range(2, 12))
    plt.xlabel('k')
    plt.ylabel('inertia')
    plt.title('Change in inertia as k increases')

In [None]:
plt.figure(figsize = (12,12))
sns.scatterplot(data = df_igq3, x = 'LOC_X', y = 'LOC_Y', hue = 'cluster')
plt.show()

In [None]:
def outlier_destroyer(df,k):
    for col in df.columns:
        if df.column.dtype != 'object':
            q1 = series.quantile(.25)
            q3 = series.quantile(.75)
            iqr = k * (q3 - q1)
            if q1-iqr < 0:
                low_outlier = 0
            else:
                low_outlier = q1 - iqr
            high_outlier = q3 + iqr
            df.drop(df[df[col] < low_outlier].index, inplace=True)
            df.drop(df[df[col] > high_outlier].index, inplace=True)
    return df

-----

# Trying to find the overall 3PT percentage of winning teams

In [None]:
from nba_api.stats.endpoints import leaguegamefinder

In [None]:
df_teams = leaguegamefinder.LeagueGameFinder(league_id_nullable = '00',
                                            season_nullable = '2021-22',
                                            season_type_nullable = 'Regular Season').get_data_frames()

In [None]:
df_season = df_teams[0]

In [None]:
df_season[df_season.MIN > 300]

In [None]:
season_winners = df_season[df_season.WL == 'W']

In [None]:
season_losers = df_season[df_season.WL == 'L']

In [None]:
season_winners.FG3_PCT.mean()

In [None]:
season_losers.FG3_PCT.mean()

Hypothesis Test - are 3pt percentages between the two the same

In [None]:
season_winners.FG3_PCT.var(),season_losers.FG3_PCT.var()

Variances are the same

In [None]:
import scipy.stats as stats

In [None]:
t, p = stats.ttest_ind(season_winners.FG3_PCT, season_losers.FG3_PCT, equal_var=True)
t, p / 2

# Play by Play 

Can estimate shot clock time by analyzizng changes in the clock

# This is for players we want to analyze only!

Let's get the spurs play by play for this season!

In [None]:
spurs_game_ids = df_season[df_season.TEAM_NAME == 'San Antonio Spurs'].GAME_ID

In [None]:
spurs_game_ids = list(spurs_game_ids)

In [None]:
from nba_api.stats.endpoints import playbyplayv2

In [None]:
playbyplayv2.PlayByPlayV2('0022101219').get_data_frames()[0].head(30)

In [None]:
for game in spurs_game_ids:
    df_spurs = playbyplay.PlayByPlay(game).get_data_frames()
                                                         season = '2021-22').get_data_frames()
        player_list = list(df_tpd[1].PLAYER_ID)
        for player in player_list:
            row = [team,player]
            players_list.append(row)
    team_player_ids = pd.DataFrame(players_list, columns = ['team_id','player_id'])
    team_player_ids.to_csv('team_player_ids.csv')

# WE CAN CREATE COLUMNS FOR INDIVIDUAL PLAYERS WHICH GIVE THEIR STATS AT EACH TIMESTAMP IN THE GAME

### boxscoreplayertrackv2 cannot do this unfortuantely

In [None]:
'0022100767'

In [None]:
from nba_api.stats.endpoints import boxscoreplayertrackv2
df = boxscoreplayertrackv2.BoxScorePlayerTrackV2(game_id = '0022100767').get_data_frames()

In [None]:
df[0]

In [None]:
df[0].info()

### Need to write out low hanging features, next level and ultimate!

## Yes!  GAMEROTATION at least gets me sub and gametime.

In [None]:
from nba_api.stats.endpoints import gamerotation

In [None]:
gamerotation.GameRotation(game_id = '0022101219').get_data_frames()[0]

In [None]:
from nba_api.stats.endpoints import hustlestatsboxscore

In [None]:
hustlestatsboxscore.HustleStatsBoxScore(game_id = '0022101219').get_data_frames()[1].info()

In [None]:
from nba_api.stats.endpoints import playerdashboardbygamesplits # You can divide the game into key areas and maybe match them up with the shooting df
from nba_api.stats.endpoints import winprobabilitypbp # You can match this up by time using game and gametime
from nba_api.stats.endpoints import teamgamelog
from nba_api.stats.endpoints import teamgamelogs
from nba_api.stats.endpoints import synergyplaytypes
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.endpoints import playergamelogs

##### Individual Player Stat splits (by half, quarter, point diferential) - Not sure how helpful, but really cool and intersting

In [None]:
playerdashboardbygamesplits.PlayerDashboardByGameSplits('1629640').get_data_frames()[4] # index ranges from 0-4

##### Win probability by second - this could be very useful when tied into play by play of game.

In [None]:
df_winprob = winprobabilitypbp.WinProbabilityPBP('0022101112').get_data_frames()[0] # 0 only real index

In [None]:
df_winprob[2500:]

In [None]:
df_winprob.ISVISIBLE

##### Teamgamelog - doesn't seem useful

In [None]:
df_tgl = teamgamelog.TeamGameLog('1610612759').get_data_frames()[0] # index of 0 only

In [None]:
df_tgl[df_tgl.WL == 'W'].FG3_PCT.mean()

In [None]:
df_tgl[df_tgl.WL == 'L'].FG3_PCT.mean()

##### Team Game Logs - not helpful

In [None]:
teamgamelogs.TeamGameLogs().get_data_frames()[0] # index of 0 only

##### Not sure how this one works

In [None]:
synergyplaytypes.SynergyPlayTypes().get_data_frames()[0]

##### Player game log only use seems to be plus minus, maybe we use that to determine just how good they are shooting

In [None]:
playergamelog.PlayerGameLog(season = '2021-22',player_id = '1629640').get_data_frames()[0] # zero only -> plus minus may be usable

In [None]:
playergamelogs.PlayerGameLogs().get_data_frames()[0] # zero only -> plus minus may be usable

#### WE CAN *MAYBE* USE THIS TO DESCRIBE THE PLAYER!

In [None]:
from nba_api.stats.endpoints import playerdashptshots

In [None]:
playerdashptshots.PlayerDashPtShots(spurs_id,player_id).get_data_frames()[4]