In [9]:
import pandas as pd
import numpy as np

The goal of this notebook is to create the different features in the data for the model. I want to find the (x, y) coordinates of the court itself, then separate it into 13 different shooting locations:

- left corner 3, left wing 3, center 3, right wing 3, right corner 3
- deep left baseline 2, deep left wing 2, deep center 2, deep right wing 2, deep right baseline 2
- short left baseline 2, short left wing 2, short center 2, short right wing 2, short right baseline 2
- floater range, layup and dunk (could make layup and dunk a single position)
- deep 3 (anything beyond 28 ft in theory)

In [10]:
# Load the 2000-2020 shot chart export, using the GAME_DATE column as the index.
df = pd.read_csv(
    '2000-2020_shot_charts.csv',
    index_col='GAME_DATE',
)

In [11]:
# Sanity check: (number of rows, number of columns) of the loaded shot data.
df.shape

(640705, 23)

# Messing with the data to try and get an idea of court locations

In [12]:
# Which basic shot zones appear for shots with LOC_Y between 150 and 237
# (both ends inclusive)? Used to see where mid-range ends and threes begin.
df.loc[df.LOC_Y.between(150, 237), 'SHOT_ZONE_BASIC'].unique()

array(['Above the Break 3', 'Mid-Range'], dtype=object)

In [5]:
# List every distinct SHOT_ZONE_BASIC label present in the data.
df['SHOT_ZONE_BASIC'].unique()

array(['Restricted Area', 'Right Corner 3', 'In The Paint (Non-RA)',
       'Left Corner 3', 'Mid-Range', 'Above the Break 3', 'Backcourt'],
      dtype=object)

For x and y location, 10 units in the LOC_X or LOC_Y value is equal to 1 ft on the court. 

-250, -50 = left corner

250, -50 = right corner

0, 238 = top of the key (any value for LOC_Y that is 238 or higher would be great)

Shot Areas: 

### 3 Pointers

X(-250 to -220) & Y(-50 to 87) = left corner 3

X(220 to 250) & Y(-50 to 87) = right corner 3

X(-250 to -80) & Y(87 to 280) & Shot_zone(above the break) = left wing 3

X(80 to 250) & Y(87 to 280) & Shot_zone(above the break) = right wing 3

X(-80 to 80) and Y(87 to 280) & Shot_zone(above the break) = Center 3

X(any) and Y(280 to 350) = Deep 3

X(any) and Y(350+) = Heave

### Mid Range 

X(-220 to -150) & Y(-50 to 90) = left baseline deep midrange

X(150 to 220) & Y(-50 to 90) = right baseline deep midrange

((X(-220 to -150) & Y(90+)) or (X(-150 to -80) & Y(150+))) & Shotzone(Mid-Range) = left wing deep mid ranger

((X(150 to 220) & Y(90+)) or (X(80 to 150) & Y(150+))) & Shotzone(Mid-Range) = right wing deep mid ranger

X(-150 to -80) & Y(-50 to 90) = short left baseline midranger

X(80 to 150) & Y(-50 to 90) = short right baseline midranger

X(-150 to -80) & Y(90 to 150) = short left wing mid ranger

X(80 to 150) & Y(90 to 150) = short right wing mid ranger

X(-80 to 80) & Y(210+) & Shotzone(Mid-Range) = deep center midranger

X(-80 to 80) & Y(150 to 210) = short center midrange

### Paint

X(-80 to 80) & Y(90 to 150) = Floater

X(-80 to 80) & Y(-50 to 90) & Shotzone(In The Paint (Non-RA)) = layup/in the paint

Shotzone(restricted area) = restricted area

## Trying to organize the data

#### Ran into a few issues. First, the NBA API is not returning missed shots, so I will need to work with just the made-shot profiles and add shooting percentages from a different library.

#### Secondly, due to issues with multi-indexing and grouping by date, I need to use calendar-year stats. This is not ideal, since the NBA season runs from late October to mid April in most cases, but it still gives good value in the data because it is applied the same way to all players.

In [13]:
# Move the index back into an ordinary column so the game date can be read
# as a string again (e.g. to pull out the year for each player).
new_df = df.reset_index(drop=False)

In [15]:
# One row per player, indexed by PLAYER_NAME, carrying that player's PLAYER_ID.
# The ID is kept only in case the profile needs to be joined to other data later.
# `first()` is used rather than `mean()`: an ID is a label, not a quantity, so
# averaging it produces a float and a meaningless value if two different
# players ever share a name.
players_df = new_df.groupby('PLAYER_NAME')['PLAYER_ID'].first().to_frame()


In [17]:
def _count_zone_shots(shots, in_zone, zone_name):
    """Count, per player, the shots selected by a boolean zone mask.

    Parameters
    ----------
    shots : pandas.DataFrame
        Shot-level rows; must contain ``PLAYER_NAME`` and ``LOC_X`` columns.
    in_zone : pandas.Series of bool
        Row mask aligned with ``shots``; True where the shot lies in the zone.
    zone_name : str
        Name given to the resulting count column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``PLAYER_NAME`` with a single column ``zone_name`` holding
        the number of non-null ``LOC_X`` values among the selected rows.
    """
    return (
        shots.loc[in_zone]
        .groupby('PLAYER_NAME')['LOC_X']
        .count()
        .to_frame()
        .rename(columns={'LOC_X': zone_name})
    )


loc_x = new_df.LOC_X
loc_y = new_df.LOC_Y
shot_zone = new_df.SHOT_ZONE_BASIC

# Lateral (X) bands. Bands are half-open (lower bound exclusive, upper bound
# inclusive) so a shot on a shared boundary is counted once. The left sideline
# itself (X == -250) is included so shots taken exactly on it are not dropped,
# mirroring the right side where X == 250 is included.
x_left_corner = (loc_x >= -250) & (loc_x <= -220)
x_left_deep = (loc_x > -220) & (loc_x <= -150)
x_left_short = (loc_x > -150) & (loc_x <= -80)
x_center = (loc_x > -80) & (loc_x <= 80)
x_right_short = (loc_x > 80) & (loc_x <= 150)
x_right_deep = (loc_x > 150) & (loc_x <= 220)
x_right_corner = (loc_x > 220) & (loc_x <= 250)
x_left_wing_3 = (loc_x >= -250) & (loc_x <= -80)
x_right_wing_3 = (loc_x > 80) & (loc_x <= 250)

# Depth (Y) bands.
y_baseline = loc_y <= 87
y_floater = (loc_y > 87) & (loc_y <= 150)
y_short_center = (loc_y > 150) & (loc_y <= 210)
y_wing_3 = (loc_y > 87) & (loc_y <= 280)
y_deep_3 = (loc_y > 280) & (loc_y <= 350)
y_heave = loc_y > 350

# Zone labels, used where coordinates alone cannot separate a long two from
# a three (or the paint from the restricted area).
is_above_break_3 = shot_zone == 'Above the Break 3'
is_mid_range = shot_zone == 'Mid-Range'
is_paint_non_ra = shot_zone == 'In The Paint (Non-RA)'
is_restricted = shot_zone == 'Restricted Area'

# --- 3 pointers ---
left_corner_3 = _count_zone_shots(new_df, x_left_corner & y_baseline, 'left_corner_3')

right_corner_3 = _count_zone_shots(new_df, x_right_corner & y_baseline, 'right_corner_3')

left_wing_3 = _count_zone_shots(new_df, x_left_wing_3 & y_wing_3 & is_above_break_3, 'left_wing_3')

right_wing_3 = _count_zone_shots(new_df, x_right_wing_3 & y_wing_3 & is_above_break_3, 'right_wing_3')

center_3 = _count_zone_shots(new_df, x_center & y_wing_3 & is_above_break_3, 'center_3')

deep_3 = _count_zone_shots(new_df, y_deep_3, 'deep_3')

heave = _count_zone_shots(new_df, y_heave, 'heave')

# --- Mid range ---
left_baseline_deep_2 = _count_zone_shots(new_df, x_left_deep & y_baseline, 'left_baseline_deep_2')

right_baseline_deep_2 = _count_zone_shots(new_df, x_right_deep & y_baseline, 'right_baseline_deep_2')

# The Mid-Range filter must cover BOTH sub-regions; otherwise threes, deep
# threes and heaves taken from the outer band are counted as long twos.
left_wing_deep_2 = _count_zone_shots(
    new_df,
    ((x_left_deep & (loc_y > 87)) | (x_left_short & (loc_y > 150))) & is_mid_range,
    'left_wing_deep_2',
)

# Mirror image of the left side: inner sub-region is X in (80, 150].
right_wing_deep_2 = _count_zone_shots(
    new_df,
    ((x_right_deep & (loc_y > 87)) | (x_right_short & (loc_y > 150))) & is_mid_range,
    'right_wing_deep_2',
)

left_baseline_short_2 = _count_zone_shots(new_df, x_left_short & y_baseline, 'left_baseline_short_2')

right_baseline_short_2 = _count_zone_shots(new_df, x_right_short & y_baseline, 'right_baseline_short_2')

left_wing_short_2 = _count_zone_shots(new_df, x_left_short & y_floater, 'left_wing_short_2')

right_wing_short_2 = _count_zone_shots(new_df, x_right_short & y_floater, 'right_wing_short_2')

deep_center_2 = _count_zone_shots(new_df, x_center & (loc_y > 210) & is_mid_range, 'deep_center_2')

short_center_2 = _count_zone_shots(new_df, x_center & y_short_center, 'short_center_2')

# --- Paint ---
floater_range = _count_zone_shots(new_df, x_center & y_floater, 'floater_range')

in_the_paint = _count_zone_shots(new_df, x_center & y_baseline & is_paint_non_ra, 'in_the_paint')

restricted_area = _count_zone_shots(new_df, is_restricted, 'restricted_area')


In [18]:
# Place the per-zone count tables side by side, aligned on their shared index.
# players_df comes first so PLAYER_ID is the leading column; a player with no
# shots in a zone gets NaN in that zone's column.
zone_frames = [
    players_df,
    left_corner_3,
    right_corner_3,
    left_wing_3,
    right_wing_3,
    center_3,
    deep_3,
    heave,
    left_baseline_deep_2,
    right_baseline_deep_2,
    left_wing_deep_2,
    right_wing_deep_2,
    left_baseline_short_2,
    right_baseline_short_2,
    left_wing_short_2,
    right_wing_short_2,
    deep_center_2,
    short_center_2,
    floater_range,
    in_the_paint,
    restricted_area,
]
player_shot_profile = pd.concat(zone_frames, axis=1)



In [19]:
# A missing value means the player has no recorded shots in that zone, so
# treat it as a count of 0.
player_shot_profile = player_shot_profile.fillna(value=0)

In [20]:
# Write the finished per-player shot profile to disk for the modelling step.
player_shot_profile.to_csv(path_or_buf='Shot_Profiles.csv')