In [1]:
# Load packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import scipy.stats

from sklearn.neighbors import KernelDensity

In [2]:
# Read the two pickled inputs: the shot records and the player/position table.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open('./data/players_shots_clean_restricted.pickle', 'rb') as shots_file:
    players_shots = pickle.load(shots_file)

with open('./data/player_position.pickle', 'rb') as position_file:
    players_position = pickle.load(position_file)

In [3]:
# Constants
# Bounds of the LOC_X / LOC_Y coordinates.
X_MIN, X_MAX = -250, 250
Y_MIN, Y_MAX = 0, 422.5

# 201 x 201 regular grid on the unit square; POS holds the flattened grid
# coordinates as a (2, 201 * 201) array (row 0: x, row 1: y).
XX, YY = np.mgrid[0:1:201j, 0:1:201j]
POS = np.stack((XX.ravel(), YY.ravel()), axis=0)

In [4]:
# Functions
def estimate_density(df, bw_method='silverman', x_range=None, y_range=None, grid=None):
    """Estimate a 2-D Gaussian kernel density of shot locations on a grid.

    The ``LOC_X`` / ``LOC_Y`` coordinates are rescaled linearly so that the
    given bounds map to 0 and 1, a ``scipy.stats.gaussian_kde`` is fitted to
    the rescaled points and the estimate is evaluated on the grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Shots to fit; must provide ``LOC_X`` and ``LOC_Y`` columns.
    bw_method : str, scalar or callable, optional
        Bandwidth selector forwarded to ``scipy.stats.gaussian_kde``.
        Defaults to ``'silverman'``.
    x_range, y_range : (min, max) pair, optional
        Coordinate bounds used for the rescaling. When omitted, the
        module-level ``(X_MIN, X_MAX)`` and ``(Y_MIN, Y_MAX)`` are used.
    grid : pair of arrays, optional
        ``(grid_x, grid_y)`` evaluation coordinates in rescaled units, both
        with the same shape. When omitted, the module-level ``XX`` / ``YY``
        grid (pre-flattened in ``POS``) is used.

    Returns
    -------
    numpy.ndarray
        Density values with the same shape as the grid. Values below
        ``1e-16`` are set to exactly 0.
    """
    # Fall back to the notebook-level configuration only when no override is
    # supplied, so the function can also be used (and tested) stand-alone.
    x_min, x_max = (X_MIN, X_MAX) if x_range is None else x_range
    y_min, y_max = (Y_MIN, Y_MAX) if y_range is None else y_range
    if grid is None:
        points, out_shape = POS, XX.shape
    else:
        grid_x, grid_y = (np.asarray(axis, dtype=float) for axis in grid)
        points = np.vstack([grid_x.ravel(), grid_y.ravel()])
        out_shape = grid_x.shape

    # Rescaling to [0, 1]
    x = (df.LOC_X.to_numpy(dtype='float') - x_min) / (x_max - x_min)
    y = (df.LOC_Y.to_numpy(dtype='float') - y_min) / (y_max - y_min)

    # Kernel density estimate; evaluating on a (2, n) array yields a 1-D
    # array of n values, so no transpose is needed before reshaping.
    kernel = scipy.stats.gaussian_kde(np.vstack([x, y]), bw_method=bw_method)
    density = kernel(points)

    # Flush negligible values to exactly zero.
    density[density < 1e-16] = 0
    return np.reshape(density, out_shape)

In [6]:
# Optional filter, currently disabled: drop every player whose position is
# Center, Center-Forward or Forward-Center, and also drop Mitchell Robinson.
# not_centers = players_position.query(
#     "POSITION != 'Center' & POSITION != 'Center-Forward' & POSITION != 'Forward-Center'"
# )
# players_shots = players_shots.loc[[i in not_centers.PLAYER_ID.values for i in players_shots.PLAYER_ID.values]]
# players_shots = players_shots.query("PLAYER_NAME != 'Mitchell Robinson'")

In [5]:
# Select only the players with more than 100 made shots not in the restricted area.
# Sum just SHOT_MADE_FLAG, once: aggregating the whole frame would also
# "sum" the non-numeric PLAYER_NAME column, and did the grouping twice.
made_per_player = players_shots.groupby('PLAYER_ID')['SHOT_MADE_FLAG'].sum()
cond = made_per_player.to_numpy() > 100
players = made_per_player.index.to_numpy()[cond]

# Keep only the shots taken by the selected players.
players_shots = players_shots[players_shots.PLAYER_ID.isin(players)]

In [8]:
# Number of selected players.
players.shape[0]

119

In [12]:
# NOTE(review): both counts are hard-coded and their provenance is not shown
# in this notebook — TODO confirm what they measure and derive them from the data.
71893 / 186621

0.3852353164970716

In [11]:
# Display the current contents of players_shots.
players_shots

Unnamed: 0,PLAYER_ID,PLAYER_NAME,LOC_X,LOC_Y,SHOT_MADE_FLAG
0,1628389,Bam Adebayo,-187,402,0
25,1628389,Bam Adebayo,-94,141,0
51,1628389,Bam Adebayo,85,108,1
65,1628389,Bam Adebayo,81,173,0
83,1628389,Bam Adebayo,65,120,1
...,...,...,...,...,...
1377,1629027,Trae Young,-142,329,0
1379,1629027,Trae Young,54,304,0
1381,1629027,Trae Young,115,135,1
1386,1629027,Trae Young,-104,294,0


In [163]:
# Fit one density estimate per player over all of that player's shots;
# the result is indexed by PLAYER_ID.
shots_density = (
    players_shots
    .groupby('PLAYER_ID')
    .apply(estimate_density)
)

In [164]:
# One row per (PLAYER_ID, PLAYER_NAME) pair with that player's density grid
# attached in a DENSITY column.
shots_density_df = (
    players_shots[['PLAYER_ID', 'PLAYER_NAME']]
    .drop_duplicates()
    .join(pd.DataFrame({'DENSITY': shots_density}), on='PLAYER_ID')
    # Discard the old row labels directly instead of materialising them as an
    # 'index' column and dropping it again (which fails if the index is named).
    .reset_index(drop=True)
)

In [165]:
# Persist the per-player density table as a pickle.
with open('./data/players_shots_density_restricted.pickle', 'wb') as out_file:
    pickle.dump(shots_density_df, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [166]:
# Repeat the same steps for made shots only (SHOT_MADE_FLAG equal to 1).
players_shots_made = players_shots.loc[players_shots['SHOT_MADE_FLAG'].eq(1)]

In [167]:
# Fit one density estimate per player over that player's made shots;
# the result is indexed by PLAYER_ID.
shots_density_made = (
    players_shots_made
    .groupby('PLAYER_ID')
    .apply(estimate_density)
)

In [168]:
# One row per (PLAYER_ID, PLAYER_NAME) pair with that player's made-shot
# density grid attached in a DENSITY column.
shots_density_made_df = (
    players_shots[['PLAYER_ID', 'PLAYER_NAME']]
    .drop_duplicates()
    .join(pd.DataFrame({'DENSITY': shots_density_made}), on='PLAYER_ID')
    # Discard the old row labels directly instead of materialising them as an
    # 'index' column and dropping it again (which fails if the index is named).
    .reset_index(drop=True)
)

In [169]:
# Persist the per-player made-shot density table as a pickle.
with open('./data/players_shots_density_made_restricted.pickle', 'wb') as out_file:
    pickle.dump(shots_density_made_df, out_file, protocol=pickle.HIGHEST_PROTOCOL)