In [1]:
# Load packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import scipy.stats

from sklearn.neighbors import KernelDensity

In [2]:
# Load the pickled shots table and the pickled player-position object.
# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
with open('./data/players_shots_clean_restricted.pickle', 'rb') as shots_file:
    players_shots = pickle.load(shots_file)

with open('./data/player_position.pickle', 'rb') as position_file:
    players_position = pickle.load(position_file)

In [3]:
# VARIABLES
# Bounds that estimate_density uses to rescale LOC_X and LOC_Y.
X_MIN = -250
X_MAX = 250
Y_MIN = 0
Y_MAX = 422.5

# 201 x 201 evaluation grid over [0, 1] x [0, 1]; the imaginary step makes
# np.mgrid return 201 evenly spaced points per axis, endpoints included.
XX, YY = np.mgrid[0:1:201j, 0:1:201j]
# Flattened grid coordinates stacked row-wise: row 0 is XX, row 1 is YY.
POS = np.vstack((XX.ravel(), YY.ravel()))

In [4]:
# Function to estimate the density of the shots
def estimate_density(df):
    """Evaluate a Gaussian kernel density estimate of shot locations on the grid.

    Parameters
    ----------
    df : object exposing ``LOC_X`` and ``LOC_Y`` attributes (e.g. a DataFrame)
        Each attribute must provide ``to_numpy(dtype='float')``.

    Returns
    -------
    numpy.ndarray
        Density values evaluated at ``POS`` and reshaped to ``XX.shape``.
        Values below ``1e-16`` are replaced by exactly ``0``.

    Notes
    -----
    Relies on the module-level names ``X_MIN``, ``X_MAX``, ``Y_MIN``,
    ``Y_MAX``, ``POS`` and ``XX``.
    """
    # Min-max rescaling of both coordinates with the module-level bounds.
    x_scaled = (df.LOC_X.to_numpy(dtype='float') - X_MIN) / (X_MAX - X_MIN)
    y_scaled = (df.LOC_Y.to_numpy(dtype='float') - Y_MIN) / (Y_MAX - Y_MIN)

    # Fit the KDE (Silverman bandwidth) on the stacked sample, then evaluate
    # it at every grid position.
    kde = scipy.stats.gaussian_kde(
        np.vstack((x_scaled, y_scaled)), bw_method='silverman'
    )
    grid_density = kde.evaluate(POS)

    # Flush numerically negligible values to exactly zero.
    negligible = grid_density < 1e-16
    grid_density[negligible] = 0
    return grid_density.reshape(XX.shape)

In [5]:
# Select only the players with more than 100 made shots not in the restricted area.
# Only SHOT_MADE_FLAG is summed: a frame-wide groupby sum aggregates every
# column (non-numeric ones included on recent pandas), which is wasteful and
# can fail on dtypes that cannot be summed. The result is computed once and
# reused for both the mask and the player ids.
made_per_player = players_shots.groupby('PLAYER_ID')['SHOT_MADE_FLAG'].sum()
# Boolean mask aligned with the sorted PLAYER_ID index of the grouped sum.
cond = made_per_player.values > 100
players = made_per_player.index.to_numpy()[cond]
# Keep only the shots that belong to the selected players.
players_shots = players_shots[players_shots.PLAYER_ID.isin(players)]

In [163]:
# Apply estimate_density to each player's shots (attempted), grouped by PLAYER_ID.
shots_density = (
    players_shots
    .groupby('PLAYER_ID')
    .apply(estimate_density)
)

In [164]:
# Create the dataframe: one row per distinct (PLAYER_ID, PLAYER_NAME) pair,
# with the matching attempted-shot density joined on PLAYER_ID.
shots_density_df = (
    players_shots[['PLAYER_ID', 'PLAYER_NAME']]
    .drop_duplicates()
    .join(pd.DataFrame({'DENSITY': shots_density}), on='PLAYER_ID')
    # Discard the index inherited from players_shots and renumber from 0.
    .reset_index(drop=True)
)

In [165]:
# Save dataframe
# Pickle the attempted-shot density table with the highest available protocol.
with open('./data/players_shots_density_restricted.pickle', mode='wb') as out_file:
    pickle.dump(shots_density_df, out_file, protocol=pickle.HIGHEST_PROTOCOL)

In [166]:
# Now, we do the same for the made shots: keep the rows with SHOT_MADE_FLAG equal to 1.
players_shots_made = players_shots.loc[players_shots['SHOT_MADE_FLAG'].eq(1)]

In [167]:
# Apply estimate_density to each player's shots (made), grouped by PLAYER_ID.
shots_density_made = (
    players_shots_made
    .groupby('PLAYER_ID')
    .apply(estimate_density)
)

In [168]:
# Create the dataframe: one row per distinct (PLAYER_ID, PLAYER_NAME) pair,
# with the matching made-shot density joined on PLAYER_ID.
shots_density_made_df = (
    players_shots[['PLAYER_ID', 'PLAYER_NAME']]
    .drop_duplicates()
    .join(pd.DataFrame({'DENSITY': shots_density_made}), on='PLAYER_ID')
    # Discard the index inherited from players_shots and renumber from 0.
    .reset_index(drop=True)
)

In [169]:
# Save dataframe
# Pickle the made-shot density table with the highest available protocol.
with open('./data/players_shots_density_made_restricted.pickle', mode='wb') as out_file:
    pickle.dump(shots_density_made_df, out_file, protocol=pickle.HIGHEST_PROTOCOL)