In [None]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np
# Default figure size (width, height in inches) for every plot in the notebook.
plt.rcParams["figure.figsize"] = 20, 10

In [None]:
# Load the shots table; the first CSV column becomes the row index.
df = pd.read_csv(
    "/kaggle/input/nba-players-shootingcsv/nba_players_shooting.csv",
    index_col=0,
)

# Discover the data + Some Cleaning

In [None]:
# Peek at ten randomly drawn rows (no seed is set, so the rows differ between runs).
df.sample(n=10)

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
df.info()

Convert the shot coordinates from feet to meters

In [None]:
# Scale both coordinate columns by 0.3048 (metres per foot).
# NOTE: X and Y are overwritten in place, so re-running this cell scales them again.
df[['X', 'Y']] = df[['X', 'Y']] * 0.3048

In [None]:
# Summary statistics from describe() with its default arguments; the resulting
# column set is reused by the next cell to separate out the remaining columns.
num_desc= df.describe()
num_desc

In [None]:
# Describe every column that was NOT covered by the summary in `num_desc`.
cat_desc = (
    df
    .drop(columns=num_desc.columns)
    .describe()
)
cat_desc

In [None]:
# Distinct values appearing in the SHOOTER column.
df['SHOOTER'].unique()

In [None]:
# Distinct values appearing in the DEFENDER column.
df['DEFENDER'].unique()

In [None]:
# Distinct values appearing in the RANGE column.
df['RANGE'].unique()

So the dataset covers 4 players and their performance against each other when defending and when shooting the ball.

In [None]:
# Bar chart of how many shots fall under each SCORE value, plus the exact counts.
sns.countplot(x='SCORE', data=df)
print(df['SCORE'].value_counts())

We can see that the numbers of made and missed shots are almost the same, with missed shots slightly higher (56).

In [None]:
# Shots taken by each shooter, split by SCORE value.
plt.title("Attack Capacity")
sns.countplot(
    x='SHOOTER',
    hue='SCORE',
    data=df,
)

In [None]:
# Same counts seen from the defender's side: the labels are swapped on a copy of
# the frame so that a made shot reads as 'MISSED' and a missed shot as "DEFENDED".
plt.title("Defense Capacity")
sns.countplot(
    x='DEFENDER',
    hue='SCORE',
    data=df.replace({'MADE': 'MISSED', 'MISSED': "DEFENDED"}),
)

In [None]:
def annotate(row):
    """Relabel a row's SCORE from the defender's point of view.

    Rows whose TYPE is not 'DEFENDER' are returned untouched. For defender rows,
    'MISSED' becomes 'DEFENDED' and 'MADE' becomes 'MISSED_DEFENSE'; any other
    SCORE value is left as is.

    The row is modified in place and the same object is returned.
    """
    if row.TYPE != 'DEFENDER':
        return row
    outcome = row.SCORE
    if outcome == 'MISSED':
        row.SCORE = 'DEFENDED'
    elif outcome == 'MADE':
        row.SCORE = 'MISSED_DEFENSE'
    return row
# Reshape to one row per (shot, player): the columns left after indexing on
# X / Y / RANGE / SCORE are stacked, so each remaining column name lands in TYPE
# and its value in NAME. `annotate` then relabels SCORE on the DEFENDER rows.
score_df = (
    df.set_index(['X', 'Y', 'RANGE', 'SCORE'])
    .stack()
    .reset_index()
    .rename(columns={'level_4': 'TYPE', 0: 'NAME'})
    .apply(annotate, axis=1)
)
score_df

In [None]:
# Number of rows per (player, outcome) pair, stored in a VALUE column.
temp_score_df = (
    score_df
    .groupby(['NAME', 'SCORE'])
    .size()
    .reset_index(name='VALUE')
)
temp_score_df


In [None]:
# Raw outcome counts per player.
sns.barplot(
    x='NAME',
    y='VALUE',
    hue='SCORE',
    data=temp_score_df,
)

In [None]:
# Row count per (player, role); used as the denominator in `percent`.
cats = score_df.groupby(by=['NAME', 'TYPE']).size()
cats

In [None]:
def percent(row):
    """Return the row's VALUE as a fraction of the player's total for that role.

    Rows whose SCORE is 'MISSED' or 'MADE' are divided by the player's SHOOTER
    total; every other SCORE is divided by the player's DEFENDER total.
    Totals are read from the notebook-level `cats` lookup, indexed by
    (NAME, role).
    """
    role = 'SHOOTER' if row.SCORE in ('MISSED', 'MADE') else 'DEFENDER'
    return row.VALUE / cats[row.NAME, role]
# Row-wise share of each outcome within the player's shooter/defender total.
temp_score_df['PERCENTAGE'] = temp_score_df.apply(percent, axis=1)

In [None]:
# Outcome shares per player (fractions rather than raw counts).
sns.barplot(
    x='NAME',
    y='PERCENTAGE',
    hue='SCORE',
    data=temp_score_df,
)

<ol>
<li>Seth Curry: weak on defense but strong on attack</li>
<li>Russell Westbrook: weak on attack but very strong on defense</li>
<li>Chris Paul: balanced player</li>
<li>Trae Young: weakest overall performance</li>
</ol>

In [None]:

# Row count per (player, role, range). NOTE: this rebinds `cats`, replacing the
# earlier two-level (player, role) lookup.
cats = score_df.groupby(by=['NAME', 'TYPE', 'RANGE']).size()
cats

In [None]:
# Number of rows per (player, range, outcome), stored in a VALUE column.
# NOTE: this rebinds `temp_score_df`, replacing the per-player table built earlier.
temp_score_df = (
    score_df
    .groupby(['NAME', 'RANGE', 'SCORE'])
    .size()
    .reset_index(name='VALUE')
)
temp_score_df

In [None]:
def percent(row):
    """Return the row's VALUE as a fraction of the player's total for that role and range.

    Rows whose SCORE is 'MISSED' or 'MADE' are divided by the SHOOTER total;
    every other SCORE is divided by the DEFENDER total. Totals are read from
    the notebook-level `cats` lookup, indexed by (NAME, role, RANGE).

    NOTE: this definition shadows the earlier `percent`, which indexed `cats`
    by (NAME, role) only.
    """
    role = 'SHOOTER' if row.SCORE in ('MISSED', 'MADE') else 'DEFENDER'
    return row.VALUE / cats[row.NAME, role, row.RANGE]
# Share of each outcome within the player's total for that role and range.
temp_score_df['PERCENTAGE'] = temp_score_df.apply(percent, axis=1)
temp_score_df
# Display order for the ranges: a hand-picked permutation (0-based positions) of
# the values returned by unique().
# NOTE(review): this relies on unique() yielding exactly six ranges in a fixed
# order; presumably it restores numeric order from a lexical one - confirm.
range_order = [temp_score_df.RANGE.unique()[pos] for pos in (0, 5, 1, 2, 3, 4)]


In [None]:
# Outcome shares per shooting range, with the x-axis following `range_order`.
sns.barplot(
    x='RANGE',
    y='PERCENTAGE',
    hue='SCORE',
    order=range_order,
    data=temp_score_df.sort_values('RANGE'),
)

In [None]:
# One bar chart per player: outcome share for each shooting range.
# The grid is sized from the data rather than a hard-coded 4 rows, so the loop
# can never index past the last subplot when the number of players changes.
players = temp_score_df.NAME.unique()
# squeeze=False always yields a 2-D array of Axes (even for a single player);
# ravel() flattens it to the 1-D sequence the loop indexes into.
fig, axs = plt.subplots(
    figsize=(20, 10 * len(players)),  # 10 inches per row -> (20, 40) for 4 players
    nrows=len(players),
    ncols=1,
    squeeze=False,
)
axs = axs.ravel()
for index, player in enumerate(players):
    axs[index].set_title(player)
    sns.barplot(
        data=temp_score_df[temp_score_df.NAME == player],
        x='RANGE',
        y='PERCENTAGE',
        hue='SCORE',
        order=range_order,
        ax=axs[index],
    )

To summarize what we have so far:

In [None]:
# For every (player, outcome) pair, keep the RANGE whose PERCENTAGE is highest.
# groupby(...).idxmax() returns the row label of each group's maximum (first
# occurrence on ties), which replaces a per-group Python lambda in
# groupby.apply with a single vectorised lookup.
players_specs = (
    temp_score_df
    .loc[temp_score_df.groupby(['NAME', 'SCORE'])['PERCENTAGE'].idxmax()]
    .set_index(['NAME', 'SCORE'])['RANGE']
    .unstack()  # rows: NAME, columns: SCORE
    .rename(columns={
        'DEFENDED': 'Best Defense',
        'MADE': 'Best Attack',
        'MISSED_DEFENSE': 'Worst Defense',
        'MISSED': 'Worst Attack',
    })
)
players_specs.columns.name = "Performance"

players_specs

In [None]:
# One bar chart per shooting range, comparing the players on each outcome.
# The panels follow `range_order` (the order used by the other range plots)
# instead of the raw unique() order, and the grid is sized from that list
# rather than a hard-coded 6 rows.
# squeeze=False always yields a 2-D array of Axes; ravel() flattens it to 1-D.
fig, axs = plt.subplots(
    figsize=(20, 40),
    nrows=len(range_order),
    ncols=1,
    squeeze=False,
)
axs = axs.ravel()
for index, rg in enumerate(range_order):
    axs[index].set_title(rg)
    sns.barplot(
        data=temp_score_df[temp_score_df.RANGE == rg],
        x='SCORE',
        y='PERCENTAGE',
        hue='NAME',
        ax=axs[index],
    )

In [None]:
# For every (outcome, range) pair, keep the player whose PERCENTAGE is highest.
# groupby(...).idxmax() returns the row label of each group's maximum (first
# occurrence on ties), which replaces a per-group Python lambda in
# groupby.apply with a single vectorised lookup.
players_specs = (
    temp_score_df
    .loc[temp_score_df.groupby(['SCORE', 'RANGE'])['PERCENTAGE'].idxmax()]
    .set_index(['SCORE', 'RANGE'])['NAME']
    .unstack(level=0)  # rows: RANGE, columns: SCORE
    .rename(columns={
        'DEFENDED': 'Best Defense',
        'MADE': 'Best Attack',
        'MISSED_DEFENSE': 'Worst Defense',
        'MISSED': 'Worst Attack',
    })
)
players_specs.columns.name = "Performance"
players_specs

# Exploring the positions

In [None]:
# Distribution of the X coordinate, with its summary statistics.
sns.histplot(x='X', data=df)
print(df['X'].describe())


In [None]:
# Distribution of the Y coordinate, with its summary statistics.
sns.histplot(x='Y', data=df)
print(df['Y'].describe())


In [None]:
# Shot positions on the court, coloured by shooter.
sns.scatterplot(
    x='X',
    y='Y',
    hue='SHOOTER',
    data=df,
)


In [None]:
# 2-D density contours of shot positions, one colour per SCORE value.
sns.kdeplot(
    x='X',
    y='Y',
    hue='SCORE',
    data=df,
    levels=4,
    bw_adjust=0.1,
    palette='viridis',
)


In [None]:
# One density plot per player, with contours split by (annotated) SCORE.
# The grid is sized from the data rather than a hard-coded 4 rows, so the loop
# can never index past the last subplot when the number of players changes.
players = score_df.NAME.unique()
# squeeze=False always yields a 2-D array of Axes (even for a single player);
# ravel() flattens it to the 1-D sequence the loop indexes into.
fig, axs = plt.subplots(
    figsize=(20, 10 * len(players)),  # 10 inches per row -> (20, 40) for 4 players
    nrows=len(players),
    ncols=1,
    squeeze=False,
)
axs = axs.ravel()
for index, player in enumerate(players):
    axs[index].set_title(player)
    sns.kdeplot(
        data=score_df[score_df.NAME == player],
        x='X',
        y='Y',
        hue='SCORE',
        levels=2,
        bw_adjust=0.1,
        ax=axs[index],
    )


For example, we can conclude that Russell Westbrook defends the left wing (negative X) more efficiently than the right one, and so on.