In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [3]:
# Load the raw per-player export. Several headers in the file contain embedded
# newlines/spaces, so the keys below reproduce them exactly.
raw_df = pd.read_csv('nba_stats_dataset.csv')

# Raw CSV header -> short column name used by the rest of the notebook.
# Insertion order defines the column order of `cleaned`.
column_renames = {
    'PLAYER': 'Player',
    'GP': 'GP',
    'MIN': 'MIN',
    'PTS': 'PPG',
    'C&S\nPTS': 'C&S PTS',
    'C&S\nFG%': 'C&S FG%',
    'Pull Up\nPTS': 'PU PTS',
    'Pull Up\nFG%': 'PU FG%',
    'eFG%': 'eFG%',
    'Shot Quality': 'Shot Quality',
    'Assisted 2s\n PTS': 'Assisted 2s',
    'Unassisted 2s\n PTS': 'Unassisted 2s',
    'Assisted 3s\n PTS': 'Assisted 3s',
    'Unassisted 3s\n PTS': 'Unassisted 3s',
}
columns_to_keep = list(column_renames)

# `rename` returns a new frame, so `raw_df` is left untouched.
cleaned = raw_df[columns_to_keep].rename(columns=column_renames)

percent_cols = ['C&S FG%', 'eFG%', 'PU FG%']


def _percent_to_fraction(values):
    """Stringify, drop any trailing '%' characters, parse as float and divide by 100."""
    return values.astype(str).str.rstrip('%').astype(float) / 100


for col in percent_cols:
    cleaned[col] = _percent_to_fraction(cleaned[col])

# Derived columns:
#   Unassisted PTS = unassisted 2s + unassisted 3s
#   Unassisted%    = Unassisted PTS / PPG
#   C&S%           = C&S PTS / PPG
#   Gap            = eFG% - Shot Quality
Unassisted_PTS = cleaned['Unassisted 2s'].add(cleaned['Unassisted 3s'])
points_per_game = cleaned['PPG']
cleaned = cleaned.assign(**{
    'Unassisted PTS': Unassisted_PTS,
    'Unassisted%': Unassisted_PTS.div(points_per_game),
    'C&S%': cleaned['C&S PTS'].div(points_per_game),
    'Gap': cleaned['eFG%'].sub(cleaned['Shot Quality']),
})

cleaned



Unnamed: 0,Player,GP,MIN,PPG,C&S PTS,C&S FG%,PU PTS,PU FG%,eFG%,Shot Quality,Assisted 2s,Unassisted 2s,Assisted 3s,Unassisted 3s,Unassisted PTS,Unassisted%,C&S%,Gap
0,Amen Thompson,69,32.3,14.1,1.1,0.292,1.0,0.354,0.575,0.56,5.51,5.01,1.04,0.04,5.05,0.358156,0.078014,0.015
1,Andrew Nembhard,65,28.9,10.0,2.1,0.321,2.8,0.432,0.506,0.51,2.86,3.17,1.94,0.42,3.59,0.359000,0.210000,-0.004
2,Anfernee Simons,70,32.7,19.3,5.0,0.377,6.9,0.383,0.521,0.51,1.40,6.14,5.61,3.60,9.74,0.504663,0.259067,0.011
3,Anthony Edwards,79,36.3,27.6,3.5,0.422,11.3,0.378,0.547,0.54,3.90,6.25,5.77,6.38,12.63,0.457609,0.126812,0.007
4,Austin Reaves,73,34.9,20.2,4.8,0.400,5.3,0.384,0.556,0.52,3.34,4.25,5.92,2.30,6.55,0.324257,0.237624,0.036
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,Tyler Herro,77,35.4,23.9,5.5,0.401,8.1,0.429,0.563,0.52,3.92,6.47,7.36,2.42,8.89,0.371967,0.230126,0.043
82,Tyrese Haliburton,73,33.6,18.6,2.6,0.424,9.1,0.413,0.582,0.52,2.77,4.30,4.73,4.23,8.53,0.458602,0.139785,0.062
83,Tyrese Maxey,52,37.7,26.3,3.5,0.355,8.7,0.371,0.511,0.54,3.73,8.42,4.56,4.73,13.15,0.500000,0.133080,-0.029
84,Tyus Jones,81,26.8,10.2,5.4,0.435,2.2,0.394,0.570,0.52,1.36,2.10,5.37,0.78,2.88,0.282353,0.529412,0.050


In [42]:
# Scatter of eFG% against Shot Quality, one marker per row of `cleaned`;
# hovering a marker shows the player's name.
efg_axis_labels = {
    'Shot Quality': 'Shot Quality (Expected eFG%)',
    'eFG%': 'Actual eFG%',
}
fig = px.scatter(
    cleaned,
    x='Shot Quality',
    y='eFG%',
    hover_name='Player',
    title='eFG% vs Shot Quality',
    labels=efg_axis_labels,
    width=800,
    height=600,
)

# Reference diagonal (y = x) drawn across the observed Shot Quality range.
# Markers above it have eFG% greater than Shot Quality; markers below, less.
shot_quality = cleaned['Shot Quality']
diagonal_ends = [shot_quality.min(), shot_quality.max()]
parity_line = go.Scatter(
    x=diagonal_ends,
    y=diagonal_ends,
    mode='lines',
    line=dict(dash='dash', color='red'),
    name='actual = expected',
)
fig.add_trace(parity_line)

fig.update_layout(template='plotly_white')
fig.show()


In [43]:
# Scatter of Gap (eFG% minus Shot Quality) against Shot Quality, one marker
# per row of `cleaned`; hovering a marker shows the player's name.
gap_axis_labels = {
    'Shot Quality': 'Shot Quality (Expected eFG%)',
    'Gap': 'Gap (Actual eFG% - Expected eFG%)',
}
fig = px.scatter(
    cleaned,
    x='Shot Quality',
    y='Gap',
    hover_name='Player',
    title='Gap vs Shot Quality',
    labels=gap_axis_labels,
    width=800,
    height=600,
)

# Horizontal zero line across the observed Shot Quality range: it separates
# positive gaps (above) from negative gaps (below).
shot_quality = cleaned['Shot Quality']
zero_line = go.Scatter(
    x=[shot_quality.min(), shot_quality.max()],
    y=[0, 0],
    mode='lines',
    line=dict(dash='dash', color='red'),
    name='y = 0',
)
fig.add_trace(zero_line)

fig.update_layout(template='plotly_white')
fig.show()

In [44]:
# Share of points that were unassisted (x) against effective FG% (y).
x = cleaned['Unassisted%']
y = cleaned['eFG%']

# Degree-1 least-squares fit; polyfit returns the slope first, then the intercept.
m, b = np.polyfit(x, y, deg=1)
# Evaluate the fitted line on 100 evenly spaced points over the observed x range.
line_x = np.linspace(x.min(), x.max(), num=100)
line_y = b + m * line_x

unassisted_axis_labels = {
    'Unassisted%': 'Unassisted Points (%)',
    'eFG%': 'Effective Field Goal %',
}
fig = px.scatter(
    cleaned,
    x='Unassisted%',
    y='eFG%',
    hover_name='Player',
    title='Unassisted Scoring % vs eFG% for NBA Guards',
    labels=unassisted_axis_labels,
    width=800,
    height=600,
)

# Overlay the fitted line as a dashed red trace with its own legend entry.
trendline = go.Scatter(
    x=line_x,
    y=line_y,
    mode='lines',
    name='Trendline',
    line=dict(color='red', dash='dash'),
)
fig.add_trace(trendline)

fig.update_layout(template='plotly_white')
fig.show()


In [4]:
# Share of points that came from catch-and-shoot attempts (x) against
# effective FG% (y).
x = cleaned['C&S%']
y = cleaned['eFG%']

# Degree-1 least-squares fit; polyfit returns the slope first, then the intercept.
m, b = np.polyfit(x, y, 1)
# Evaluate the fitted line on 100 evenly spaced points over the observed x range.
line_x = np.linspace(x.min(), x.max(), 100)
line_y = m * line_x + b

fig = px.scatter(
    cleaned,
    x='C&S%',
    y='eFG%',
    hover_name='Player',
    # Title now names the catch-and-shoot share actually plotted on the x axis
    # (it previously repeated the unassisted-scoring chart's title).
    title='Catch and Shoot Scoring % vs eFG% for NBA Guards',
    labels={
        'C&S%': 'Catch and Shoot Points (%)',
        'eFG%': 'Effective Field Goal %'
    },
    width=800,
    height=600
)

# Overlay the fitted line as a dashed red trace with its own legend entry.
fig.add_trace(
    go.Scatter(
        x=line_x,
        y=line_y,
        mode='lines',
        name='Trendline',
        line=dict(color='red', dash='dash')
    )
)

fig.update_layout(template='plotly_white')
fig.show()
