In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [35]:
scoring_df = pd.read_csv('scoring_dataset.csv')
defense_df = pd.read_csv('defense_dataset.csv')
defense_diff_df = pd.read_csv('defense_diff_dataset.csv')

scoring_columns_to_keep = [
    'PLAYER', 'GP', 'MIN', 'PTS', 'DRIVES',
    'C&S\nPTS', 'C&S\nFG%', 
    'Pull Up\nPTS', 'Pull Up\nFG%', 
    'eFG%', 'Shot Quality', 
    'Assisted 2s\n PTS', 'Unassisted 2s\n PTS',
    'Assisted 3s\n PTS', 'Unassisted 3s\n PTS'
]

defense_columns_to_keep = [
    'Player', 'STL', 'BLK'
]

defense_diff_columns_to_keep = [
    'PLAYER', 'DIFF%'
]

cleaned_scoring = scoring_df[scoring_columns_to_keep].copy()
cleaned_defense = defense_df[defense_columns_to_keep].copy()
cleaned_defense_diff = defense_diff_df[defense_diff_columns_to_keep].copy()

cleaned_scoring.columns = [
    'Player', 'GP', 'MIN', 'PPG', 'Drives',
    'C&S PTS', 'C&S FG%',
    'PU PTS', 'PU FG%',
    'eFG%', 'Shot Quality',
    'Assisted 2s', 'Unassisted 2s',
    'Assisted 3s', 'Unassisted 3s'
]

cleaned_defense.columns = [
    'Player', 'Steals', 'Blocks'
]

cleaned_defense_diff.columns = [
    'Player', 'DIFF%'
]

percent_cols = ['C&S FG%', 'eFG%', 'PU FG%']

for col in percent_cols:
    cleaned_scoring[col] = cleaned_scoring[col].astype(str).str.rstrip('%').astype(float) / 100

Unassisted_PTS = cleaned_scoring['Unassisted 2s'] + cleaned_scoring['Unassisted 3s']
cleaned_scoring['Unassisted PTS'] = Unassisted_PTS
cleaned_scoring['Unassisted%'] = cleaned_scoring['Unassisted PTS'] / cleaned_scoring['PPG']
cleaned_scoring['C&S%'] = cleaned_scoring['C&S PTS'] / cleaned_scoring['PPG']
cleaned_scoring['PU%'] = cleaned_scoring['PU PTS'] / cleaned_scoring['PPG']
cleaned_scoring['Gap'] = cleaned_scoring['eFG%'] - cleaned_scoring['Shot Quality']

cleaned = pd.merge(cleaned_scoring, cleaned_defense, on='Player', how='inner')
cleaned = pd.merge(cleaned, cleaned_defense_diff, on='Player', how='inner')


final_columns_to_drop = [
    'GP', 'MIN', 'PPG',
    'C&S PTS', 'C&S FG%',
    'PU PTS', 'PU FG%',
    'Assisted 2s', 'Unassisted 2s',
    'Assisted 3s', 'Unassisted 3s',
    'Unassisted PTS'
]

cleaned.drop(columns=final_columns_to_drop, inplace=True)
cleaned.set_index('Player', inplace=True)
cleaned


Unnamed: 0_level_0,Drives,eFG%,Shot Quality,Unassisted%,C&S%,PU%,Gap,Steals,Blocks,DIFF%
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Amen Thompson,7.5,0.575,0.56,0.358156,0.078014,0.070922,0.015,1.4,1.3,-5.5
Andrew Nembhard,8.4,0.506,0.51,0.359000,0.210000,0.280000,-0.004,1.2,0.2,1.3
Anfernee Simons,9.3,0.521,0.51,0.504663,0.259067,0.357513,0.011,0.9,0.1,4.4
Anthony Edwards,13.2,0.547,0.54,0.457609,0.126812,0.409420,0.007,1.2,0.6,-0.6
Austin Reaves,10.6,0.556,0.52,0.324257,0.237624,0.262376,0.036,1.1,0.3,3.3
...,...,...,...,...,...,...,...,...,...,...
Tyler Herro,13.9,0.563,0.52,0.371967,0.230126,0.338912,0.043,0.9,0.2,3.1
Tyrese Haliburton,10.8,0.582,0.52,0.458602,0.139785,0.489247,0.062,1.4,0.7,2.3
Tyrese Maxey,14.2,0.511,0.54,0.500000,0.133080,0.330798,-0.029,1.8,0.4,-0.4
Tyus Jones,4.7,0.570,0.52,0.282353,0.529412,0.215686,0.050,0.9,0.1,2.6


In [36]:
potential_features = ['Drives', 'Gap', 'Unassisted%', 'C&S%', 'PU%', 'Steals', 'DIFF%']

In [37]:
fig = px.scatter(
    cleaned,
    x='Shot Quality',
    y='eFG%',
    hover_name='Player',
    title='eFG% vs Shot Quality',
    labels={'Shot Quality': 'Shot Quality (Expected eFG%)', 'eFG%': 'Actual eFG%'},
    width=800,
    height=600
)

fig.add_trace(
    go.Scatter(
        x=[cleaned['Shot Quality'].min(), cleaned['Shot Quality'].max()],
        y=[cleaned['Shot Quality'].min(), cleaned['Shot Quality'].max()],
        mode='lines',
        line=dict(dash='dash', color='red'),
        name='actual = expected'
    )
)

fig.update_layout(template='plotly_white')
fig.show()


ValueError: Value of 'hover_name' is not the name of a column in 'data_frame'. Expected one of ['Drives', 'eFG%', 'Shot Quality', 'Unassisted%', 'C&S%', 'PU%', 'Gap', 'Steals', 'Blocks', 'DIFF%'] but received: Player

In [43]:
fig = px.scatter(
    cleaned,
    x='Shot Quality',
    y='Gap',
    hover_name='Player',
    title='Gap vs Shot Quality',
    labels={'Shot Quality': 'Shot Quality (Expected eFG%)', 'Gap': 'Gap (Actual eFG% - Expected eFG%)'},
    width=800,
    height=600
)

fig.add_trace(
    go.Scatter(
        x=[cleaned['Shot Quality'].min(), cleaned['Shot Quality'].max()],
        y=[0, 0],
        mode='lines',
        line=dict(dash='dash', color='red'),
        name='y = 0'
    )
)

fig.update_layout(template='plotly_white')
fig.show()

In [44]:
x = cleaned['Unassisted%']
y = cleaned['eFG%']

m, b = np.polyfit(x, y, 1)
line_x = np.linspace(x.min(), x.max(), 100)
line_y = m * line_x + b

fig = px.scatter(
    cleaned,
    x='Unassisted%',
    y='eFG%',
    hover_name='Player',
    title='Unassisted Scoring % vs eFG% for NBA Guards',
    labels={
        'Unassisted%': 'Unassisted Points (%)',
        'eFG%': 'Effective Field Goal %'
    },
    width=800,
    height=600
)

fig.add_trace(
    go.Scatter(
        x=line_x,
        y=line_y,
        mode='lines',
        name='Trendline',
        line=dict(color='red', dash='dash')
    )
)

fig.update_layout(template='plotly_white')
fig.show()


In [4]:
x = cleaned['C&S%']
y = cleaned['eFG%']

m, b = np.polyfit(x, y, 1)
line_x = np.linspace(x.min(), x.max(), 100)
line_y = m * line_x + b

fig = px.scatter(
    cleaned,
    x='C&S%',
    y='eFG%',
    hover_name='Player',
    title='Unassisted Scoring % vs eFG% for NBA Guards',
    labels={
        'C&S%': 'Catch and Shoot Points (%)',
        'eFG%': 'Effective Field Goal %'
    },
    width=800,
    height=600
)

fig.add_trace(
    go.Scatter(
        x=line_x,
        y=line_y,
        mode='lines',
        name='Trendline',
        line=dict(color='red', dash='dash')
    )
)

fig.update_layout(template='plotly_white')
fig.show()
