In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [54]:
# Build the per-player feature table `cleaned` from three CSV exports:
# scoring splits, steals/blocks, and defensive DIFF%.
scoring_df = pd.read_csv('scoring_dataset.csv')
defense_df = pd.read_csv('defense_dataset.csv')
defense_diff_df = pd.read_csv('defense_diff_dataset.csv')

# Raw header names exactly as they appear in the CSVs (some contain embedded
# newlines, and the assisted/unassisted ones a space after the newline).
scoring_columns_to_keep = [
    'PLAYER', 'GP', 'MIN', 'PTS', 'Drive\nPTS',
    'C&S\nPTS', 'C&S\nFG%', 
    'Pull Up\nPTS', 'Pull Up\nFG%', 
    'eFG%', 'Shot Quality', 
    'Assisted 2s\n PTS', 'Unassisted 2s\n PTS',
    'Assisted 3s\n PTS', 'Unassisted 3s\n PTS'
]

defense_columns_to_keep = [
    'Player', 'STL', 'BLK'
]

defense_diff_columns_to_keep = [
    'PLAYER', 'DIFF%'
]

# .copy() so the column assignments below never write into the raw frames.
cleaned_scoring = scoring_df[scoring_columns_to_keep].copy()
cleaned_defense = defense_df[defense_columns_to_keep].copy()
cleaned_defense_diff = defense_diff_df[defense_diff_columns_to_keep].copy()

# Positional rename: order must match the *_columns_to_keep lists above.
cleaned_scoring.columns = [
    'Player', 'GP', 'MIN', 'PPG', 'Drive PTS',
    'C&S PTS', 'C&S FG%',
    'PU PTS', 'PU FG%',
    'eFG%', 'Shot Quality',
    'Assisted 2s', 'Unassisted 2s',
    'Assisted 3s', 'Unassisted 3s'
]

cleaned_defense.columns = [
    'Player', 'Steals', 'Blocks'
]

cleaned_defense_diff.columns = [
    'Player', 'DIFF%'
]

# Strip a trailing '%' (if any) and rescale these columns by 1/100.
percent_cols = ['C&S FG%', 'eFG%', 'PU FG%']

for col in percent_cols:
    cleaned_scoring[col] = cleaned_scoring[col].astype(str).str.rstrip('%').astype(float) / 100

# Share of a player's points (the 'PPG' column) coming from each shot type.
Unassisted_PTS = cleaned_scoring['Unassisted 2s'] + cleaned_scoring['Unassisted 3s']
cleaned_scoring['Unassisted PTS'] = Unassisted_PTS
cleaned_scoring['Unassisted%'] = cleaned_scoring['Unassisted PTS'] / cleaned_scoring['PPG']
cleaned_scoring['C&S%'] = cleaned_scoring['C&S PTS'] / cleaned_scoring['PPG']
cleaned_scoring['PU%'] = cleaned_scoring['PU PTS'] / cleaned_scoring['PPG']
cleaned_scoring['Drive%'] = cleaned_scoring['Drive PTS'] / cleaned_scoring['PPG']
cleaned_scoring['Gap'] = cleaned_scoring['eFG%'] - cleaned_scoring['Shot Quality']

# Steals per minute. The defense and scoring frames come from different CSVs,
# so dividing one Series by the other would pair rows by position (RangeIndex)
# rather than by player. Look up each defense row's minutes by player name so
# a differing row order or player set cannot mix players up.
# drop_duplicates keeps the lookup index unique, which Series.map requires.
minutes_by_player = cleaned_scoring.drop_duplicates('Player').set_index('Player')['MIN']
cleaned_defense['SPM'] = cleaned_defense['Steals'] / cleaned_defense['Player'].map(minutes_by_player)

# NOTE(review): assumes DIFF% is already numeric (no '%' suffix) - confirm.
cleaned_defense_diff['DIFF%'] = cleaned_defense_diff['DIFF%'] / 100

# Inner joins: only players present in all three datasets are kept.
cleaned = pd.merge(cleaned_scoring, cleaned_defense, on='Player', how='inner')
cleaned = pd.merge(cleaned, cleaned_defense_diff, on='Player', how='inner')


# Raw inputs that were only needed to derive the features above.
final_columns_to_drop = [
    'GP', 'MIN', 'PPG',
    'C&S PTS', 'C&S FG%',
    'PU PTS', 'PU FG%',
    'Assisted 2s', 'Unassisted 2s',
    'Assisted 3s', 'Unassisted 3s',
    'Unassisted PTS', 'Blocks',
    'eFG%', 'Shot Quality', 'Drive PTS',
    'Steals'
]

# Chained instead of inplace=True; `cleaned` is rebuilt from the merges above
# every time this cell runs.
cleaned = cleaned.drop(columns=final_columns_to_drop).set_index('Player')
cleaned

Unnamed: 0_level_0,Unassisted%,C&S%,PU%,Drive%,Gap,SPM,DIFF%
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Amen Thompson,0.358156,0.078014,0.070922,0.241135,0.015,0.043344,-0.055
Andrew Nembhard,0.359000,0.210000,0.280000,0.420000,-0.004,0.041522,0.013
Anfernee Simons,0.504663,0.259067,0.357513,0.316062,0.011,0.027523,0.044
Anthony Edwards,0.457609,0.126812,0.409420,0.307971,0.007,0.033058,-0.006
Austin Reaves,0.324257,0.237624,0.262376,0.341584,0.036,0.031519,0.033
...,...,...,...,...,...,...,...
Tyler Herro,0.371967,0.230126,0.338912,0.326360,0.043,0.025424,0.031
Tyrese Haliburton,0.458602,0.139785,0.489247,0.284946,0.062,0.041667,0.023
Tyrese Maxey,0.500000,0.133080,0.330798,0.391635,-0.029,0.047745,-0.004
Tyus Jones,0.282353,0.529412,0.215686,0.235294,0.050,0.033582,0.026
