In [1]:
# Mount Google Drive at /content/drive (Colab-only helper) so the project files are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install dash dash_daq



In [3]:
import pandas as pd
import numpy as np
import plotly.express as px

from dash import Dash, dcc, html, jupyter_dash, dash_table
from dash.dependencies import Input, Output
import dash_daq as daq


import os

In [4]:
# Work from the project folder on the mounted Drive so relative paths resolve against it.
project_folder = '/content/drive/MyDrive/nba-project'
os.chdir(project_folder)
# 'datasets' sub-folder of the (new) working directory.
pickles_folder = os.path.join(os.getcwd(), 'datasets')
# Echo the working directory as the cell output to confirm the chdir took effect.
os.getcwd()

'/content/drive/MyDrive/nba-project'

In [5]:
# Directory the dataset pickle is read from: <current working directory>/datasets.
dataframes_path = os.path.join(os.getcwd(), 'datasets')

In [6]:
# Load the stats table from its pickle.
# NOTE(review): unpickling can execute arbitrary code; only load pickles from a trusted source.
df = pd.read_pickle(os.path.join(dataframes_path, 'stats_1982_to_2024.pkl'))

In [7]:
# Print the full per-column summary; verbose = True prevents pandas from truncating the listing.
df.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23161 entries, 0 to 23160
Data columns (total 138 columns):
 #    Column        Dtype  
---   ------        -----  
 0    Rk            int64  
 1    Player        object 
 2    Pos           object 
 3    Age           int64  
 4    Tm            object 
 5    G             int64  
 6    GS            int64  
 7    MP_pg         float64
 8    FG_pg         float64
 9    FGA_pg        float64
 10   FG%           float64
 11   3P_pg         float64
 12   3PA_pg        float64
 13   3P%           float64
 14   2P_pg         float64
 15   2PA_pg        float64
 16   2P%           float64
 17   eFG%          float64
 18   FT_pg         float64
 19   FTA_pg        float64
 20   FT%           float64
 21   ORB_pg        float64
 22   DRB_pg        float64
 23   TRB_pg        float64
 24   AST_pg        float64
 25   STL_pg        float64
 26   BLK_pg        float64
 27   TOV_pg        float64
 28   PF_pg         float64
 29   PTS_pg        fl

In [8]:
def check_rk_season(df, rk_season_pairs):
    """Flag the rows of ``df`` whose ``(Rk, Season)`` pair is listed in ``rk_season_pairs``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'Rk'`` and ``'Season'`` columns.
    rk_season_pairs : iterable of tuple
        ``(Rk, Season)`` tuples to look for; the tuples must be hashable.

    Returns
    -------
    pandas.Series
        Boolean mask aligned with ``df.index`` (``True`` where the row's pair is listed).
        An empty ``df`` yields an empty boolean Series.
    """
    # A set gives constant-time membership checks, so the list of pairs is not
    # re-scanned for every row of the frame.
    wanted = set(rk_season_pairs)
    flags = [pair in wanted for pair in zip(df['Rk'], df['Season'])]
    return pd.Series(flags, index = df.index, dtype = bool)

In [9]:
def drop_players_multiteams(df):
    """Drop the non-'TOT' rows of every (Rk, Season) pair that also has a 'TOT' row.

    Rows whose ``'Tm'`` is ``'TOT'`` are kept, as are all rows of (Rk, Season) pairs that
    have no ``'TOT'`` row at all. The result is a new frame with a fresh 0..n-1 index.
    Presumably ``'TOT'`` is the combined line of a player who changed teams mid-season,
    verify against the data source.
    """
    totals = df.loc[df['Tm'] == 'TOT', ['Rk', 'Season']]
    traded_keys = list(zip(totals['Rk'], totals['Season']))
    # Every row (combined or per-team) belonging to a pair that has a 'TOT' line.
    traded_rows = df[check_rk_season(df, traded_keys)]
    per_team_index = traded_rows.loc[traded_rows['Tm'] != 'TOT'].index
    return df.drop(index = per_team_index).reset_index(drop = True)

In [10]:
# Where a (Rk, Season) pair has a 'TOT' row, keep only that row and drop the other team rows.
df = drop_players_multiteams(df)

In [11]:
# Summary statistics of the numeric columns of the de-duplicated frame.
df.describe()

Unnamed: 0,Rk,Age,G,GS,MP_pg,FG_pg,FGA_pg,FG%,3P_pg,3PA_pg,...,DBPM_rank,BPM_rank,VORP_rank,%W,GT,%W_rank,%G,Votes,Share,MaxVotes
count,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,...,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0,18808.0
mean,225.095013,26.595491,53.098309,25.809762,20.334299,3.185363,6.986516,0.441706,0.506444,1.461559,...,40.581774,73.168705,42.998777,0.495283,80.244205,29.989845,0.661356,6.337941,0.005943,1082.443109
std,135.994237,4.121477,25.327597,29.271967,10.093564,2.285253,4.723313,0.097904,0.681204,1.79859,...,14.585834,32.966224,12.126399,0.149788,5.828787,23.272134,0.310556,64.245631,0.059052,167.287074
min,1.0,18.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,0.106,50.0,1.0,0.012,0.0,0.0,690.0
25%,110.0,23.0,34.0,1.0,12.0,1.4,3.3,0.405,0.0,0.1,...,31.0,51.0,38.0,0.378,82.0,10.0,0.427,0.0,0.0,990.0
50%,219.0,26.0,60.0,11.0,19.7,2.6,5.8,0.446,0.2,0.7,...,41.0,71.0,47.0,0.511,82.0,25.0,0.764,0.0,0.0,1130.0
75%,330.0,29.0,76.0,52.0,28.8,4.5,9.8,0.489,0.8,2.4,...,50.0,93.0,51.0,0.61,82.0,46.0,0.939,0.0,0.0,1230.0
max,605.0,44.0,85.0,83.0,43.7,13.4,27.8,1.0,5.3,13.2,...,117.0,208.0,69.0,0.89,82.0,100.0,1.037,1310.0,1.0,1310.0


In [12]:
# Remove four '*_tot' columns from the working frame (reassignment instead of inplace).
df = df.drop(columns = ['FG_tot', '3PA_tot', '2PA_tot', 'FGA_rank_tot'])

In [13]:
# True for every row with a strictly positive 'Share', False otherwise.
df['Contender'] = df['Share'].gt(0)

In [14]:
# Label the row with the highest 'Share' of each season as 'MVP'.
# 'Contender' holds booleans, so cast it to object first: writing a string into a
# bool-dtype column is a deprecated silent upcast in pandas 2.1+ and is slated to raise.
df['Contender'] = df['Contender'].astype(object)
season_leaders = df.groupby('Season')['Share'].idxmax()
# If nobody has a positive 'Share' in a season, idxmax still returns a row; do not label it.
season_leaders = season_leaders[(df.loc[season_leaders, 'Share'] > 0).to_numpy()]
df.loc[season_leaders, 'Contender'] = 'MVP'

In [15]:
# Distribution of the '%G' column as a violin with an embedded box plot.
px.violin(df, y = '%G', box = True)

In [16]:
# First quartile of '%G'; the same quantile is the cut-off used when building the '*_adj' columns.
df['%G'].quantile(0.25)

0.427

In [28]:
# Build '<col>_adj' copies of selected advanced stats in which rows below the first
# quartile of '%G' are overwritten with a fixed placeholder value (per the notebook's
# notes, these stats correlate better once players who barely played are neutralised).
# Fix: the OBPM/BPM branch used to test `col in ['OBPM, BPM']` -- a single string that
# never matched -- so both columns were filled with 0 instead of -5.
low_sample_values = {'PER': 5, 'USG%': 0.05, 'WS/48': 0, 'OBPM': -5, 'BPM': -5}
cols_to_adjust = list(low_sample_values)
# The cut-off does not depend on the column, so compute the row mask once.
low_sample = df['%G'] < df['%G'].quantile(0.25)
for col in cols_to_adjust:
    df[f'{col}_adj'] = df[col]
    df.loc[low_sample, f'{col}_adj'] = low_sample_values[col]

In [29]:
app = Dash(__name__)

# Any column may go on the X axis. Marker sizes, however, must be numeric, non-null and
# non-negative or px.scatter rejects them, so the size dropdown only offers such columns.
x_options = df.columns.tolist()
size_options = [
    col for col in df.select_dtypes(include = 'number').columns
    if df[col].notna().all() and (df[col] >= 0).all()
]

app.layout = html.Div([
    # NOTE(review): white title text is invisible on a white page unless a dark
    # stylesheet is supplied elsewhere -- confirm this colour is intended.
    html.H1('Correlation between a chosen stats and MVP votes', style = {'color': 'white'}),
    dcc.Graph(id = 'graph', figure = {}),
    html.Label('Player stat - X Axis'),
    dcc.Dropdown(id = 'dropdown_x', options = x_options, value = 'PTS_pg'),
    daq.BooleanSwitch(id = 'activate_size', on = False, label = 'Activate Size Option'),
    html.Label('Player stat - Dot Size'),
    dcc.Dropdown(id = 'dropdown_size', options = size_options, value = 'G'),
    daq.BooleanSwitch(id = 'activate_trend', on = True, label = 'Activate Trendline'),
])

@app.callback(
    Output('graph', 'figure'),
    Input('dropdown_x', 'value'),
    Input('dropdown_size', 'value'),
    Input('activate_size', 'on'),
    Input('activate_trend', 'on'))
def update_graph(value_x, value_size, activate_size, activate_trend):
    """Redraw the scatter of 'Share' against the selected stat.

    Parameters
    ----------
    value_x : str or None
        Column plotted on the X axis (value of the 'dropdown_x' dropdown).
    value_size : str or None
        Column mapped to the marker size; only used when ``activate_size`` is on.
    activate_size : bool
        State of the 'Activate Size Option' switch.
    activate_trend : bool
        State of the 'Activate Trendline' switch.

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter coloured by 'Contender', with an overall OLS trendline when requested
        and when the X column can be regressed on.
    """
    size = value_size if activate_size else None
    # An OLS fit needs a numeric (or datetime) X axis; for text columns such as 'Player'
    # the trendline is skipped instead of making the callback fail.
    x_supports_trend = value_x in df.columns and (
        pd.api.types.is_numeric_dtype(df[value_x])
        or pd.api.types.is_datetime64_any_dtype(df[value_x]))
    trendline = 'ols' if activate_trend and x_supports_trend else None
    fig = px.scatter(
        df, x = value_x,
        size = size,
        y = 'Share',
        color = 'Contender',
        hover_name = 'Player',
        hover_data = ['Season'],
        trendline = trendline,
        trendline_scope = 'overall')
    return fig

# Serve the app on an external URL rather than inline in the notebook output.
app.run(jupyter_mode = 'external')

Dash app running on:


<IPython.core.display.Javascript object>

# Variables que treuria del model:

*   Age
*   G
*   GS
*   FGA_pg
*   FG%
*   3P_pg
*   3PA_pg
*   3P%
*   2PA_pg
*   2P%
*   eFG%
*   FT%
*   ORB_pg
*   DRB_pg
*   PF_pg
*   TS%
*   3PAr
*   FTr
*   ORB%
*   DRB%
*   TRB%
*   AST%
*   STL%
*   BLK%
*   TOV%
*   OBPM
*   DBPM
*   *_rank

# Variables que potser treuria del model:

*   TOV_pg
*   %G


# Variables que poden millorar si no es tenen en compte jugadors que han jugat molt poc

*   PER --> millora fixant a 5
*   USG% --> millora fixant a 0.05
*   WS/48 --> millora fixant a 0
*   BPM --> millora fixant a -5

