In [41]:
# Mount Google Drive at /content/drive; the project folder used below lives under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
!pip install dash dash_daq



In [58]:
import pandas as pd
import numpy as np
import plotly.express as px

from dash import Dash, dcc, html, jupyter_dash, dash_table
from dash.dependencies import Input, Output
import dash_daq as daq


import os

In [44]:
# Work from the project folder on the mounted Drive so relative paths resolve there.
project_folder = '/content/drive/MyDrive/nba-project'
os.chdir(project_folder)
# <project folder>/datasets, built from the working directory set just above.
pickles_folder = os.path.join(os.getcwd(), 'datasets')
# Last expression of the cell: displays the current working directory.
os.getcwd()

'/content/drive/MyDrive/nba-project'

In [45]:
# Folder the pickled dataframes are read from: <current working directory>/datasets.
dataframes_path = os.path.join(os.getcwd(), 'datasets')

In [46]:
# Load the stats table from its pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.
df = pd.read_pickle(os.path.join(dataframes_path, 'stats_1982_to_2023.pkl'))

In [47]:
# Print the full column summary (verbose = True asks for every column to be listed).
df.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22426 entries, 0 to 22425
Data columns (total 138 columns):
 #    Column        Dtype  
---   ------        -----  
 0    Rk            int64  
 1    Player        object 
 2    Pos           object 
 3    Age           int64  
 4    Tm            object 
 5    G             int64  
 6    GS            int64  
 7    MP_pg         float64
 8    FG_pg         float64
 9    FGA_pg        float64
 10   FG%           float64
 11   3P_pg         float64
 12   3PA_pg        float64
 13   3P%           float64
 14   2P_pg         float64
 15   2PA_pg        float64
 16   2P%           float64
 17   eFG%          float64
 18   FT_pg         float64
 19   FTA_pg        float64
 20   FT%           float64
 21   ORB_pg        float64
 22   DRB_pg        float64
 23   TRB_pg        float64
 24   AST_pg        float64
 25   STL_pg        float64
 26   BLK_pg        float64
 27   TOV_pg        float64
 28   PF_pg         float64
 29   PTS_pg        fl

In [48]:
def check_rk_season(df, rk_season_pairs):
    """Flag the rows of ``df`` whose (Rk, Season) pair is in ``rk_season_pairs``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns 'Rk' and 'Season'.
    rk_season_pairs : iterable of (Rk, Season) tuples
        Pairs to look for.

    Returns
    -------
    pandas.Series
        Boolean mask aligned on ``df.index``; True where the row's
        (Rk, Season) pair is one of ``rk_season_pairs``. An empty frame
        yields an empty boolean Series.
    """
    # Set membership is O(1) per row; scanning a list for every row is not.
    # Fall back to a list if the pairs happen to be unhashable.
    try:
        wanted = set(rk_season_pairs)
    except TypeError:
        wanted = list(rk_season_pairs)
    # Iterating the two columns directly avoids building a Series per row,
    # which is what a row-wise DataFrame.apply does.
    flags = [pair in wanted for pair in zip(df['Rk'], df['Season'])]
    return pd.Series(flags, index = df.index, dtype = bool)

In [49]:
def drop_players_multiteams(df):
    """Drop the per-team rows of every (Rk, Season) pair that also has a 'TOT' row.

    Rows whose 'Tm' is 'TOT' are kept; the other rows sharing their
    (Rk, Season) pair are removed. Returns a new frame with a fresh
    0..n-1 index.
    """
    totals = df.loc[df['Tm'].eq('TOT'), ['Rk', 'Season']]
    multiteam_keys = list(zip(totals['Rk'], totals['Season']))
    # Every row (the 'TOT' one and the per-team ones) of those pairs.
    multiteam_rows = df[check_rk_season(df, multiteam_keys)]
    per_team_labels = multiteam_rows.loc[multiteam_rows['Tm'].ne('TOT')].index
    trimmed = df.drop(per_team_labels)
    return trimmed.reset_index(drop = True)

In [50]:
# Replace df with the version that drops the per-team rows of players who also have a 'TOT' row.
df = drop_players_multiteams(df)

In [51]:
# Summary statistics of the numeric columns after the multi-team rows were dropped.
df.describe()

Unnamed: 0,Rk,Age,G,GS,MP_pg,FG_pg,FGA_pg,FG%,3P_pg,3PA_pg,...,DBPM_rank,BPM_rank,VORP_rank,%W,GT,%W_rank,%G,Votes,Share,MaxVotes
count,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,...,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0,18236.0
mean,223.168952,26.622231,53.316133,25.944834,20.387108,3.187404,6.996024,0.441459,0.492816,1.424397,...,40.292937,72.910287,42.87086,0.495453,80.189131,29.830116,0.664446,6.395591,0.005987,1085.342729
std,134.526284,4.115448,25.290913,29.333524,10.095089,2.279637,4.714492,0.097373,0.669533,1.770202,...,14.449608,32.830965,12.134442,0.149636,5.911071,23.070952,0.310031,64.551484,0.059201,169.074929
min,1.0,18.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,0.106,50.0,1.0,0.012,0.0,0.0,690.0
25%,109.0,23.0,34.0,1.0,12.0,1.4,3.3,0.404,0.0,0.0,...,31.0,51.0,38.0,0.37875,82.0,10.0,0.427,0.0,0.0,1000.0
50%,218.0,26.0,61.0,11.0,19.8,2.6,5.8,0.446,0.2,0.6,...,40.0,71.0,46.0,0.5075,82.0,25.0,0.768,0.0,0.0,1130.0
75%,327.0,29.0,76.0,52.0,28.8,4.5,9.8,0.488,0.8,2.3,...,49.0,92.0,51.0,0.61,82.0,45.0,0.939,0.0,0.0,1230.0
max,605.0,44.0,85.0,83.0,43.7,13.4,27.8,1.0,5.3,13.2,...,117.0,208.0,69.0,0.89,82.0,100.0,1.037,1310.0,1.0,1310.0


In [53]:
# Remove columns that are not needed further on. Rebinding (instead of
# inplace = True) with errors = 'ignore' keeps the cell safe to re-run: a second
# run no longer raises KeyError because the columns are already gone.
# Caveat: a misspelled column name is silently ignored too.
df = df.drop(columns = ['FG_tot', '3PA_tot', '2PA_tot', 'FGA_rank_tot'], errors = 'ignore')

In [54]:
# Boolean flag: True for rows whose 'Share' is greater than 0.
df['Contender'] = df['Share'] > 0

In [55]:
# Label the row with the highest 'Share' of each season as 'MVP'.
# 'Contender' starts out as a boolean column; writing a string into it relies on
# an implicit dtype change that pandas has deprecated (FutureWarning, slated to
# become an error), so cast to object explicitly first.
# NOTE(review): a season in which every 'Share' is 0 would still get its first
# row labelled 'MVP' — confirm that every season has vote data.
df['Contender'] = df['Contender'].astype(object)
df.loc[df.groupby('Season')['Share'].idxmax(), 'Contender'] = 'MVP'

In [78]:
app = Dash(__name__)

# Page layout: title, scatter plot, then the controls read by the callback
# (x-axis stat, optional dot-size stat, trendline switch) and the container
# that receives the trendline results.
app.layout = html.Div(
    children = [
        html.H1(
            children = 'Correlation between a chosen stats and MVP votes',
            style = {'color': 'white'},
        ),
        dcc.Graph(id = 'graph', figure = {}),

        # X axis selection
        html.Label(children = 'Player stat - X Axis'),
        dcc.Dropdown(
            id = 'dropdown_x',
            options = df.columns,
            value = 'PTS_pg',
        ),

        # Optional marker size selection
        daq.BooleanSwitch(
            id = 'activate_size',
            on = False,
            label = 'Activate Size Option',
        ),
        html.Label(children = 'Player stat - Dot Size'),
        dcc.Dropdown(
            id = 'dropdown_size',
            options = df.columns,
            value = 'G',
        ),

        # Trendline toggle and its results
        daq.BooleanSwitch(
            id = 'activate_trend',
            on = True,
            label = 'Activate Trendline',
        ),
        html.Div(id = 'trend_results'),
    ]
)

@app.callback(
    Output('graph', 'figure'),
    Output('trend_results', 'children'),
    Input('dropdown_x', 'value'),
    Input('dropdown_size', 'value'),
    Input('activate_size', 'on'),
    Input('activate_trend', 'on'))
def update_graph(value_x, value_size, activate_size, activate_trend):
    """Rebuild the scatter plot and the trendline table from the controls.

    Parameters
    ----------
    value_x : str or None
        Column of the module-level ``df`` plotted on the x axis.
    value_size : str or None
        Column used for the marker size when ``activate_size`` is on.
    activate_size : bool
        Whether markers are sized by ``value_size``.
    activate_trend : bool
        Whether an overall OLS trendline is fitted and summarised.

    Returns
    -------
    tuple
        ``(figure, children)``: the Plotly figure for 'graph' and either a
        DataTable summarising the fit or an empty list for 'trend_results'.
    """
    # The original body assigned the trendline results to a local named `df`.
    # That made `df` local to the whole function, so `px.scatter(df, ...)`
    # raised UnboundLocalError on every call. The global frame is now only
    # read, never rebound.

    def is_numeric(column):
        # Sizing and OLS fitting only make sense for numeric columns; the
        # dropdowns offer every column of df, including text ones.
        return column in df.columns and pd.api.types.is_numeric_dtype(df[column])

    # The page title announces a correlation with MVP votes, but no y column
    # was passed, so the stat was plotted against the row index.
    # NOTE(review): 'Share' (vote share) is assumed to be the intended target;
    # switch to 'Votes' if raw vote counts are wanted.
    target = 'Share'

    size = value_size if activate_size and is_numeric(value_size) else None
    fit_trend = bool(activate_trend) and is_numeric(value_x)

    fig = px.scatter(
        df, x = value_x, y = target,
        size = size,
        color = 'Contender',
        hover_name = 'Player',
        hover_data = ['Season'],
        trendline = 'ols' if fit_trend else None,
        trendline_scope = 'overall')

    if not fit_trend:
        return fig, []

    # The results frame stores fitted model objects in 'px_fit_results'; those
    # cannot be serialised for the browser, so flatten each fit into plain
    # numbers before handing it to the DataTable.
    trend_results = px.get_trendline_results(fig)
    rows = []
    for record in trend_results.to_dict('records'):
        fit = record.pop('px_fit_results')
        # NOTE(review): with the default OLS options the first coefficient is
        # presumably the intercept and the second the slope — confirm.
        for position, coefficient in enumerate(fit.params):
            record[f'coef_{position}'] = float(coefficient)
        record['r_squared'] = float(fit.rsquared)
        record['n_obs'] = int(fit.nobs)
        rows.append(record)

    if not rows:
        return fig, []

    columns = [{"name": key, "id": key} for key in rows[0]]
    return fig, dash_table.DataTable(rows, columns)

# Start the Dash server from the notebook, using Dash's 'external' Jupyter mode.
app.run(jupyter_mode = 'external')

Dash app running on:


<IPython.core.display.Javascript object>

# Variables que treuria del model:

*   Age
*   
*   

