In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

from jupyter_dash import JupyterDash
from dash import dash, dcc, html
from dash.dependencies import Input, Output


import os

In [2]:
# Absolute path of the 'dataframes' folder inside the current working directory.
dataframes_path = os.path.abspath('dataframes')

In [3]:
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# if it was produced by a trusted source.
complete_stats_file = os.path.join(dataframes_path, 'complete_stats.pkl')
df = pd.read_pickle(complete_stats_file)

In [4]:
# Print the frame summary; verbose = True asks for the full per-column listing.
df.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21655 entries, 0 to 21654
Data columns (total 138 columns):
 #    Column        Dtype  
---   ------        -----  
 0    Rk            int64  
 1    Player        object 
 2    Pos           object 
 3    Age           int64  
 4    Tm            object 
 5    G             int64  
 6    GS            int64  
 7    MP_pg         float64
 8    FG_pg         float64
 9    FGA_pg        float64
 10   FG%           float64
 11   3P_pg         float64
 12   3PA_pg        float64
 13   3P%           float64
 14   2P_pg         float64
 15   2PA_pg        float64
 16   2P%           float64
 17   eFG%          float64
 18   FT_pg         float64
 19   FTA_pg        float64
 20   FT%           float64
 21   ORB_pg        float64
 22   DRB_pg        float64
 23   TRB_pg        float64
 24   AST_pg        float64
 25   STL_pg        float64
 26   BLK_pg        float64
 27   TOV_pg        float64
 28   PF_pg         float64
 29   PTS_pg        fl

In [5]:
def check_rk_season(df, rk_season_pairs):
    """Flag the rows of ``df`` whose ``(Rk, Season)`` pair appears in ``rk_season_pairs``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that has ``'Rk'`` and ``'Season'`` columns.
    rk_season_pairs : iterable of tuple
        ``(Rk, Season)`` pairs to look for. Any iterable of hashable
        2-tuples is accepted (list, set, zip object, ...).

    Returns
    -------
    pandas.Series
        Boolean Series aligned on ``df.index``; ``True`` where the row's
        ``(Rk, Season)`` pair is one of the requested pairs. An empty frame
        yields an empty boolean Series.
    """
    # Hash the pairs once so every row costs a single O(1) lookup instead of
    # a linear scan through a list.
    wanted_pairs = set(rk_season_pairs)
    # Walk the two key columns directly; this avoids building a full row
    # Series per record the way DataFrame.apply(axis = 1) does.
    is_wanted = [pair in wanted_pairs for pair in zip(df['Rk'], df['Season'])]
    return pd.Series(is_wanted, index = df.index, dtype = bool)

In [6]:
def drop_players_multiteams(df):
    """Keep only the ``'TOT'`` row for every ``(Rk, Season)`` pair that has one.

    For each ``(Rk, Season)`` pair that owns a row with ``Tm == 'TOT'``, all
    other rows sharing that pair are removed. Pairs without a ``'TOT'`` row
    are left untouched.
    NOTE(review): presumably ``'TOT'`` is the combined line for a player who
    changed teams during the season -- confirm against the data source.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'Tm'``, ``'Rk'`` and ``'Season'`` columns.

    Returns
    -------
    pandas.DataFrame
        Filtered copy of ``df`` with a fresh ``RangeIndex``.
    """
    if df.empty:
        # Nothing to filter; still hand back a frame with a reset index.
        return df.reset_index(drop = True)

    is_tot = df['Tm'] == 'TOT'
    # A set gives the membership helper O(1) lookups instead of a list scan.
    tot_pairs = set(zip(df.loc[is_tot, 'Rk'], df.loc[is_tot, 'Season']))
    has_tot_row = check_rk_season(df, tot_pairs)

    # Filter with a positional boolean mask rather than df.drop(labels): a
    # label-based drop would also remove unrelated rows if the index ever
    # contained duplicate labels.
    is_redundant = has_tot_row & ~is_tot
    return df[~is_redundant].reset_index(drop = True)

In [7]:
# Rebinds df to the filtered frame; the unfiltered frame is no longer reachable
# under this name after the cell has run.
df = drop_players_multiteams(df)

In [8]:
# Descriptive statistics of the filtered frame, shown as the cell output.
df.describe()

Unnamed: 0,Rk,Age,G,GS,MP_pg,FG_pg,FGA_pg,FG%,3P_pg,3PA_pg,...,WS/48_rank,OBPM_rank,DBPM_rank,BPM_rank,VORP_rank,%W_rank,%GS,Votes,Share,MaxVotes
count,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,...,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0,17683.0
mean,216.510321,26.659221,54.10004,25.148448,20.510897,3.210355,7.039173,0.44175,0.448504,1.299434,...,109.805293,64.566194,39.448001,71.441215,42.7199,28.788837,0.350863,6.415597,0.006084,1070.038851
std,130.32204,4.07873,25.217314,29.720575,10.104615,2.284105,4.709932,0.094254,0.636592,1.682514,...,56.590689,28.557129,14.085333,32.326389,12.282598,22.199965,0.462984,64.30399,0.059236,208.777846
min,1.0,18.0,1.0,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,0.0,0.0,221.0
25%,106.0,23.0,35.0,0.0,12.1,1.4,3.3,0.405,0.0,0.0,...,68.0,45.0,31.0,50.0,37.0,10.0,0.0,0.0,0.0,980.0
50%,211.0,26.0,62.0,10.0,20.0,2.7,5.9,0.446,0.1,0.5,...,106.0,63.0,39.0,70.0,46.0,24.0,0.195,0.0,0.0,1150.0
75%,317.5,29.0,77.0,51.0,29.0,4.6,9.9,0.488,0.7,2.1,...,148.0,82.0,48.0,91.0,51.0,44.0,0.8395,0.0,0.0,1230.0
max,540.0,44.0,85.0,83.0,43.7,13.4,27.8,1.0,5.3,13.2,...,287.0,164.0,98.0,183.0,69.0,98.0,1.0,1310.0,1.0,1310.0


In [9]:
# Correlate numeric/bool columns only. df also holds text columns ('Player',
# 'Pos', 'Tm'), and on pandas >= 2.0 a bare df.corr() no longer skips them
# silently but raises. Selecting the dtypes explicitly works on every version.
df_corr = df.select_dtypes(include = ['number', 'bool']).corr()

In [10]:
# Column pairs whose correlation exceeds this value are treated as redundant.
CORR_THRESHOLD = 0.99

# Mask the diagonal explicitly instead of filtering on `< 1`: the latter also
# hides pairs of *different* columns that are perfectly correlated, which are
# the most redundant ones.
off_diagonal = ~np.eye(len(df_corr), dtype = bool)
high_corr = df_corr[(df_corr > CORR_THRESHOLD) & off_diagonal]

# Keep only the rows/columns that take part in at least one such pair.
high_corr.dropna(how = 'all').dropna(how = 'all', axis = 1)

Unnamed: 0,FG_tot,3P_tot,3PA_tot,2P_tot,2PA_tot,PTS_tot,FGA_tot_rank,PTS_tot_rank
FG_tot,,,,,,0.992847,,
3P_tot,,,0.99141,,,,,
3PA_tot,,0.99141,,,,,,
2P_tot,,,,,0.992516,,,
2PA_tot,,,,0.992516,,,,
PTS_tot,0.992847,,,,,,,
FGA_tot_rank,,,,,,,,0.992695
PTS_tot_rank,,,,,,,0.992695,


In [11]:
# One column out of each > 0.99 correlated pair listed in the table above.
REDUNDANT_COLUMNS = ['FG_tot', '3PA_tot', '2PA_tot', 'FGA_tot_rank']

# Rebind instead of mutating in place, and tolerate columns that are already
# gone, so that re-running this cell is a no-op rather than a KeyError.
df = df.drop(columns = REDUNDANT_COLUMNS, errors = 'ignore')

In [12]:
# True for every row that received a non-zero 'Share'.
# Stored as object dtype on purpose: a later cell writes the string 'MVP' into
# this column, and assigning a string into a bool-dtype column is a deprecated
# incompatible-dtype assignment in recent pandas.
df['Contender'] = (df['Share'] > 0).astype(object)

In [13]:
# Index label of the highest-'Share' row within each 'Season' ...
season_top_rows = df.groupby('Season')['Share'].idxmax()
# ... whose 'Contender' value is overwritten with the label 'MVP'.
df.loc[season_top_rows, 'Contender'] = 'MVP'

In [14]:
def corr_share(df, stat):
    """Display a scatter plot of ``stat`` (x axis) against ``'Share'`` (y axis).

    Points are coloured by the ``'Contender'`` column; the hover box is titled
    with ``'Player'`` and additionally lists ``'Season'``. The figure is shown
    immediately and nothing is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing ``stat``, ``'Share'``, ``'Contender'``, ``'Player'``
        and ``'Season'`` columns.
    stat : str
        Name of the column to put on the x axis.
    """
    scatter_options = {
        'x': stat,
        'y': 'Share',
        'color': 'Contender',
        'hover_name': 'Player',
        'hover_data': ['Season'],
    }
    px.scatter(df, **scatter_options).show()

In [15]:
# Show the 'Share' scatter with the 'PTS_pg' column on the x axis.
corr_share(df, 'PTS_pg')

In [16]:
# Interactive variant of the scatter plot: choose any column of df from the
# dropdown and plot it against 'Share'.
app = JupyterDash(__name__)
app.layout = html.Div([
    html.H1('Correlation between a chosen stat and MVP votes', style = {'color': 'white'}),
    dcc.Graph(id = 'graph', figure = {}),
    html.Label('Player stat'),
    dcc.Dropdown(
        id = 'dropdown',
        # Hand Dash a plain list of column labels rather than a pandas Index.
        options = df.columns.tolist(),
        value = 'PTS_pg',
        # Without this the user can clear the selection, which would invoke
        # the callback with value = None and draw a meaningless plot.
        clearable = False,
    ),
])


@app.callback(Output('graph', 'figure'), Input('dropdown', 'value'))
def update_graph(value):
    """Build the scatter figure for the column currently selected in the dropdown.

    Parameters
    ----------
    value : str
        Column of ``df`` to put on the x axis (the dropdown's current value).

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter of ``value`` against ``'Share'``, coloured by ``'Contender'``,
        with ``'Player'`` as hover title and ``'Season'`` as extra hover field.
    """
    return px.scatter(
        df,
        x = value,
        y = 'Share',
        color = 'Contender',
        hover_name = 'Player',
        hover_data = ['Season'],
    )


# Render the app inside the notebook output cell.
app.run_server(mode = 'inline')