In [25]:
import numpy as np
import pandas as pd
import altair as alt
import eli5
from eli5.sklearn import PermutationImportance
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

In [26]:
# Load the team statistics that feed the permutation-importance model below,
# then preview the first rows.
modern = pd.read_csv('./datasets/Perm.csv')
modern.head(5)

Unnamed: 0,Team,G,W,L,MP,FG,FGA,FG%,3P,3PA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Golden State Warriors,82,67,15,240.6,41.6,87.0,0.478,10.8,27.0,...,0.768,10.4,34.3,44.7,27.4,9.3,6.0,14.5,19.9,110.0
1,Los Angeles Clippers,82,56,26,240.6,39.4,83.3,0.473,10.1,26.9,...,0.71,9.6,33.1,42.6,24.8,7.8,5.0,12.3,21.3,106.7
2,Dallas Mavericks,82,50,32,242.4,39.7,85.8,0.463,8.9,25.4,...,0.752,10.5,31.8,42.3,22.5,8.1,4.5,13.0,20.0,105.2
3,Toronto Raptors,82,49,33,242.1,37.9,83.3,0.455,8.9,25.1,...,0.787,10.7,30.8,41.5,20.7,7.5,4.4,12.9,20.9,104.0
4,Oklahoma City Thunder,82,45,37,241.8,38.8,86.8,0.447,7.7,22.7,...,0.754,12.8,34.7,47.5,20.5,7.3,5.5,14.7,22.3,104.0


In [27]:
# Remove the team label and games-played columns before modelling.
# `modern` is rebound from itself, so a plain drop would raise KeyError the
# second time this cell runs (the columns are already gone). errors='ignore'
# keeps the cell safe to re-run without changing the result of the first run.
modern = modern.drop(columns=['Team', 'G'], errors='ignore')
modern.head()

Unnamed: 0,W,L,MP,FG,FGA,FG%,3P,3PA,3P%,2P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,67,15,240.6,41.6,87.0,0.478,10.8,27.0,0.398,30.8,...,0.768,10.4,34.3,44.7,27.4,9.3,6.0,14.5,19.9,110.0
1,56,26,240.6,39.4,83.3,0.473,10.1,26.9,0.376,29.3,...,0.71,9.6,33.1,42.6,24.8,7.8,5.0,12.3,21.3,106.7
2,50,32,242.4,39.7,85.8,0.463,8.9,25.4,0.352,30.8,...,0.752,10.5,31.8,42.3,22.5,8.1,4.5,13.0,20.0,105.2
3,49,33,242.1,37.9,83.3,0.455,8.9,25.1,0.352,29.0,...,0.787,10.7,30.8,41.5,20.7,7.5,4.4,12.9,20.9,104.0
4,45,37,241.8,38.8,86.8,0.447,7.7,22.7,0.339,31.1,...,0.754,12.8,34.7,47.5,20.5,7.3,5.5,14.7,22.3,104.0


In [28]:
# Target: the 'W' column.
y = modern['W']

# Features: every column stored as float64. Columns with any other dtype
# (including the target, if it is not float64) are left out.
feature_names = [
    column
    for column, dtype in modern.dtypes.items()
    if dtype == np.float64
]
X = modern[feature_names]

# Fixed seed so the split is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [29]:
def permutation(x_train, y_train, x_test, y_test):
    """Fit a random forest and show permutation importances for the test split.

    A ``RandomForestRegressor`` is fitted on the training split, then wrapped
    in eli5's ``PermutationImportance`` and fitted on the test split using the
    ``'r2'`` scorer with 4 shuffling iterations. Both the forest and the
    permutation step use ``random_state=42``.

    Parameters
    ----------
    x_train, y_train
        Training features and target, passed to ``model.fit``.
    x_test, y_test
        Held-out features and target used to compute the importances.
        ``x_test`` must expose ``.columns`` (e.g. a pandas DataFrame); the
        column labels are used as feature names in the display.

    Returns
    -------
    The object returned by ``eli5.show_weights`` for all features
    (``top=None``).
    """
    model = RandomForestRegressor(random_state=42)
    model.fit(x_train, y_train)

    permute = PermutationImportance(
        estimator=model,
        scoring='r2',
        n_iter=4,
        random_state=42,
    )
    permute.fit(x_test, y_test)

    # The original also built a sorted pd.Series of the importances and
    # discarded it; that dead computation is removed. show_weights reads the
    # importances straight from `permute`.
    return eli5.show_weights(
        estimator=permute,
        top=None,
        feature_names=x_test.columns.to_list(),
    )

In [30]:
# Train on the training split and display importances scored on the test split.
permutation(
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
)

Weight,Feature
0.1840  ± 0.1035,3P%
0.1220  ± 0.1048,2P%
0.0716  ± 0.0508,FG%
0.0345  ± 0.0236,BLK
0.0259  ± 0.0268,TOV
0.0228  ± 0.0223,FGA
0.0210  ± 0.0207,STL
0.0049  ± 0.0129,FT%
0.0043  ± 0.0024,2PA
0.0037  ± 0.0054,PTS


In [31]:
# Read the 14-19 statistics and discard the 'G' column in a single step.
newCorrelation = pd.read_csv('./datasets/Stats 14-19.csv').drop(columns=['G'])

In [32]:
# Pairwise correlations in long form: one row per (statsX, statsY) pair.
# Only numeric columns are kept before calling corr(): the frame still holds
# non-numeric data (the same CSV is later plotted by its 'Team' column), and
# from pandas 2.0 DataFrame.corr() raises on such columns instead of silently
# dropping them. select_dtypes works on old and new pandas alike.
correlation = (
    newCorrelation
    .select_dtypes(include='number')
    .corr()
    .reset_index()
    .melt('index')
)
correlation.columns = ['statsX', 'statsY', 'correlation']

# Heatmap layer: one rectangle per pair, shaded by the coefficient.
matrix = alt.Chart(correlation).mark_rect().encode(
    x=alt.X('statsX', title = 'X'),
    y=alt.Y('statsY', title = 'Y'),
    color=alt.Color('correlation'),
).properties(
    width=alt.Step(40),
    height=alt.Step(40)
)

# Text layer: print the coefficient in each cell, switching to white text
# where the correlation exceeds 0.5 and black elsewhere.
matrix += matrix.mark_text(size=15).encode(
    text=alt.Text('correlation', format=".2f"),
    color=alt.condition(
        "datum.correlation > 0.5",
        alt.value('white'),
        alt.value('black')
    )
)

matrix

In [33]:
# Load the two eras to compare. Note that `modern` is rebound here: from this
# cell on it holds the 14-19 statistics rather than the Perm.csv frame that
# earlier cells stored under the same name.
old, modern = (
    pd.read_csv(csv_path)
    for csv_path in ('./datasets/Stats 93-98.csv', './datasets/Stats 14-19.csv')
)

In [34]:
# 3PA per Team for the 93-98 data, stacked from zero with one segment per Year.
# Both layers share the same x/y encodings, so they are built from one base.
base = alt.Chart(old).encode(
    x=alt.X('3PA', stack='zero'),
    y=alt.Y('Team'),
)

# Bar segments, colored by Year.
bars = base.mark_bar().encode(color=alt.Color('Year'))

# Value labels: `detail` splits the labels by Year so each one follows its own
# stacked segment; dx/dy offset the text relative to that position.
text = base.mark_text(dx=-15, dy=3, color='white').encode(
    detail='Year',
    text=alt.Text('3PA', format='.2f'),
)

bars + text

In [35]:
# 3PA per Team for the 14-19 data, stacked from zero with one segment per Year.
# Both layers share the same x/y encodings, so they are built from one base.
base = alt.Chart(modern).encode(
    x=alt.X('3PA', stack='zero'),
    y=alt.Y('Team'),
)

# Bar segments, colored by Year.
bars = base.mark_bar().encode(color=alt.Color('Year'))

# Value labels: `detail` splits the labels by Year so each one follows its own
# stacked segment; dx/dy offset the text relative to that position.
text = base.mark_text(dx=-15, dy=3, color='white').encode(
    detail='Year',
    text=alt.Text('3PA', format='.2f'),
)

bars + text