In [8]:
from math import sqrt

import os
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

import shap
import plotly.express as px

# For shap to work with keras, disable v2 behavior
tf.compat.v1.disable_v2_behavior()

In [9]:
loc = './stats/'
MODEL_DIR = './models/'

In [10]:
X = pd.read_csv(loc + 'X.csv')
y = pd.read_csv(loc + 'y.csv')

y = y['PLAYER_PPG']

# Apply normalization to input
X = (X - X.mean()) / X.std()

# Baseline Model

A basic model that predicts the offensive player's points per 100 possessions will be their average points per 100 possessions

In [11]:
X_base = X.copy()
y_base = y.copy()

X_base = X_base['OFF_PTS'].to_numpy()
y_base = y_base.to_numpy()

rmse = np.sqrt(np.mean((X_base - y_base) ** 2))
mae = mean_absolute_error(y_base, X_base)
print('RMSE: ' + str(rmse))
print('MAE: ' + str(mae))

RMSE: 19.428045942801486
MAE: 15.227051267591593


# Sequential Model

Creates a simple Keras model composed of single stack layers connected sequentially.

In [12]:
# Remove points from input dataset
X_seq = X.copy()
X_seq.drop(columns=['OFF_POST_TOUCH_PTS', 'OFF_POST_TOUCH_FG_PCT', 'OFF_PTS'], inplace=True)

# Sort columns alphabetically
X_seq = X_seq.sort_index(axis=1)

# Generate training and verification data
X_train, X_test, y_train, y_test = train_test_split(X_seq, y, test_size=0.2)

print('Training Entries')
X_train.info()
X_train.describe()

print('Testing Entries')
X_test.info()
X_test.describe()

X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

Training Entries
<class 'pandas.core.frame.DataFrame'>
Int64Index: 78444 entries, 71535 to 39261
Data columns (total 51 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   DEF_AGE                   78444 non-null  float64
 1   DEF_BLK                   78444 non-null  float64
 2   DEF_BOXOUTS               78444 non-null  float64
 3   DEF_CHARGES_DRAWN         78444 non-null  float64
 4   DEF_CONTESTED_SHOTS       78444 non-null  float64
 5   DEF_CONTESTED_SHOTS_3PT   78444 non-null  float64
 6   DEF_DEF_WS                78444 non-null  float64
 7   DEF_DREB                  78444 non-null  float64
 8   DEF_DREB_PCT              78444 non-null  float64
 9   DEF_D_FGA                 78444 non-null  float64
 10  DEF_D_FG_PCT              78444 non-null  float64
 11  DEF_GP                    78444 non-null  float64
 12  DEF_L                     78444 non-null  float64
 13  DEF_MIN                   78444 non-null

In [13]:
# Create model
model = Sequential()
model.add(Dense(int(X_seq.shape[1] / 2), activation='relu', input_shape=(X_seq.shape[1],)))
model.add(Dropout(0.7))
model.add(Dense(1, activation='relu')) # y.shape[1]

# Compile model
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['accuracy'])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 25)                1300      
                                                                 
 dropout_1 (Dropout)         (None, 25)                0         
                                                                 
 dense_3 (Dense)             (None, 1)                 26        
                                                                 
Total params: 1,326
Trainable params: 1,326
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Set batch size
bsize = int(X_train.shape[0] * 0.6)

# Fit model
%time history = model.fit(X_train, y_train, epochs=100000, validation_split=0.2, batch_size=bsize, verbose=0)

`Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.


Wall time: 1h 39min 58s


In [15]:
y_pred = model.predict(X_test)

# Evaluate error
score = model.evaluate(X_test, y_test)
mae = mean_absolute_error(y_test, y_pred)
print('RMSE: ' + str(sqrt(score[0])))
print('MAE: ' + str(mae))

diff = abs(y_pred.flatten() - y_test.flatten())
predictions = pd.DataFrame(data={'predicted': y_pred.flatten(), 'actual': y_test.flatten(), 'abs_diff': diff})
# predictions.sort_values(by='abs_diff')
predictions.sort_values(by='abs_diff', ascending=False)

`Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.


RMSE: 9.966618603951218
MAE: 6.9327951579532705


Unnamed: 0,predicted,actual,abs_diff
15111,25.035191,125.838926,100.803736
6522,36.753395,133.739662,96.986267
13071,36.504189,130.254375,93.750186
11899,27.774136,118.896702,91.122566
14877,33.017010,121.984782,88.967773
...,...,...,...
11462,13.367222,13.372093,0.004871
13123,19.565025,19.568345,0.003320
14698,17.627695,17.630283,0.002588
3037,30.096813,30.099250,0.002436


In [16]:
# Plot training and validation loss
fig = px.line()
fig.add_scatter(y=history.history['loss'], name='Train')
fig.add_scatter(y=history.history['val_loss'], name='Validation')

fig.update_layout(title='Train vs Validation Loss',
                   xaxis_title='Epochs',
                   yaxis_title='Loss')

fig.show()

In [17]:
fig = px.histogram(predictions, x='predicted')
fig.show()

In [18]:
fig = px.histogram(predictions, x='actual')
fig.show()

In [19]:
# compute SHAP values
explainer = shap.DeepExplainer(model, X_train[0:1000])
shap_values = explainer.shap_values(X_test)

shap.initjs()
shap.force_plot(explainer.expected_value[0], shap_values[0][0], features = X_seq.columns)


keras is no longer supported, please use tf.keras instead.


Your TensorFlow version is newer than 2.4.0 and so graph support has been removed in eager mode and some static graphs may not be supported. See PR #1483 for discussion.






In [20]:
import tensorflowjs as tfjs

if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

name = input('Set model name: ')
model.save(MODEL_DIR + name + '_keras')
tfjs.converters.save_keras_model(model, MODEL_DIR + name + '_js')
