In [617]:
import pandas as pd
import requests
import json
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split

import shap
import plotly.express as px

from minisom import MiniSom

# For shap to work with keras, disable v2 behavior
tf.compat.v1.disable_v2_behavior()

In [734]:
season = '2019-20'
percentile = 0.2

df = pd.read_csv('./stats/' + season + '_' + str(percentile) + '_h2h_stats.csv')
# df = pd.read_csv('./stats/h2h_combined.csv')
df = df.fillna(0)

print(df.shape)
df.head()

(114829, 27)


Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,TEAM_PTS,...,MATCHUP_FG3A,MATCHUP_FG3_PCT,HELP_BLK,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC
0,22019,203932,Aaron Gordon,202710,Jimmy Butler,4,25:22,117.3,24,110,...,10,0.4,0,0,0,0,2,2,0,1521.9
1,22019,203932,Aaron Gordon,1627823,Juancho Hernangomez,3,15:56,87.2,10,97,...,5,0.2,0,0,0,0,1,1,1,956.2
2,22019,203932,Aaron Gordon,203114,Khris Middleton,4,17:50,87.1,13,82,...,6,0.333,0,0,0,0,3,5,3,1070.2
3,22019,203932,Aaron Gordon,203933,T.J. Warren,2,16:36,81.6,25,82,...,5,0.6,0,0,0,0,2,4,2,995.6
4,22019,203932,Aaron Gordon,203084,Harrison Barnes,2,17:33,79.4,22,102,...,4,0.75,0,0,0,0,1,1,1,1052.5


# Convert totals to per 100 possessions

Most limited sample size, convert each to per possession and then multiply by 100 (more standard to measure in per 100 possessions rather than per possession, as generally nba teams average [100 possessions per game](https://www.teamrankings.com/nba/stat/possessions-per-game)).

In [735]:
MIN_MATCHUP_MINS = 12

In [736]:
h2h_df = df.copy()
h2h_df = h2h_df[h2h_df['MATCHUP_TIME_SEC'] > MIN_MATCHUP_MINS * 60] # Must have played more than x minutes
h2h_df.drop(columns=['TEAM_PTS', 'MATCHUP_FG_PCT', 'MATCHUP_FG3_PCT'], inplace=True)

def per_100_poss(x):
    return x / h2h_df['PARTIAL_POSS'] * 100

# Set stats to per 100 possessions
h2h_df = h2h_df.apply(lambda x: per_100_poss(x) if x.name not in h2h_df.columns[0:8] else x)

h2h_df.sort_values('DEF_PLAYER_NAME', ascending=True, inplace=True)

# Remove rows with zeros in important columns
check = h2h_df[h2h_df.columns[8:11]] != 0
h2h_df = h2h_df[check['PLAYER_PTS'] & check['MATCHUP_AST']]

print(h2h_df.shape)
h2h_df.head()

(1076, 24)


Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,MATCHUP_AST,...,MATCHUP_FG3M,MATCHUP_FG3A,HELP_BLK,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC
60710,22019,1627823,Juancho Hernangomez,203932,Aaron Gordon,3,17:09,102.8,10.700389,2.918288,...,1.945525,6.809339,0.0,0.0,0.0,0.0,0.972763,0.972763,0.972763,1001.459144
73324,22019,1629029,Luka Doncic,203932,Aaron Gordon,2,17:37,92.7,22.653722,4.314995,...,0.0,7.551241,0.0,0.0,0.0,0.0,5.393743,6.472492,2.157497,1140.12945
41217,22019,203507,Giannis Antetokounmpo,203932,Aaron Gordon,3,16:38,100.5,29.850746,12.935323,...,1.99005,3.9801,0.0,0.0,0.0,0.0,5.970149,7.960199,3.9801,993.333333
102201,22019,203933,T.J. Warren,203932,Aaron Gordon,2,15:58,74.7,26.773762,4.016064,...,2.677376,4.016064,0.0,0.0,0.0,0.0,2.677376,2.677376,1.338688,1282.597055
109211,22019,1626168,Trey Lyles,203932,Aaron Gordon,2,12:05,61.2,22.875817,1.633987,...,1.633987,4.901961,0.0,0.0,0.0,0.0,1.633987,1.633987,1.633987,1184.313725


# Sanitize Outputs

The program is only as good as the data you give it

In [737]:
features = ['PLAYER_PTS', 'MATCHUP_AST', 'MATCHUP_TOV']

som_data = h2h_df[features].values # h2h_df.columns[7:10]
som = MiniSom(20, 20, len(features), sigma=5, learning_rate=0.5,
              neighborhood_function='gaussian', random_seed=10)

%time som.train(som_data, 20000, random_order=False)  # random training

quantization_errors = np.linalg.norm(som.quantization(som_data) - som_data, axis=1)

error_threshold = np.percentile(quantization_errors, 
                               100*(1-.25))+(4*(np.percentile(quantization_errors, 
                               100*(1-.25)) - np.percentile(quantization_errors, 100*(1-.75))))

is_outlier = quantization_errors > error_threshold
error_threshold

Wall time: 1.06 s


6.472049690111928

In [738]:
plt = px.scatter(data_frame = h2h_df, x = 'PLAYER_PTS', y = 'MATCHUP_TIME_SEC', color = is_outlier)
plt.show()

In [739]:
out = h2h_df
# [is_outlier == False]
out.shape

(1076, 24)

# Retrieve Offensive and Defensive Stats

Retrieves defensive data for each defending player

In [740]:
data = pd.read_csv('./stats/'+ season + '_def_stats.csv')
# data = pd.read_csv('./stats/def_combined.csv')
data = data.fillna(0)

# Get def stats only from selected defenders (rename player_id to def_player_id to merge arrays)
data = data.add_prefix('DEF_')
data.rename(columns={'DEF_SEASON_ID': 'SEASON_ID', 'DEF_DEF_RATING': 'DEF_RATING', 
                     'DEF_DEF_BOXOUTS': 'DEF_BOXOUTS'}, inplace=True)
def_df = pd.merge(data, out[['DEF_PLAYER_ID', 'SEASON_ID']], how ='inner', on=['DEF_PLAYER_ID', 'SEASON_ID'])
def_df.drop(columns=['DEF_GP', 'DEF_G', 'DEF_D_FG_PCT', 'DEF_DREB_PCT', 'DEF_PCT_STL', 'DEF_PCT_BLK'], inplace=True)

# Add offensive player (helps merging offensive stats)
print(def_df.shape[0], out.shape[0])
def_df['OFF_PLAYER_ID'] = out['OFF_PLAYER_ID'].to_numpy()
def_df['OFF_PLAYER_NAME'] = out['OFF_PLAYER_NAME'].to_numpy()

print(def_df.columns)
def_df

1076 1076
Index(['DEF_PLAYER_ID', 'DEF_PLAYER_NAME', 'DEF_AGE', 'DEF_D_FGM', 'DEF_D_FGA',
       'DEF_NORMAL_FG_PCT', 'DEF_PCT_PLUSMINUS', 'DEF_W', 'DEF_L', 'DEF_MIN',
       'DEF_STL', 'DEF_BLK', 'DEF_DREB', 'DEF_CONTESTED_SHOTS',
       'DEF_CONTESTED_SHOTS_2PT', 'DEF_CONTESTED_SHOTS_3PT', 'DEF_DEFLECTIONS',
       'DEF_CHARGES_DRAWN', 'DEF_BOXOUTS', 'DEF_PCT_BOX_OUTS_REB', 'SEASON_ID',
       'DEF_RATING', 'DEF_OPP_PTS_OFF_TOV', 'DEF_OPP_PTS_2ND_CHANCE',
       'DEF_OPP_PTS_FB', 'DEF_OPP_PTS_PAINT', 'DEF_DEF_WS', 'OFF_PLAYER_ID',
       'OFF_PLAYER_NAME'],
      dtype='object')


Unnamed: 0,DEF_PLAYER_ID,DEF_PLAYER_NAME,DEF_AGE,DEF_D_FGM,DEF_D_FGA,DEF_NORMAL_FG_PCT,DEF_PCT_PLUSMINUS,DEF_W,DEF_L,DEF_MIN,...,DEF_PCT_BOX_OUTS_REB,SEASON_ID,DEF_RATING,DEF_OPP_PTS_OFF_TOV,DEF_OPP_PTS_2ND_CHANCE,DEF_OPP_PTS_FB,DEF_OPP_PTS_PAINT,DEF_DEF_WS,OFF_PLAYER_ID,OFF_PLAYER_NAME
0,203932,Aaron Gordon,24.0,5.31,11.98,0.468,-0.025,22.0,17.0,32.8,...,0.581,22019,107.4,12.8,9.1,11.5,35.8,0.111,1627823,Juancho Hernangomez
1,203932,Aaron Gordon,24.0,5.31,11.98,0.468,-0.025,22.0,17.0,32.8,...,0.581,22019,107.4,12.8,9.1,11.5,35.8,0.111,1629029,Luka Doncic
2,203932,Aaron Gordon,24.0,5.31,11.98,0.468,-0.025,22.0,17.0,32.8,...,0.581,22019,107.4,12.8,9.1,11.5,35.8,0.111,203507,Giannis Antetokounmpo
3,203932,Aaron Gordon,24.0,5.31,11.98,0.468,-0.025,22.0,17.0,32.8,...,0.581,22019,107.4,12.8,9.1,11.5,35.8,0.111,203933,T.J. Warren
4,203932,Aaron Gordon,24.0,5.31,11.98,0.468,-0.025,22.0,17.0,32.8,...,0.581,22019,107.4,12.8,9.1,11.5,35.8,0.111,1626168,Trey Lyles
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,203897,Zach LaVine,25.0,5.02,11.68,0.454,-0.024,15.0,40.0,16.1,...,0.378,22019,117.3,6.4,5.1,4.7,17.9,0.007,1626224,Cedi Osman
1072,203897,Zach LaVine,25.0,5.02,11.68,0.454,-0.024,15.0,40.0,16.1,...,0.378,22019,117.3,6.4,5.1,4.7,17.9,0.007,1628984,Devonte' Graham
1073,1629627,Zion Williamson,19.0,5.50,11.96,0.459,0.001,22.0,43.0,25.8,...,0.364,22019,108.8,9.0,6.6,7.1,26.8,0.077,2546,Carmelo Anthony
1074,1629627,Zion Williamson,19.0,5.50,11.96,0.459,0.001,22.0,43.0,25.8,...,0.364,22019,108.8,9.0,6.6,7.1,26.8,0.077,201567,Kevin Love


In [741]:
data = pd.read_csv('./stats/' + season + '_' + str(percentile) + '_off_stats.csv')
# data = pd.read_csv('./stats/off_combined.csv')
data = data.fillna(0)

# Get off stats only from selected offensive players
data = data.add_prefix('OFF_')
data.rename(columns={'OFF_SEASON_ID': 'SEASON_ID'}, inplace=True)
combine = pd.merge(def_df, data, how='inner', on=['OFF_PLAYER_ID', 'SEASON_ID'])

# Get correct sort
combine.sort_values(['DEF_PLAYER_ID', 'OFF_PLAYER_ID'], inplace=True)
print(combine.shape)
print(combine.columns)
combine.head()

(1076, 68)
Index(['DEF_PLAYER_ID', 'DEF_PLAYER_NAME', 'DEF_AGE', 'DEF_D_FGM', 'DEF_D_FGA',
       'DEF_NORMAL_FG_PCT', 'DEF_PCT_PLUSMINUS', 'DEF_W', 'DEF_L', 'DEF_MIN',
       'DEF_STL', 'DEF_BLK', 'DEF_DREB', 'DEF_CONTESTED_SHOTS',
       'DEF_CONTESTED_SHOTS_2PT', 'DEF_CONTESTED_SHOTS_3PT', 'DEF_DEFLECTIONS',
       'DEF_CHARGES_DRAWN', 'DEF_BOXOUTS', 'DEF_PCT_BOX_OUTS_REB', 'SEASON_ID',
       'DEF_RATING', 'DEF_OPP_PTS_OFF_TOV', 'DEF_OPP_PTS_2ND_CHANCE',
       'DEF_OPP_PTS_FB', 'DEF_OPP_PTS_PAINT', 'DEF_DEF_WS', 'OFF_PLAYER_ID',
       'OFF_PLAYER_NAME', 'OFF_AGE', 'OFF_GP', 'OFF_MIN', 'OFF_FGM', 'OFF_FGA',
       'OFF_FG_PCT', 'OFF_FG3M', 'OFF_FG3A', 'OFF_FG3_PCT', 'OFF_FTM',
       'OFF_FTA', 'OFF_FT_PCT', 'OFF_OREB', 'OFF_DREB', 'OFF_REB', 'OFF_AST',
       'OFF_TOV', 'OFF_BLKA', 'OFF_PF', 'OFF_PTS', 'OFF_TOUCHES',
       'OFF_PAINT_TOUCHES', 'OFF_PAINT_TOUCH_FGM', 'OFF_PAINT_TOUCH_FGA',
       'OFF_PAINT_TOUCH_PASSES', 'OFF_PAINT_TOUCH_TOV', 'OFF_DRIVE_PTS',
       'OFF_DRIVE_

Unnamed: 0,DEF_PLAYER_ID,DEF_PLAYER_NAME,DEF_AGE,DEF_D_FGM,DEF_D_FGA,DEF_NORMAL_FG_PCT,DEF_PCT_PLUSMINUS,DEF_W,DEF_L,DEF_MIN,...,OFF_CATCH_SHOOT_FG_PCT,OFF_PULL_UP_PTS,OFF_PULL_UP_FG_PCT,OFF_PAINT_TOUCH_PTS,OFF_PAINT_TOUCH_FG_PCT,OFF_POST_TOUCH_PTS,OFF_POST_TOUCH_FG_PCT,OFF_ELBOW_TOUCH_PTS,OFF_ELBOW_TOUCH_FG_PCT,OFF_EFF_FG_PCT
453,2544,LeBron James,35.0,3.91,9.31,0.463,-0.043,2.0,1.0,3.8,...,0.451,5.7,0.444,0.8,0.636,0.8,0.457,0.5,0.538,0.536
84,2544,LeBron James,35.0,3.91,9.31,0.463,-0.043,2.0,1.0,3.8,...,0.411,9.6,0.427,1.1,0.571,2.4,0.427,0.8,0.618,0.524
512,2544,LeBron James,35.0,3.91,9.31,0.463,-0.043,2.0,1.0,3.8,...,0.328,2.1,0.363,1.1,0.795,0.0,0.5,0.3,0.636,0.516
866,2544,LeBron James,35.0,3.91,9.31,0.463,-0.043,2.0,1.0,3.8,...,0.352,2.9,0.449,2.9,0.566,0.0,1.0,0.9,0.553,0.519
667,2544,LeBron James,35.0,3.91,9.31,0.463,-0.043,2.0,1.0,3.8,...,0.384,0.3,0.257,2.1,0.606,0.1,1.0,0.4,0.8,0.576


In [742]:
# Sort head to head stats by same sort
h2h_df.sort_values(['DEF_PLAYER_ID', 'OFF_PLAYER_ID'], inplace=True)
h2h_df.head()

Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,MATCHUP_AST,...,MATCHUP_FG3M,MATCHUP_FG3A,HELP_BLK,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC
25370,22019,201942,DeMar DeRozan,2544,LeBron James,3,19:58,93.2,15.021459,5.364807,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1285.622318
62531,22019,202695,Kawhi Leonard,2544,LeBron James,4,15:07,74.9,32.042724,4.00534,...,1.335113,4.00534,0.0,0.0,0.0,0.0,1.335113,2.670227,0.0,1211.081442
48573,22019,203109,Jae Crowder,2544,LeBron James,2,13:19,60.6,9.90099,1.650165,...,3.30033,11.551155,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1317.986799
63755,22019,1626162,Kelly Oubre Jr.,2544,LeBron James,3,13:04,68.3,10.248902,2.928258,...,1.464129,4.392387,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1147.584187
31394,22019,1627827,Dorian Finney-Smith,2544,LeBron James,4,16:46,84.4,10.663507,1.184834,...,3.554502,5.924171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1191.587678


**Machine Learning Implementation**

Utilizes the keras Model from Tensorflow to predict offensive player's stats.
If there is an error, make sure to first install tensorflow. This can be done through Anaconda, or through the command 

```
!pip install tensorflow
```

In [780]:
# # Clean data to use on model
# X = combine.drop(columns=['DEF_PLAYER_ID', 'DEF_PLAYER_NAME', 'OFF_PLAYER_ID', 'OFF_PLAYER_NAME', 'SEASON_ID'])
# y = out['PLAYER_PTS']

# # Save to csvs
# X.to_csv('./stats/cleaned/' + season + '_X.csv', index=False)
# y.to_csv('./stats/cleaned/' + season + '_y.csv', index=False)

X = pd.read_csv('./stats/cleaned/X_combined.csv')
y = pd.read_csv('./stats/cleaned/y_combined.csv')

print(X.shape, y.shape)

# Apply normalization to input
X = (X - X.mean()) / X.std()

# Generate training and verification data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

print('Training Entries')
X_train.info()
X_train.describe()

print('Testing Entries')
X_test.info()
X_test.describe()

X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

(2701, 63) (2701, 1)
Training Entries
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2160 entries, 395 to 1224
Data columns (total 63 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   DEF_AGE                  2160 non-null   float64
 1   DEF_D_FGM                2160 non-null   float64
 2   DEF_D_FGA                2160 non-null   float64
 3   DEF_NORMAL_FG_PCT        2160 non-null   float64
 4   DEF_PCT_PLUSMINUS        2160 non-null   float64
 5   DEF_W                    2160 non-null   float64
 6   DEF_L                    2160 non-null   float64
 7   DEF_MIN                  2160 non-null   float64
 8   DEF_STL                  2160 non-null   float64
 9   DEF_BLK                  2160 non-null   float64
 10  DEF_DREB                 2160 non-null   float64
 11  DEF_CONTESTED_SHOTS      2160 non-null   float64
 12  DEF_CONTESTED_SHOTS_2PT  2160 non-null   float64
 13  DEF_CONTESTED_SHOTS_3PT  2160 non-null

**Keras Model Philosophy**

1. Normalize inputs
2. Dropout layer to prevent overfitting
3. Dense layer

In [781]:
# Create model
model = Sequential()
model.add(Dense(int(X.shape[1] / 2), activation='relu', input_shape=(X.shape[1],)))
model.add(Dropout(0.7))
model.add(Dense(1, activation='relu')) # y.shape[1]

# Compile model
model.compile(optimizer='nadam', loss='mean_squared_error', metrics=['accuracy'])

model.summary()

Model: "sequential_68"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_147 (Dense)           (None, 31)                1984      
                                                                 
 dropout_79 (Dropout)        (None, 31)                0         
                                                                 
 dense_148 (Dense)           (None, 1)                 32        
                                                                 
Total params: 2,016
Trainable params: 2,016
Non-trainable params: 0
_________________________________________________________________


In [782]:
# Fit model
bsize = int(X_train.shape[0] * 0.6) # 30% batch size

%time history = model.fit(X_train, y_train, epochs=7000, validation_split=0.2, batch_size=bsize, verbose=0)


`Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.



Wall time: 51.6 s


In [788]:
y_pred = model.predict(X_test)
score = model.evaluate(X_test, y_test)
print(score)

diff = abs(y_pred.flatten() - y_test.flatten())
print(diff.mean())
predictions = pd.DataFrame(data={'predicted': y_pred.flatten(), 'actual': y_test.flatten(), 'abs_diff': diff})
predictions.sort_values(by='abs_diff', inplace=True)

s = predictions[predictions['actual'] > 40]
print(s['abs_diff'].mean())
s
# predictions

[53.157312855041845, 0.0]
5.785015267735747
18.235643493997227


Unnamed: 0,predicted,actual,abs_diff
94,34.797291,40.604344,5.807053
360,31.819305,41.794088,9.974782
417,28.563164,40.697674,12.134511
64,30.472565,44.265594,13.793029
96,26.268112,41.394336,15.126223
265,27.558361,44.515103,16.956742
246,36.972935,55.016181,18.043247
214,30.409504,48.70624,18.296737
488,26.380497,45.112782,18.732285
367,19.710093,44.684129,24.974037


In [784]:
fig = px.histogram(x=predictions['abs_diff'], labels={
    'x': 'difference'
})

# nth percentile difference (Lower number better)
print(predictions.quantile(q=0.5)['abs_diff'])
print('Percent acceptable: ' + str(len(predictions[predictions['abs_diff'] < 5]) / len(predictions) * 100))

fig.show()

4.82628540242062
Percent acceptable: 51.20147874306839


In [785]:
# Plot training and validation loss
fig = px.line()
fig.add_scatter(y=history.history['loss'], name='Train')
fig.add_scatter(y=history.history['val_loss'], name='Validation')

fig.update_layout(title='Train vs Validation Loss',
                   xaxis_title='Epochs',
                   yaxis_title='Loss')

fig.show()

In [822]:
# compute SHAP values
explainer = shap.DeepExplainer(model, X_train)
shap_values = explainer.shap_values(X_test)

shap.initjs()
shap.force_plot(explainer.expected_value[0], shap_values[0][0], features = X.columns)


Your TensorFlow version is newer than 2.4.0 and so graph support has been removed in eager mode and some static graphs may not be supported. See PR #1483 for discussion.



In [897]:
# Predict matchup
off_id = 203999
def_id = 1626158

# Get offense stats
data = pd.read_csv('./stats/2021-22_0.2_off_stats.csv')
data = data.add_prefix('OFF_')
data.rename(columns={'OFF_SEASON_ID': 'SEASON_ID'}, inplace=True)
inputs_o = data[data['OFF_PLAYER_ID'] == off_id]

# Get defensive stats
def_data = pd.read_csv('./stats/'+ season + '_def_stats.csv')
def_data = def_data.fillna(0)
def_data = def_data.add_prefix('DEF_')
def_data.rename(columns={'DEF_SEASON_ID': 'SEASON_ID', 'DEF_DEF_RATING': 'DEF_RATING', 
                     'DEF_DEF_BOXOUTS': 'DEF_BOXOUTS'}, inplace=True)
def_data.drop(columns=['DEF_GP', 'DEF_G', 'DEF_D_FG_PCT', 'DEF_DREB_PCT', 'DEF_PCT_STL', 'DEF_PCT_BLK'], inplace=True)
inputs_d = def_data[def_data['DEF_PLAYER_ID'] == def_id]

# Get both inputs together
inputs_o.drop(columns=['OFF_PLAYER_ID', 'SEASON_ID'], inplace=True)
inputs_d.drop(columns=['DEF_PLAYER_ID', 'DEF_PLAYER_NAME', 'SEASON_ID'], inplace=True)
inputs_o = inputs_o.reset_index(drop=True)
inputs_d = inputs_d.reset_index(drop=True)
input = pd.concat([inputs_d, inputs_o], axis=1)

X = pd.read_csv('./stats/cleaned/X_combined.csv')

input = (input - X.mean()) / X.std()
model.predict(input.to_numpy())



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



array([[31.981586]], dtype=float32)

In [898]:
32.3 / 66.6 * 36.232136 + 18/66.6*27.47274 + 16.3/66.6*31.981586

32.8244319009009