In [11]:
import pandas as pd
import requests
import json

In [12]:
df = pd.read_csv('./stats/player/dlillard_2021-22_off_stats.csv')

print(df.shape)
df.head()

(189, 27)


Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,TEAM_PTS,...,MATCHUP_FG3A,MATCHUP_FG3_PCT,HELP_BLK,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC
0,22021,203081,Damian Lillard,1628969,Mikal Bridges,3,24:22,126.3,28,132,...,12,0.333,0,0,0,0,0,0,0,1461.9
1,22021,203081,Damian Lillard,1628415,Dillon Brooks,2,19:21,94.3,20,88,...,10,0.4,0,0,0,0,4,5,3,1161.5
2,22021,203081,Damian Lillard,1630178,Tyrese Maxey,2,13:19,65.1,23,83,...,11,0.455,0,0,0,0,2,2,0,798.7
3,22021,203081,Damian Lillard,202339,Eric Bledsoe,3,10:10,58.7,18,62,...,6,0.667,0,0,0,0,2,2,1,609.8
4,22021,203081,Damian Lillard,202340,Avery Bradley,2,10:31,55.6,7,46,...,7,0.143,0,0,0,0,4,4,0,630.9


**Convert totals to per 100 possessions**

Most limited sample size, convert each to per possession and then multiply by 100 (more standard to measure in per 100 possessions rather than per possession, as generally nba teams average [100 possessions per game](https://www.teamrankings.com/nba/stat/possessions-per-game)).

In [13]:
MIN_MATCHUP_MINS = 5

In [26]:
off_df = df.copy()
off_df = off_df[off_df['MATCHUP_TIME_SEC'] > MIN_MATCHUP_MINS * 60] # Must have played more than x minutes
off_df.drop(columns=['TEAM_PTS', 'MATCHUP_FG_PCT', 'MATCHUP_FG3_PCT'], inplace=True)

def per_100_poss(x):
    return x / off_df['PARTIAL_POSS'] * 100

# Set stats to per 100 possessions
off_df = off_df.apply(lambda x: per_100_poss(x) if x.name not in off_df.columns[0:8] else x)

off_df.sort_values('DEF_PLAYER_NAME', ascending=True, inplace=True)
print(off_df.shape)
off_df.head()

(21, 24)


Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,MATCHUP_AST,...,MATCHUP_FG3M,MATCHUP_FG3A,HELP_BLK,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC
4,22021,203081,Damian Lillard,202340,Avery Bradley,2,10:31,55.6,12.589928,7.194245,...,1.798561,12.589928,0.0,0.0,0.0,0.0,7.194245,7.194245,0.0,1134.71223
19,22021,203081,Damian Lillard,1629012,Collin Sexton,1,5:07,26.9,11.152416,7.434944,...,0.0,7.434944,0.0,0.0,0.0,0.0,3.717472,3.717472,3.717472,1140.520446
5,22021,203081,Damian Lillard,1630558,Davion Mitchell,2,9:16,47.5,23.157895,6.315789,...,0.0,14.736842,0.0,0.0,0.0,0.0,18.947368,18.947368,4.210526,1169.894737
16,22021,203081,Damian Lillard,1628368,De'Aaron Fox,2,5:01,32.4,27.777778,9.259259,...,3.08642,12.345679,0.0,0.0,0.0,0.0,6.17284,9.259259,3.08642,929.938272
15,22021,203081,Damian Lillard,1629001,De'Anthony Melton,3,7:03,36.3,11.019284,11.019284,...,0.0,2.754821,0.0,0.0,0.0,0.0,11.019284,11.019284,5.509642,1163.911846


**Retrieve Defensive Stats**

Retrieves defensive data for each defending player

In [23]:
def_df = pd.read_csv('./stats/2021-22_def_stats.csv')

# Get def stats only from selected defenders
def_df = def_df[def_df['PLAYER_ID'].isin(off_df['DEF_PLAYER_ID'].tolist())]

print(def_df.shape)
def_df.head()

(21, 18)


Unnamed: 0,PLAYER_ID,PLAYER_NAME,PLAYER_POSITION,AGE,GP,G,FREQ,D_FGM,D_FGA,D_FG_PCT,NORMAL_FG_PCT,PCT_PLUSMINUS,W,L,MIN,STL,BLK,DREB
28,202340,Avery Bradley,G,31.0,41,41,1.0,4.29,8.24,0.521,0.448,0.073,15.0,26.0,29.0,0.7,1.7,5.0
92,1629012,Collin Sexton,G,23.0,11,11,1.0,4.09,9.27,0.441,0.448,-0.007,14.0,30.0,33.5,0.7,0.2,4.8
119,1630558,Davion Mitchell,G,23.0,43,43,1.0,4.05,9.74,0.415,0.441,-0.026,27.0,17.0,24.6,0.6,0.1,2.8
123,1628368,De'Aaron Fox,G,24.0,42,42,1.0,6.67,13.1,0.509,0.448,0.061,17.0,32.0,29.0,0.9,0.3,3.3
125,1629001,De'Anthony Melton,G,23.0,41,41,1.0,4.56,9.07,0.503,0.446,0.057,19.0,14.0,12.9,0.3,0.1,1.1


**Keras Model Implementation**

Use Keras Model from Tensorflow to predict offensive player's stats.
If there is an error, make sure to first install tensorflow. This can be done through Anaconda, or through the command 

```
!pip install tensorflow
```

In [70]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers

In [92]:
# Clean data to use on model
X = def_df[def_df.columns[7:]].drop(columns=['W', 'L', 'MIN'])
y = off_df[off_df.columns[8:12]]

# Generate training and verification data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X

Unnamed: 0,D_FGM,D_FGA,D_FG_PCT,NORMAL_FG_PCT,PCT_PLUSMINUS,STL,BLK,DREB
28,4.29,8.24,0.521,0.448,0.073,0.7,1.7,5.0
92,4.09,9.27,0.441,0.448,-0.007,0.7,0.2,4.8
119,4.05,9.74,0.415,0.441,-0.026,0.6,0.1,2.8
123,6.67,13.1,0.509,0.448,0.061,0.9,0.3,3.3
125,4.56,9.07,0.503,0.446,0.057,0.3,0.1,1.1
152,5.14,11.67,0.441,0.435,0.005,0.0,0.7,1.3
171,4.3,9.18,0.468,0.449,0.019,0.2,0.0,1.0
182,6.73,14.34,0.469,0.45,0.019,,,
340,5.34,10.81,0.494,0.457,0.037,0.8,0.6,4.0
366,6.66,14.46,0.46,0.447,0.013,0.0,0.0,0.5


In [93]:
# Define inputs, outputs, and layers
# TODO: Research layers shape and activation functions
inputs = keras.Input(shape=(X.columns.size,), dtype='float64')

x = tf.keras.layers.Dense(8, activation='relu')(inputs)

outputs = layers.Dense(y.columns.size, activation='relu')(x)

# Create model
model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

Model: "model_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_20 (InputLayer)       [(None, 8)]               0         
                                                                 
 dense_24 (Dense)            (None, 8)                 72        
                                                                 
 dense_25 (Dense)            (None, 4)                 36        
                                                                 
Total params: 108
Trainable params: 108
Non-trainable params: 0
_________________________________________________________________


In [91]:
# Compile model
# TODO: Research optimizer and loss
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')


