In [108]:
import pandas as pd
import requests
import json

In [109]:
df = pd.read_csv('./stats/player/dlillard_2021-22_off_stats.csv')

print(df.shape)
df.head()

(189, 27)


Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,TEAM_PTS,...,MATCHUP_FG3A,MATCHUP_FG3_PCT,HELP_BLK,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC
0,22021,203081,Damian Lillard,1628969,Mikal Bridges,3,24:22,126.3,28,132,...,12,0.333,0,0,0,0,0,0,0,1461.9
1,22021,203081,Damian Lillard,1628415,Dillon Brooks,2,19:21,94.3,20,88,...,10,0.4,0,0,0,0,4,5,3,1161.5
2,22021,203081,Damian Lillard,1630178,Tyrese Maxey,2,13:19,65.1,23,83,...,11,0.455,0,0,0,0,2,2,0,798.7
3,22021,203081,Damian Lillard,202339,Eric Bledsoe,3,10:10,58.7,18,62,...,6,0.667,0,0,0,0,2,2,1,609.8
4,22021,203081,Damian Lillard,202340,Avery Bradley,2,10:31,55.6,7,46,...,7,0.143,0,0,0,0,4,4,0,630.9


**Convert totals to per 100 possessions**

Most limited sample size, convert each to per possession and then multiply by 100 (more standard to measure in per 100 possessions rather than per possession, as generally nba teams average [100 possessions per game](https://www.teamrankings.com/nba/stat/possessions-per-game)).

In [110]:
MIN_MATCHUP_MINS = 5

In [116]:
data = df.copy()
data = data[data['MATCHUP_TIME_SEC'] > MIN_MATCHUP_MINS * 60] # Must have played more than x minutes
data.drop(columns=['TEAM_PTS', 'MATCHUP_FG_PCT', 'MATCHUP_FG3_PCT'], inplace=True)

def per_100_poss(x):
    return x / data['PARTIAL_POSS'] * 100

# Set stats to per 100 possessions
data = data.apply(lambda x: per_100_poss(x) if x.name not in data.columns[0:8] else x)

data.sort_values('PLAYER_PTS', ascending=False)
print(data.shape)
data.head()

(21, 24)


Unnamed: 0,SEASON_ID,OFF_PLAYER_ID,OFF_PLAYER_NAME,DEF_PLAYER_ID,DEF_PLAYER_NAME,GP,MATCHUP_MIN,PARTIAL_POSS,PLAYER_PTS,MATCHUP_AST,...,MATCHUP_FG3M,MATCHUP_FG3A,HELP_BLK,HELP_FGM,HELP_FGA,HELP_FG_PERC,MATCHUP_FTM,MATCHUP_FTA,SFL,MATCHUP_TIME_SEC
0,22021,203081,Damian Lillard,1628969,Mikal Bridges,3,24:22,126.3,22.169438,11.876485,...,3.167063,9.501188,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1157.482185
1,22021,203081,Damian Lillard,1628415,Dillon Brooks,2,19:21,94.3,21.208908,4.241782,...,4.241782,10.604454,0.0,0.0,0.0,0.0,4.241782,5.302227,3.181336,1231.707317
2,22021,203081,Damian Lillard,1630178,Tyrese Maxey,2,13:19,65.1,35.330261,16.897081,...,7.680492,16.897081,0.0,0.0,0.0,0.0,3.072197,3.072197,0.0,1226.88172
3,22021,203081,Damian Lillard,202339,Eric Bledsoe,3,10:10,58.7,30.664395,10.221465,...,6.81431,10.221465,0.0,0.0,0.0,0.0,3.407155,3.407155,1.703578,1038.841567
4,22021,203081,Damian Lillard,202340,Avery Bradley,2,10:31,55.6,12.589928,7.194245,...,1.798561,12.589928,0.0,0.0,0.0,0.0,7.194245,7.194245,0.0,1134.71223


**Retrieve Defensive Stats**

Retrieves defensive data for each defending player

In [120]:
def_df = pd.read_csv('./stats/2021-22_def_stats.csv')

# Get def stats only from selected defenders
def_df = def_df[def_df['PLAYER_ID'].isin(data['DEF_PLAYER_ID'].tolist())]

print(def_df.shape)
def_df.head()

(21, 18)


Unnamed: 0,PLAYER_ID,PLAYER_NAME,PLAYER_POSITION,AGE,GP,G,FREQ,D_FGM,D_FGA,D_FG_PCT,NORMAL_FG_PCT,PCT_PLUSMINUS,W,L,MIN,STL,BLK,DREB
28,202340,Avery Bradley,G,31.0,41,41,1.0,4.29,8.24,0.521,0.448,0.073,15.0,26.0,29.0,0.7,1.7,5.0
92,1629012,Collin Sexton,G,23.0,11,11,1.0,4.09,9.27,0.441,0.448,-0.007,14.0,30.0,33.5,0.7,0.2,4.8
119,1630558,Davion Mitchell,G,23.0,43,43,1.0,4.05,9.74,0.415,0.441,-0.026,26.0,17.0,24.4,0.6,0.1,2.8
123,1628368,De'Aaron Fox,G,24.0,42,42,1.0,6.67,13.1,0.509,0.448,0.061,17.0,32.0,29.0,0.9,0.3,3.3
125,1629001,De'Anthony Melton,G,23.0,41,41,1.0,4.56,9.07,0.503,0.446,0.057,0.0,1.0,4.7,1.0,1.0,0.0


**Keras Model Implementation**

Use Keras Model from Tensorflow to predict offensive player's stats.
If there is an error, make sure to first install tensorflow. This can be done through Anaconda, or through the command 

```
!pip install tensorflow
```

In [122]:
import tensorflow as tf