In [1]:
# Import libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 25)
import matplotlib.pyplot as plt
import pickle
import requests
from bs4 import BeautifulSoup
from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

In [2]:
#Load model and features
# Each artifact is opened in a `with` block so its file handle is closed as soon
# as the object has been read (the bare `open(...)` form never closed them).
# NOTE: unpickling can execute arbitrary code - only load .sav files that come
# from a trusted source.
with open("Model.sav", 'rb') as model_file:
    model = pickle.load(model_file)
with open("Features.sav", 'rb') as features_file:
    features = pickle.load(features_file)
with open("scale.sav", 'rb') as scale_file:
    scale = pickle.load(scale_file)

In [3]:
# Show the feature names loaded from Features.sav
features

['MIN',
 'OREB',
 '%FTA',
 '%PTS',
 '3PA',
 '%TOV',
 'PF',
 '%OREB',
 'STL',
 'DD2',
 '%PFD',
 '%3PA',
 'BLK',
 '%AST',
 'GP',
 '+/-',
 '%BLK',
 '%DREB',
 '%PF',
 'NETRTG',
 'Loss%',
 '%STL',
 'COUNTRY',
 'COLLEGE',
 '%BLKA',
 'TD3',
 'BMI',
 'AGE']

## Testing
Let us test our model with two scenarios:
1. A player who did not sustain a season-ending injury
2. A player who sustained a season-ending injury

In [4]:
# Example 1: Mikal Bridges 2021-2022 season, no injury.
# Keys are listed in the same order as the model's feature list.
example_1 = {
    "MIN": 2854,
    "OREB": 0.9,
    "%FTA": 13.3,
    "%PTS": 16.7,
    "3PA": 3.8,
    "%TOV": 9.7,
    "PF": 1.8,
    "%OREB": 14.1,
    "STL": 1.2,
    "DD2": 0,
    "%PFD": 13.9,
    "%3PA": 17.2,
    "BLK": 0.4,
    "%AST": 10.9,
    "GP": 82,
    "+/-": 7,
    "%BLK": 15.1,
    "%DREB": 13.1,
    "%PF": 13.4,
    "NETRTG": 9.8,
    "Loss%": 0.22,
    "%STL": 18.8,
    "COUNTRY": 1,
    "COLLEGE": 1,
    "%BLKA": 14,
    "TD3": 0,
    "BMI": 24.23,
    "AGE": 25,
}

In [5]:
# Example 2: Klay Thompson 2017-2018 season, sustained a season-ending left ACL injury in Jun. 2019.
# Keys are listed in the same order as the model's feature list.
example_2 = {
    "MIN": 2504,
    "OREB": 0.4,
    "%FTA": 10.1,
    "%PTS": 24.5,
    "3PA": 7.1,
    "%TOV": 15.5,
    "PF": 1.6,
    "%OREB": 7.3,
    "STL": 0.8,
    "DD2": 2,
    "%PFD": 13.1,
    "%3PA": 34.2,
    "BLK": 0.5,
    "%AST": 11.2,
    "GP": 73,
    "+/-": 5.4,
    "%BLK": 9,
    "%DREB": 13.2,
    "%PF": 11.6,
    "NETRTG": 9.8,
    "Loss%": 0.29,
    "%STL": 13.1,
    "COUNTRY": 1,
    "COLLEGE": 1,
    "%BLKA": 9,
    "TD3": 0,
    "BMI": 25.51,
    "AGE": 27,
}

In [6]:
# Example 3: DeMarcus Cousins 2016-2017 season, sustained a season-ending left Achilles tendon rupture injury in Jan. 2018.
# Keys are listed in the same order as the model's feature list.
example_3 = {
    "MIN": 2465,
    "OREB": 2.1,
    "%FTA": 50.3,
    "%PTS": 36.2,
    "3PA": 5,
    "%TOV": 37.3,
    "PF": 3.9,
    "%OREB": 33.1,
    "STL": 1.4,
    "DD2": 46,
    "%PFD": 51.6,
    "%3PA": 26.8,
    "BLK": 1.3,
    "%AST": 28.8,
    "GP": 72,
    "+/-": -0.3,
    "%BLK": 41.9,
    "%DREB": 39.6,
    "%PF": 27.9,
    "NETRTG": -0.9,
    "Loss%": 0.58,
    "%STL": 24.5,
    "COUNTRY": 1,
    "COLLEGE": 1,
    "%BLKA": 42.7,
    "TD3": 2,
    "BMI": 27.55,
    "AGE": 26,
}

In [7]:
# Add test cases to a dataframe
# One record per player; rows are labelled 0, 1, 2 in the order given and the
# columns follow the key order of the example dicts.
test_records = [example_1, example_2, example_3]
new_test_example = pd.DataFrame(test_records)
new_test_example

Unnamed: 0,MIN,OREB,%FTA,%PTS,3PA,%TOV,PF,%OREB,STL,DD2,%PFD,%3PA,...,%BLK,%DREB,%PF,NETRTG,Loss%,%STL,COUNTRY,COLLEGE,%BLKA,TD3,BMI,AGE
0,2854,0.9,13.3,16.7,3.8,9.7,1.8,14.1,1.2,0,13.9,17.2,...,15.1,13.1,13.4,9.8,0.22,18.8,1,1,14.0,0,24.23,25
1,2504,0.4,10.1,24.5,7.1,15.5,1.6,7.3,0.8,2,13.1,34.2,...,9.0,13.2,11.6,9.8,0.29,13.1,1,1,9.0,0,25.51,27
2,2465,2.1,50.3,36.2,5.0,37.3,3.9,33.1,1.4,46,51.6,26.8,...,41.9,39.6,27.9,-0.9,0.58,24.5,1,1,42.7,2,27.55,26


In [8]:
# One single-row frame per test player, each keeping its original row label
# (0, 1, 2). The float cast reproduces the upcast that occurs when a mixed
# int/float row is pulled out as a Series and turned back into a frame.
mikal, klay, demarcus = (
    new_test_example.iloc[[row]].astype(float) for row in range(3)
)

In [9]:
# Single-row frame for example 1 (row label 0)
mikal

Unnamed: 0,MIN,OREB,%FTA,%PTS,3PA,%TOV,PF,%OREB,STL,DD2,%PFD,%3PA,...,%BLK,%DREB,%PF,NETRTG,Loss%,%STL,COUNTRY,COLLEGE,%BLKA,TD3,BMI,AGE
0,2854.0,0.9,13.3,16.7,3.8,9.7,1.8,14.1,1.2,0.0,13.9,17.2,...,15.1,13.1,13.4,9.8,0.22,18.8,1.0,1.0,14.0,0.0,24.23,25.0


In [10]:
# Single-row frame for example 2 (row label 1)
klay

Unnamed: 0,MIN,OREB,%FTA,%PTS,3PA,%TOV,PF,%OREB,STL,DD2,%PFD,%3PA,...,%BLK,%DREB,%PF,NETRTG,Loss%,%STL,COUNTRY,COLLEGE,%BLKA,TD3,BMI,AGE
1,2504.0,0.4,10.1,24.5,7.1,15.5,1.6,7.3,0.8,2.0,13.1,34.2,...,9.0,13.2,11.6,9.8,0.29,13.1,1.0,1.0,9.0,0.0,25.51,27.0


In [11]:
# Single-row frame for example 3 (row label 2)
demarcus

Unnamed: 0,MIN,OREB,%FTA,%PTS,3PA,%TOV,PF,%OREB,STL,DD2,%PFD,%3PA,...,%BLK,%DREB,%PF,NETRTG,Loss%,%STL,COUNTRY,COLLEGE,%BLKA,TD3,BMI,AGE
2,2465.0,2.1,50.3,36.2,5.0,37.3,3.9,33.1,1.4,46.0,51.6,26.8,...,41.9,39.6,27.9,-0.9,0.58,24.5,1.0,1.0,42.7,2.0,27.55,26.0


In [12]:
# Scale new data
# The scaled frame is rebuilt from the untouched test table (row label 1 is the
# Klay Thompson example) instead of from `klay` itself, so re-running this cell
# no longer feeds already-scaled values back through the scaler.
# Selecting the columns by `features` pins their order to the same list used to
# label the transformed output, rather than relying on dict key order.
klay = pd.DataFrame(scale.transform(new_test_example.loc[[1], features]), columns = features)
klay

Unnamed: 0,MIN,OREB,%FTA,%PTS,3PA,%TOV,PF,%OREB,STL,DD2,%PFD,%3PA,...,%BLK,%DREB,%PF,NETRTG,Loss%,%STL,COUNTRY,COLLEGE,%BLKA,TD3,BMI,AGE
0,1.32738,-0.712555,-0.899567,1.018048,2.760065,-0.40868,-0.375725,-0.888414,0.248607,-0.309825,-0.693216,1.164782,...,-0.650776,-0.795605,-1.274989,1.153073,-1.338847,-0.767665,0.645444,0.617986,-1.060909,-0.112847,0.498802,0.064433


In [13]:
# Proving we can use our model to predict on that!
# Shows the predicted label together with the second column of predict_proba
# for the single row (presumably the probability of class 1 - confirm against
# model.classes_).
klay_label = model.predict(klay)
klay_proba = model.predict_proba(klay)[0][1]
klay_label, klay_proba

(array([1]), 0.57)

In [14]:
# Rank the features by the model's importance score, highest first.
# The single unnamed column of importances gets the default label 0.
importance_table = pd.DataFrame(data=model.feature_importances_, index=features)
importance_table.sort_values(by=0, ascending=False)

Unnamed: 0,0
BLK,0.06515
STL,0.064181
PF,0.047024
MIN,0.046547
GP,0.045877
%DREB,0.044354
Loss%,0.042264
BMI,0.04006
%3PA,0.039323
NETRTG,0.039248


### Build Current Players Dataframe

In [15]:
# Load 2022-2023 NBA
# Reads the season table from a path relative to the notebook's working directory
players = pd.read_csv('Data/2022_2023.csv')
# Preview the first five rows
players.head()

Unnamed: 0,Name,TEAM,AGE,HEIGHT,WEIGHT,COLLEGE,COUNTRY,GP,NETRTG,TS%,W,L,...,%FTA,%OREB,%DREB,%REB,%AST,%TOV,%STL,%BLK,%BLKA,%PF,%PFD,%PTS
0,Aaron Holiday,ATL,26,72,185,UCLA,USA,63,0.9,52.8,32,31,...,8.0,12.3,9.0,9.9,21.5,16.4,25.7,14.3,15.2,24.2,14.1,12.3
1,AJ Griffin,ATL,19,78,220,Duke,USA,72,1.5,57.7,34,38,...,7.6,11.6,11.9,11.9,9.8,11.0,21.1,8.2,15.3,15.3,7.7,18.7
2,Bogdan Bogdanovic,ATL,30,77,225,,Serbia,54,1.2,58.6,27,27,...,10.4,7.0,13.7,12.1,19.1,16.6,21.0,10.7,8.8,14.4,10.4,20.2
3,Bruno Fernando,ATL,24,82,240,Maryland,Angola,39,-8.9,57.1,13,26,...,25.6,42.2,28.1,32.4,17.8,16.4,12.8,59.6,23.3,38.5,27.3,17.2
4,Clint Capela,ATL,29,82,256,,Switzerland,65,-1.1,65.6,35,30,...,16.6,56.7,38.6,43.6,6.3,11.9,18.8,48.5,15.6,22.6,18.0,18.2


In [16]:
# Column dtypes and non-null counts for the loaded season table
players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 539 entries, 0 to 538
Data columns (total 54 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Name     539 non-null    object 
 1   TEAM     539 non-null    object 
 2   AGE      539 non-null    int64  
 3   HEIGHT   539 non-null    int64  
 4   WEIGHT   539 non-null    int64  
 5   COLLEGE  539 non-null    object 
 6   COUNTRY  539 non-null    object 
 7   GP       539 non-null    int64  
 8   NETRTG   539 non-null    float64
 9   TS%      539 non-null    float64
 10  W        539 non-null    int64  
 11  L        539 non-null    int64  
 12  MPG      539 non-null    float64
 13  PTS      539 non-null    float64
 14  FGM      539 non-null    float64
 15  FGA      539 non-null    float64
 16  FG%      539 non-null    float64
 17  3PM      539 non-null    float64
 18  3PA      539 non-null    float64
 19  3P%      539 non-null    float64
 20  FTM      539 non-null    float64
 21  FTA      539 non

In [17]:
# Tally columns by how many missing values they contain; a single `0` entry
# means no column has any nulls
players.isnull().sum().value_counts()

0    54
dtype: int64

In [18]:
# Load 2023-2024 NBA roster
# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response = requests.get('https://basketball.realgm.com/nba/players', timeout=30)
response.raise_for_status()

# Create a BeautifulSoup object from the HTML
soup = BeautifulSoup(response.content, 'html.parser')

# Get all of the player names
current_players = []
for player_cell in soup.find_all('td', attrs={'data-th': 'Player'}):
    player_link = player_cell.find('a')
    # Skip cells without a link instead of failing on `None.text`
    if player_link is None:
        continue
    current_players.append(player_link.text)

# An empty roster would make the later inner join silently drop every player,
# so fail here where the cause (changed page layout / blocked request) is clear.
if not current_players:
    raise RuntimeError("No player names found on the roster page; the page layout may have changed.")

# Print the list of players
current_players

['Precious Achiuwa',
 'Steven Adams',
 'Bam Adebayo',
 'Ochai Agbaji',
 'Santi Aldama',
 'Nickeil Alexander-Walker',
 'Grayson Allen',
 'Jarrett Allen',
 'Jose Alvarado',
 'Kyle Anderson',
 'Giannis Antetokounmpo',
 'Thanasis Antetokounmpo',
 'Cole Anthony',
 'OG Anunoby',
 'Ryan Arcidiacono',
 'Deni Avdija',
 'Deandre Ayton',
 'Udoka Azubuike',
 'Marvin Bagley III',
 'Amari Bailey',
 'Patrick Baldwin, Jr.',
 'LaMelo Ball',
 'Lonzo Ball',
 'Mohamed Bamba',
 'Paolo Banchero',
 'Desmond Bane',
 'Dalano Banton',
 'Dominick Barlow',
 'Harrison Barnes',
 'Scottie Barnes',
 'R.J. Barrett',
 'Charles Bassey',
 'Emoni Bates',
 'Keita Bates-Diop',
 'Nicolas Batum',
 'Bradley Beal',
 'Malik Beasley',
 'MarJon Beauchamp',
 'Charles Bediako',
 'Davis Bertans',
 'Patrick Beverley',
 'Saddiq Bey',
 'Goga Bitadze',
 'Onuralp Bitim',
 'Bismack Biyombo',
 'Anthony Black',
 'Leaky Black',
 'Bogdan Bogdanovic',
 'Bojan Bogdanovic',
 'Bol Bol',
 'Marques Bolden',
 'Devin Booker',
 'B.J. Boston, Jr.',
 'Ch

In [19]:
# Inner join dataframes
# The join key is named explicitly, and the scraped roster is de-duplicated
# first so a name that appears twice in the list cannot duplicate a player's row.
# NOTE(review): the match is on the exact name string, so a player whose name is
# formatted differently in the two sources (e.g. 'R.J. Barrett') may be dropped
# here - confirm against the CSV.
roster = pd.DataFrame(current_players, columns = ['Name']).drop_duplicates()
players = players.merge(roster, on = 'Name', how = 'inner')
players

Unnamed: 0,Name,TEAM,AGE,HEIGHT,WEIGHT,COLLEGE,COUNTRY,GP,NETRTG,TS%,W,L,...,%FTA,%OREB,%DREB,%REB,%AST,%TOV,%STL,%BLK,%BLKA,%PF,%PFD,%PTS
0,Aaron Holiday,ATL,26,72,185,UCLA,USA,63,0.9,52.8,32,31,...,8.0,12.3,9.0,9.9,21.5,16.4,25.7,14.3,15.2,24.2,14.1,12.3
1,Bogdan Bogdanovic,ATL,30,77,225,,Serbia,54,1.2,58.6,27,27,...,10.4,7.0,13.7,12.1,19.1,16.6,21.0,10.7,8.8,14.4,10.4,20.2
2,Bruno Fernando,ATL,24,82,240,Maryland,Angola,39,-8.9,57.1,13,26,...,25.6,42.2,28.1,32.4,17.8,16.4,12.8,59.6,23.3,38.5,27.3,17.2
3,Clint Capela,ATL,29,82,256,,Switzerland,65,-1.1,65.6,35,30,...,16.6,56.7,38.6,43.6,6.3,11.9,18.8,48.5,15.6,22.6,18.0,18.2
4,De'Andre Hunter,ATL,25,80,221,Virginia,USA,67,-0.6,56.3,35,32,...,20.7,9.5,16.2,14.6,8.5,14.6,11.9,8.1,20.7,24.7,23.9,19.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382,Johnny Davis,WAS,21,76,195,Wisconsin,USA,28,-6.2,44.6,10,18,...,13.6,9.8,18.0,15.9,14.4,13.7,18.5,17.8,17.0,25.8,11.4,17.7
383,Jordan Goodwin,WAS,24,75,200,St. Louis,USA,62,-4.5,53.1,22,40,...,12.0,22.8,18.7,19.7,31.9,16.6,32.8,22.0,24.3,21.0,16.1,16.2
384,Kristaps Porzingis,WAS,27,87,240,,Latvia,65,1.4,62.7,27,38,...,43.3,30.2,27.7,28.2,14.5,22.6,21.0,43.7,28.4,24.0,45.8,29.5
385,Kyle Kuzma,WAS,27,81,221,Utah,USA,64,0.3,54.4,29,35,...,23.1,12.9,25.5,22.9,20.2,28.4,11.6,12.1,26.8,17.0,20.0,25.7


387 current NBA players played last season.

In [20]:
# Process current player data

# Binary bin Country['USA'] = 1, other = 0
# The result is assigned back as a column. Calling replace(..., inplace=True) on
# the attribute-accessed column is a chained in-place update: pandas warns about
# it, and under copy-on-write it leaves `players` untouched, which would then
# turn every COUNTRY into 0. The value 1 is accepted as well so that re-running
# the cell keeps already-encoded rows.
players['COUNTRY'] = players['COUNTRY'].isin(['USA', 1]).astype(int)

# Binary bin College = 1 , no College = 0
# "No college" is stored as the text 'None' in the CSV. Newer pandas versions
# parse that text as a missing value on read, so NaN is treated as "no college"
# too; otherwise every such player would be flagged as having attended college.
# The value 0 is accepted so that re-running the cell is harmless.
no_college = players['COLLEGE'].isna() | players['COLLEGE'].isin(['None', 0])
players['COLLEGE'] = (~no_college).astype(int)

# Dummify Teams
# Guarded so a re-run (TEAM already expanded) does not raise; dtype=int keeps
# the 0/1 encoding regardless of the pandas default dummy dtype.
if 'TEAM' in players.columns:
    players = pd.get_dummies(players, columns=['TEAM'], dtype=int)

# Engineering features
# BMI via the imperial formula: 703 * weight / height**2 (the sample rows are
# consistent with WEIGHT in pounds and HEIGHT in inches).
players['BMI'] = (players['WEIGHT'] / players['HEIGHT']**2) * 703
# Share of games played that ended in a loss
players['Loss%'] = players['L'] / players['GP']

players.head()


Unnamed: 0,Name,AGE,HEIGHT,WEIGHT,COLLEGE,COUNTRY,GP,NETRTG,TS%,W,L,MPG,...,TEAM_OKC,TEAM_ORL,TEAM_PHI,TEAM_PHX,TEAM_POR,TEAM_SAC,TEAM_SAS,TEAM_TOR,TEAM_UTA,TEAM_WAS,BMI,Loss%
0,Aaron Holiday,26,72,185,1,1,63,0.9,52.8,32,31,13.4,...,0,0,0,0,0,0,0,0,0,0,25.08777,0.492063
1,Bogdan Bogdanovic,30,77,225,0,0,54,1.2,58.6,27,27,27.9,...,0,0,0,0,0,0,0,0,0,0,26.678192,0.5
2,Bruno Fernando,24,82,240,1,0,39,-8.9,57.1,13,26,10.4,...,0,0,0,0,0,0,0,0,0,0,25.092207,0.666667
3,Clint Capela,29,82,256,0,0,65,-1.1,65.6,35,30,26.6,...,0,0,0,0,0,0,0,0,0,0,26.765021,0.461538
4,De'Andre Hunter,25,80,221,1,1,67,-0.6,56.3,35,32,31.7,...,0,0,0,0,0,0,0,0,0,0,24.275469,0.477612


In [21]:
# Identifier column to keep alongside the model inputs
name = ['Name']

# The two commented-out lists below are not used by this notebook; the column
# selection is driven by the `features` list loaded from Features.sav.

# # Domain knowledge features
# good_features = ['AGE', 'BMI', 'Loss%', 'COLLEGE', 'COUNTRY', 'GP', 'NETRTG', 'MPG', 'PTS',
#                 'FGA', '3PA', 'FTA', 'OREB', 'DREB', 'AST', 'TOV', 'STL', 'BLK', 'PF',
#                 'DD2', 'TD3', '+/-', 'MIN', 'USG%', '%FGA', 
#                 '%3PA',  '%FTA', '%OREB', '%DREB', '%AST', '%TOV',
#                 '%STL', '%BLK', '%BLKA', '%PF', '%PFD', '%PTS']

# # Teams
# teams = ['TEAM_ATL', 'TEAM_BKN', 'TEAM_BOS', 'TEAM_CHA',
#        'TEAM_CHI', 'TEAM_CLE', 'TEAM_DAL', 'TEAM_DEN', 'TEAM_DET', 'TEAM_GSW',
#        'TEAM_HOU', 'TEAM_IND', 'TEAM_LAC', 'TEAM_LAL', 'TEAM_MEM', 'TEAM_MIA',
#        'TEAM_MIL', 'TEAM_MIN', 'TEAM_NOP', 'TEAM_NYK',
#        'TEAM_OKC', 'TEAM_ORL', 'TEAM_PHI', 'TEAM_PHX', 'TEAM_POR', 'TEAM_SAC',
#        'TEAM_SAS', 'TEAM_TOR', 'TEAM_UTA', 'TEAM_WAS']

# Columns to keep: the player name followed by the model features, in that order
name_features = name + features

In [22]:
# Keep only the player name plus the model's input features, in the order
# given by `name_features`; every other column is dropped
players = players[name_features]
players

Unnamed: 0,Name,MIN,OREB,%FTA,%PTS,3PA,%TOV,PF,%OREB,STL,DD2,%PFD,...,%BLK,%DREB,%PF,NETRTG,Loss%,%STL,COUNTRY,COLLEGE,%BLKA,TD3,BMI,AGE
0,Aaron Holiday,845,0.4,8.0,12.3,1.4,16.4,1.3,12.3,0.6,0,14.1,...,14.3,9.0,24.2,0.9,0.492063,25.7,1,1,15.2,0,25.087770,26
1,Bogdan Bogdanovic,1509,0.4,10.4,20.2,6.7,16.6,1.6,7.0,0.8,0,10.4,...,10.7,13.7,14.4,1.2,0.500000,21.0,0,0,8.8,0,26.678192,30
2,Bruno Fernando,405,1.4,25.6,17.2,0.1,16.4,1.9,42.2,0.2,1,27.3,...,59.6,28.1,38.5,-8.9,0.666667,12.8,0,1,23.3,0,25.092207,24
3,Clint Capela,1730,4.0,16.6,18.2,0.0,11.9,2.1,56.7,0.7,35,18.0,...,48.5,38.6,22.6,-1.1,0.461538,18.8,0,0,15.6,0,26.765021,29
4,De'Andre Hunter,2126,0.7,20.7,19.6,4.3,14.6,3.0,9.5,0.5,1,23.9,...,8.1,16.2,24.7,-0.6,0.477612,11.9,1,1,20.7,0,24.275469,25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382,Johnny Davis,423,0.4,13.6,17.7,2.5,13.7,1.7,9.8,0.4,0,11.4,...,17.8,18.0,25.8,-6.2,0.642857,18.5,1,1,17.0,0,23.733553,21
383,Jordan Goodwin,1105,0.9,12.0,16.2,1.9,16.6,1.5,22.8,0.9,0,16.1,...,22.0,18.7,21.0,-4.5,0.645161,32.8,1,1,24.3,0,24.995556,24
384,Kristaps Porzingis,2120,1.8,43.3,29.5,5.5,22.6,3.0,30.2,0.9,20,45.8,...,43.7,27.7,24.0,1.4,0.584615,21.0,0,0,28.4,0,22.290924,27
385,Kyle Kuzma,2239,0.9,23.1,25.7,7.5,28.4,2.3,12.9,0.6,14,20.0,...,12.1,25.5,17.0,0.3,0.546875,11.6,1,1,26.8,1,23.679774,27


In [23]:
# Save current player data to disk
# The `with` block closes the file once the dump finishes, so the pickle is
# fully written before anything else tries to read it.
with open('CurrentPlayers.sav', 'wb') as players_file:
    pickle.dump(players, players_file)

In [24]:
# testp = players.drop('Name', axis = 1)
# testp

In [25]:
# model.predict(testp).sum()/len(model.predict(testp))

In [26]:
# model.predict(testp)

In [27]:
# pd.DataFrame({'Name':players.Name})

In [28]:
# model.predict_proba(testp)