# Import Dependencies

In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

# Database credentials are read from the environment so that no secret is
# stored in the notebook or in its version-control history.
#   NBA_STATS_DB_USER, NBA_STATS_DB_PASSWORD                   - required
#   NBA_STATS_DB_HOST, NBA_STATS_DB_PORT, NBA_STATS_DB_NAME    - optional overrides
# NOTE(review): the password that used to be committed on this line should be rotated.
db_user = os.environ.get("NBA_STATS_DB_USER")
db_password = os.environ.get("NBA_STATS_DB_PASSWORD")
if not db_user or not db_password:
    raise RuntimeError(
        "Set the NBA_STATS_DB_USER and NBA_STATS_DB_PASSWORD environment "
        "variables before running this notebook."
    )
db_host = os.environ.get("NBA_STATS_DB_HOST", "nbastats2.c4lr0vlcqzd5.us-east-1.rds.amazonaws.com")
db_port = os.environ.get("NBA_STATS_DB_PORT", "5432")
db_name = os.environ.get("NBA_STATS_DB_NAME", "NBA_Stats")

# quote_plus escapes characters such as '@' or ':' that would otherwise be
# parsed as URL delimiters.
connection_string = f"{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')


In [2]:
# Read the rookie records (1990-2020) from the Excel workbook into a DataFrame
all_rookies_df = pd.read_excel('Data/ROY Data/all_rookies_1990_2020.xlsx')
all_rookies_df = all_rookies_df.drop(columns=['Unnamed: 0','Rk','Debut','Age','Yrs'])

# Strip the literal '*' marker from player names. regex=False is passed
# explicitly: a bare '*' is not a valid regular expression, and the default
# value of `regex` differs between pandas versions.
all_rookies_df["Player"] = all_rookies_df.Player.str.replace('*', '', regex=False)

# Rookies of the 2019-20 season are held out; they are the players to predict.
# .copy() makes this an independent frame, so the columns added to it in later
# cells do not trigger SettingWithCopyWarning.
rookies_2019_20_df = all_rookies_df.loc[all_rookies_df["season"] == "2019-20"].copy()

# Every other season remains as the labelled data set
all_rookies_df = all_rookies_df.loc[all_rookies_df["season"] != "2019-20"].copy()


In [3]:
# Read the Rookie of the Year candidates (1990-2020) from the Excel workbook
roy_votes_df = pd.read_excel('Data/ROY Data/all_roy_candidates_1990_2020.xlsx')

# Keep only the two merge keys. .copy() yields an independent frame so that
# adding the label column below does not raise SettingWithCopyWarning.
roy_votes_df = roy_votes_df[["Player", "season"]].copy()

# Every row in this frame is a candidate, so the label is a constant 1
roy_votes_df["ROYCandidate"] = 1

In [4]:
# Build the cleaned 2019-20 frame with assign() instead of assigning columns
# in place: this frame was produced by slicing all_rookies_df, and in-place
# column assignment on such a slice raises pandas' SettingWithCopyWarning.
# Keyword order is preserved, so the three new columns are appended in the
# same order as before (BLKPG, STLPG, TOVPG).
rookies_2019_20_df = rookies_2019_20_df.assign(**{
    # Missing shooting percentages are treated as 0
    "3P%": rookies_2019_20_df["3P%"].fillna(0),
    "FG%": rookies_2019_20_df["FG%"].fillna(0),
    # Per-game averages: season total divided by games played
    "BLKPG": rookies_2019_20_df["BLK"] / rookies_2019_20_df["G"],
    "STLPG": rookies_2019_20_df["STL"] / rookies_2019_20_df["G"],
    "TOVPG": rookies_2019_20_df["TOV"] / rookies_2019_20_df["G"],
})


In [5]:
# Attach the candidate label. The left join keeps every rookie; rookies with
# no matching candidate row receive NaN in ROYCandidate.
all_rookies_df = all_rookies_df.merge(roy_votes_df, on=["Player", "season"], how="left")

# NaN -> 0 for the label (not a candidate) and for the shooting percentages
for column in ("ROYCandidate", "3P%", "FG%"):
    all_rookies_df[column] = all_rookies_df[column].fillna(0)

# Per-game averages: season total divided by games played
games_played = all_rookies_df["G"]
for total, per_game in (("BLK", "BLKPG"), ("STL", "STLPG"), ("TOV", "TOVPG")):
    all_rookies_df[per_game] = all_rookies_df[total] / games_played



In [6]:
# Load the advanced statistics
advanced_df = pd.read_csv("AdvancedStats.csv")

# Replace the file's own Player column with a copy of PlayerName so the frame
# can be merged on ["Player", "season"]. PlayerName itself is kept.
advanced_df = advanced_df.drop(columns="Player")
advanced_df["Player"] = advanced_df["PlayerName"]

# One row per player-season: keep='first' retains whichever row comes first.
# NOTE(review): presumably the season-total ("TOT") row is listed first for
# players who appeared for several teams - confirm against the CSV.
advanced_df = advanced_df.drop_duplicates(subset=['Player','season'], keep='first')

# Advanced metrics whose missing values are replaced with 0 after each merge
advanced_fill = {"PER": 0, "WS": 0, "VORP": 0}

# Inner joins: rookies without an advanced-stats row are dropped
all_rookies_df = all_rookies_df.merge(
    advanced_df, how='inner', on=["Player","season"]
).fillna(advanced_fill)

rookies_2019_20_df = rookies_2019_20_df.merge(
    advanced_df, how='inner', on=["Player","season"]
).fillna(advanced_fill)

In [7]:
# Inspect the merged column names. The _x / _y suffixes mark columns (G, MP)
# that exist in both the rookie frame and the advanced-stats frame.
all_rookies_df.columns


Index(['Player', 'G_x', 'MP_x', 'FG', 'FGA', '3P', '3PA', 'FT', 'FTA', 'ORB',
       'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'FG%', '3P%', 'FT%',
       'MP.1', 'PTS.1', 'TRB.1', 'AST.1', 'season', 'ROYCandidate', 'BLKPG',
       'STLPG', 'TOVPG', 'Rk', 'Pos', 'Age', 'Tm', 'G_y', 'MP_y', 'PER', 'TS%',
       '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%', 'BLK%', 'TOV%',
       'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM', 'BPM', 'VORP',
       'PlayerName', 'PlayerID'],
      dtype='object')

# Data Pre-processing

Includes:

* Converting categorical data
* Feature selection
* Scaling and transforming data

In [8]:
# Features (X) and target (y) for the train/test split, plus Xr: the same
# features for the 2019-20 rookies the models will score.
#
# Alternative feature set kept for reference: ['MP.1', 'PER', 'WS', 'VORP']
feature_columns = ['MP.1', 'PTS.1', 'TRB.1', 'AST.1', 'BLKPG', 'STLPG']

X = all_rookies_df[feature_columns]
Xr = rookies_2019_20_df[feature_columns]

# Target as an (n_samples, 1) column vector; some sklearn functions require 2-D arrays
y = all_rookies_df["ROYCandidate"].values.reshape(-1, 1)

In [9]:
# Split the data into training and testing sets.
# When neither test_size nor train_size is given, train_test_split holds out
# 25% for testing and trains on the remaining 75%; the default is written out
# here so the proportion is visible.
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [10]:
# Fit one StandardScaler for the features and one for the target, using the
# training split only.
# StandardScaler centres each column on zero mean and rescales it to unit
# variance; it does not change the shape of the distribution.
X_scaler = StandardScaler()
X_scaler.fit(X_train)

y_scaler = StandardScaler()
y_scaler.fit(y_train)

In [11]:
# Apply the scalers fitted on the training split to every array.
# Xr (the 2019-20 rookies) goes through the same feature scaler.
X_train_scaled, X_test_scaled, Xr_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test, Xr)
)
y_train_scaled, y_test_scaled = (
    y_scaler.transform(target) for target in (y_train, y_test)
)

# Create and test model(s)

## Create and Fit Model

Includes:
* Instantiating model
* Fitting with or without hyperparameter tuning (including GridSearchCV)

In [12]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from sklearn.ensemble import RandomForestClassifier

# y_train / y_test are (n, 1) column vectors. LabelEncoder expects 1-D input
# and emits a DataConversionWarning otherwise, so they are flattened first.
label_encoder = LabelEncoder()
label_encoder.fit(y_train.ravel())
encoded_y_train = label_encoder.transform(y_train.ravel())
encoded_y_test = label_encoder.transform(y_test.ravel())

# One-hot encoded targets, used by the Keras model further down
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# random_state pins the forest's internal randomness so the scores printed
# below are reproducible from run to run.
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf = rf.fit(X_train_scaled, encoded_y_train)

print(f"Training Data Score: {rf.score(X_train_scaled, encoded_y_train)}")
print(f"Testing Data Score: {rf.score(X_test_scaled, encoded_y_test)}")

  return f(**kwargs)


Training Data Score: 1.0
Testing Data Score: 0.92


In [13]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Candidate forest sizes to compare by cross-validation
param_grid = {'n_estimators': [400, 700, 1000]}

# fit() returns the fitted search object, so construction and fitting are chained
grid2 = GridSearchCV(estimator=rf, param_grid=param_grid, verbose=3).fit(
    X_train_scaled, encoded_y_train
)

# Best parameter combination and its mean cross-validated score
print(grid2.best_params_)
print(grid2.best_score_)


Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] n_estimators=400 ................................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] .................... n_estimators=400, score=0.952, total=   0.8s
[CV] n_estimators=400 ................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


[CV] .................... n_estimators=400, score=0.927, total=   0.8s
[CV] n_estimators=400 ................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.5s remaining:    0.0s


[CV] .................... n_estimators=400, score=0.927, total=   0.8s
[CV] n_estimators=400 ................................................
[CV] .................... n_estimators=400, score=0.927, total=   0.8s
[CV] n_estimators=400 ................................................
[CV] .................... n_estimators=400, score=0.959, total=   0.8s
[CV] n_estimators=700 ................................................
[CV] .................... n_estimators=700, score=0.952, total=   1.4s
[CV] n_estimators=700 ................................................
[CV] .................... n_estimators=700, score=0.927, total=   1.3s
[CV] n_estimators=700 ................................................
[CV] .................... n_estimators=700, score=0.930, total=   1.3s
[CV] n_estimators=700 ................................................
[CV] .................... n_estimators=700, score=0.933, total=   1.3s
[CV] n_estimators=700 ................................................
[CV] .

[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   20.5s finished


{'n_estimators': 700}
0.9403174603174603


In [14]:
from sklearn.metrics import classification_report

# Tuned random forest: `predictions` flags the 2019-20 rookies and
# `predictions2` scores the held-out test split.
predictions = grid2.predict(Xr_test_scaled)
predictions2 = grid2.predict(X_test_scaled)

class_names = ["Not ROY Candidate", "ROY Candidate"]
report = classification_report(encoded_y_test, predictions2, target_names=class_names)
print(report)

                   precision    recall  f1-score   support

Not ROY Candidate       0.93      0.98      0.96       474
    ROY Candidate       0.67      0.31      0.43        51

         accuracy                           0.92       525
        macro avg       0.80      0.65      0.69       525
     weighted avg       0.90      0.92      0.90       525



In [15]:
name_list = []
season_list = []
team_list = []
model_list = []

# Collect every 2019-20 rookie the random forest flags as a candidate.
# `predictions` is positionally aligned with the rows of rookies_2019_20_df
# (Xr was selected from that frame), so row i is fetched with iloc. Fields are
# read by column name rather than by hard-coded position (0 / 31 / 24), which
# would silently point at the wrong column if an upstream cell added or
# removed one.
for i, predicted_label in enumerate(predictions):
    if predicted_label != 1:
        continue
    rookie = rookies_2019_20_df.iloc[i]
    print(i, rookie["Player"], rookie["Tm"], rookie["season"])
    name_list.append(rookie["Player"])
    season_list.append(rookie["season"])
    team_list.append(rookie["Tm"])
    model_list.append("Basic Stats Random Forest")

72 Ja Morant MEM 2019-20
112 Zion Williamson NOP 2019-20


In [16]:
from sklearn.neighbors import KNeighborsClassifier

# Sweep odd neighbourhood sizes 1..19 and record train/test accuracy for each
train_scores = []
test_scores = []
for k in range(1, 20, 2):
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, encoded_y_train)
    train_score = knn.score(X_train_scaled, encoded_y_train)
    test_score = knn.score(X_test_scaled, encoded_y_test)
    train_scores.append(train_score)
    test_scores.append(test_score)
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")

# Final model with k = 9; each accuracy is computed once and reused in the prints
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(X_train_scaled, encoded_y_train)
knn_train_accuracy = knn.score(X_train_scaled, encoded_y_train)
knn_test_accuracy = knn.score(X_test_scaled, encoded_y_test)
print('Test Acc: %.3f' % knn_test_accuracy)
print(f"Training Data Score: {knn_train_accuracy}")
print(f"Testing Data Score: {knn_test_accuracy}")



k: 1, Train/Test Score: 1.000/0.897
k: 3, Train/Test Score: 0.956/0.903
k: 5, Train/Test Score: 0.952/0.918
k: 7, Train/Test Score: 0.950/0.922
k: 9, Train/Test Score: 0.947/0.924
k: 11, Train/Test Score: 0.945/0.928
k: 13, Train/Test Score: 0.946/0.922
k: 15, Train/Test Score: 0.943/0.924
k: 17, Train/Test Score: 0.944/0.922
k: 19, Train/Test Score: 0.939/0.918
Test Acc: 0.924
Training Data Score: 0.9473015873015873
Testing Data Score: 0.9238095238095239


In [17]:
from sklearn.metrics import classification_report

# KNN (k = 9): `predictions` flags the 2019-20 rookies and `predictions2`
# scores the held-out test split.
predictions = knn.predict(Xr_test_scaled)
predictions2 = knn.predict(X_test_scaled)

class_names = ["Not ROY Candidate", "ROY Candidate"]
report = classification_report(encoded_y_test, predictions2, target_names=class_names)
print(report)


                   precision    recall  f1-score   support

Not ROY Candidate       0.93      0.99      0.96       474
    ROY Candidate       0.79      0.29      0.43        51

         accuracy                           0.92       525
        macro avg       0.86      0.64      0.69       525
     weighted avg       0.92      0.92      0.91       525



In [18]:
# Collect every 2019-20 rookie the KNN model flags as a candidate.
# `predictions` is positionally aligned with the rows of rookies_2019_20_df,
# so row i is fetched with iloc. Fields are read by column name rather than by
# hard-coded position (0 / 31 / 24), which would silently point at the wrong
# column if an upstream cell added or removed one.
for i, predicted_label in enumerate(predictions):
    if predicted_label != 1:
        continue
    rookie = rookies_2019_20_df.iloc[i]
    print(i, rookie["Player"], rookie["Tm"], rookie["season"])
    name_list.append(rookie["Player"])
    season_list.append(rookie["season"])
    team_list.append(rookie["Tm"])
    model_list.append("Basic Stats KNN")

35 Rui Hachimura WAS 2019-20
72 Ja Morant MEM 2019-20
112 Zion Williamson NOP 2019-20


In [19]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Size the input and output layers from the data rather than hard-coding 6 and
# 2, so the network stays in sync if the feature list changes.
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

# Two hidden layers of 8 ReLU units, softmax over the classes
deep_model = Sequential()
deep_model.add(Dense(units=8, activation='relu', input_dim=n_features))
deep_model.add(Dense(units=8, activation='relu'))
deep_model.add(Dense(units=n_classes, activation='softmax'))

deep_model.summary()

# categorical_crossentropy matches the one-hot encoded targets
deep_model.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

# verbose=2 prints one line per epoch (1000 lines of output)
deep_model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=1000,
    shuffle=True,
    verbose=2
)

# Loss and accuracy on the held-out test split
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled,y_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 8)                 56        
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 72        
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 18        
Total params: 146
Trainable params: 146
Non-trainable params: 0
_________________________________________________________________
Train on 1575 samples
Epoch 1/1000
1575/1575 - 1s - loss: 0.6680 - accuracy: 0.6654
Epoch 2/1000
1575/1575 - 0s - loss: 0.5202 - accuracy: 0.8959
Epoch 3/1000
1575/1575 - 0s - loss: 0.3991 - accuracy: 0.9073
Epoch 4/1000
1575/1575 - 0s - loss: 0.3125 - accuracy: 0.9098
Epoch 5/1000
1575/1575 - 0s - loss: 0.2623 - accuracy: 0.9143
Epoch 6/1000
1575/1575 - 0s - loss: 0.2312 - accuracy: 0

Epoch 119/1000
1575/1575 - 0s - loss: 0.1285 - accuracy: 0.9473
Epoch 120/1000
1575/1575 - 0s - loss: 0.1290 - accuracy: 0.9486
Epoch 121/1000
1575/1575 - 0s - loss: 0.1286 - accuracy: 0.9486
Epoch 122/1000
1575/1575 - 0s - loss: 0.1282 - accuracy: 0.9492
Epoch 123/1000
1575/1575 - 0s - loss: 0.1284 - accuracy: 0.9479
Epoch 124/1000
1575/1575 - 0s - loss: 0.1286 - accuracy: 0.9486
Epoch 125/1000
1575/1575 - 0s - loss: 0.1282 - accuracy: 0.9486
Epoch 126/1000
1575/1575 - 0s - loss: 0.1287 - accuracy: 0.9492
Epoch 127/1000
1575/1575 - 0s - loss: 0.1289 - accuracy: 0.9460
Epoch 128/1000
1575/1575 - 0s - loss: 0.1283 - accuracy: 0.9467
Epoch 129/1000
1575/1575 - 0s - loss: 0.1287 - accuracy: 0.9486
Epoch 130/1000
1575/1575 - 0s - loss: 0.1282 - accuracy: 0.9467
Epoch 131/1000
1575/1575 - 0s - loss: 0.1281 - accuracy: 0.9492
Epoch 132/1000
1575/1575 - 0s - loss: 0.1279 - accuracy: 0.9486
Epoch 133/1000
1575/1575 - 0s - loss: 0.1282 - accuracy: 0.9492
Epoch 134/1000
1575/1575 - 0s - loss: 0.

Epoch 248/1000
1575/1575 - 0s - loss: 0.1243 - accuracy: 0.9479
Epoch 249/1000
1575/1575 - 0s - loss: 0.1243 - accuracy: 0.9486
Epoch 250/1000
1575/1575 - 0s - loss: 0.1248 - accuracy: 0.9467
Epoch 251/1000
1575/1575 - 0s - loss: 0.1242 - accuracy: 0.9479
Epoch 252/1000
1575/1575 - 0s - loss: 0.1241 - accuracy: 0.9498
Epoch 253/1000
1575/1575 - 0s - loss: 0.1242 - accuracy: 0.9486
Epoch 254/1000
1575/1575 - 0s - loss: 0.1248 - accuracy: 0.9473
Epoch 255/1000
1575/1575 - 0s - loss: 0.1239 - accuracy: 0.9492
Epoch 256/1000
1575/1575 - 0s - loss: 0.1247 - accuracy: 0.9486
Epoch 257/1000
1575/1575 - 0s - loss: 0.1244 - accuracy: 0.9473
Epoch 258/1000
1575/1575 - 0s - loss: 0.1237 - accuracy: 0.9486
Epoch 259/1000
1575/1575 - 0s - loss: 0.1238 - accuracy: 0.9492
Epoch 260/1000
1575/1575 - 0s - loss: 0.1240 - accuracy: 0.9498
Epoch 261/1000
1575/1575 - 0s - loss: 0.1235 - accuracy: 0.9498
Epoch 262/1000
1575/1575 - 0s - loss: 0.1237 - accuracy: 0.9492
Epoch 263/1000
1575/1575 - 0s - loss: 0.

1575/1575 - 0s - loss: 0.1205 - accuracy: 0.9486
Epoch 377/1000
1575/1575 - 0s - loss: 0.1207 - accuracy: 0.9486
Epoch 378/1000
1575/1575 - 0s - loss: 0.1201 - accuracy: 0.9498
Epoch 379/1000
1575/1575 - 0s - loss: 0.1201 - accuracy: 0.9511
Epoch 380/1000
1575/1575 - 0s - loss: 0.1200 - accuracy: 0.9498
Epoch 381/1000
1575/1575 - 0s - loss: 0.1212 - accuracy: 0.9498
Epoch 382/1000
1575/1575 - 0s - loss: 0.1208 - accuracy: 0.9467
Epoch 383/1000
1575/1575 - 0s - loss: 0.1213 - accuracy: 0.9498
Epoch 384/1000
1575/1575 - 0s - loss: 0.1197 - accuracy: 0.9517
Epoch 385/1000
1575/1575 - 0s - loss: 0.1200 - accuracy: 0.9492
Epoch 386/1000
1575/1575 - 0s - loss: 0.1201 - accuracy: 0.9511
Epoch 387/1000
1575/1575 - 0s - loss: 0.1207 - accuracy: 0.9505
Epoch 388/1000
1575/1575 - 0s - loss: 0.1198 - accuracy: 0.9517
Epoch 389/1000
1575/1575 - 0s - loss: 0.1200 - accuracy: 0.9498
Epoch 390/1000
1575/1575 - 0s - loss: 0.1199 - accuracy: 0.9517
Epoch 391/1000
1575/1575 - 0s - loss: 0.1204 - accuracy

Epoch 505/1000
1575/1575 - 0s - loss: 0.1176 - accuracy: 0.9524
Epoch 506/1000
1575/1575 - 0s - loss: 0.1171 - accuracy: 0.9517
Epoch 507/1000
1575/1575 - 0s - loss: 0.1176 - accuracy: 0.9511
Epoch 508/1000
1575/1575 - 0s - loss: 0.1173 - accuracy: 0.9530
Epoch 509/1000
1575/1575 - 0s - loss: 0.1180 - accuracy: 0.9511
Epoch 510/1000
1575/1575 - 0s - loss: 0.1174 - accuracy: 0.9505
Epoch 511/1000
1575/1575 - 0s - loss: 0.1185 - accuracy: 0.9524
Epoch 512/1000
1575/1575 - 0s - loss: 0.1177 - accuracy: 0.9498
Epoch 513/1000
1575/1575 - 0s - loss: 0.1192 - accuracy: 0.9517
Epoch 514/1000
1575/1575 - 0s - loss: 0.1169 - accuracy: 0.9530
Epoch 515/1000
1575/1575 - 0s - loss: 0.1169 - accuracy: 0.9517
Epoch 516/1000
1575/1575 - 0s - loss: 0.1177 - accuracy: 0.9537
Epoch 517/1000
1575/1575 - 0s - loss: 0.1174 - accuracy: 0.9511
Epoch 518/1000
1575/1575 - 0s - loss: 0.1166 - accuracy: 0.9517
Epoch 519/1000
1575/1575 - 0s - loss: 0.1172 - accuracy: 0.9511
Epoch 520/1000
1575/1575 - 0s - loss: 0.

1575/1575 - 0s - loss: 0.1167 - accuracy: 0.9530
Epoch 634/1000
1575/1575 - 0s - loss: 0.1148 - accuracy: 0.9524
Epoch 635/1000
1575/1575 - 0s - loss: 0.1147 - accuracy: 0.9543
Epoch 636/1000
1575/1575 - 0s - loss: 0.1146 - accuracy: 0.9543
Epoch 637/1000
1575/1575 - 0s - loss: 0.1150 - accuracy: 0.9543
Epoch 638/1000
1575/1575 - 0s - loss: 0.1163 - accuracy: 0.9492
Epoch 639/1000
1575/1575 - 0s - loss: 0.1144 - accuracy: 0.9556
Epoch 640/1000
1575/1575 - 0s - loss: 0.1150 - accuracy: 0.9517
Epoch 641/1000
1575/1575 - 0s - loss: 0.1146 - accuracy: 0.9517
Epoch 642/1000
1575/1575 - 0s - loss: 0.1148 - accuracy: 0.9549
Epoch 643/1000
1575/1575 - 0s - loss: 0.1146 - accuracy: 0.9549
Epoch 644/1000
1575/1575 - 0s - loss: 0.1151 - accuracy: 0.9543
Epoch 645/1000
1575/1575 - 0s - loss: 0.1152 - accuracy: 0.9530
Epoch 646/1000
1575/1575 - 0s - loss: 0.1149 - accuracy: 0.9537
Epoch 647/1000
1575/1575 - 0s - loss: 0.1147 - accuracy: 0.9543
Epoch 648/1000
1575/1575 - 0s - loss: 0.1146 - accuracy

Epoch 762/1000
1575/1575 - 0s - loss: 0.1135 - accuracy: 0.9537
Epoch 763/1000
1575/1575 - 0s - loss: 0.1131 - accuracy: 0.9530
Epoch 764/1000
1575/1575 - 0s - loss: 0.1136 - accuracy: 0.9543
Epoch 765/1000
1575/1575 - 0s - loss: 0.1134 - accuracy: 0.9537
Epoch 766/1000
1575/1575 - 0s - loss: 0.1134 - accuracy: 0.9537
Epoch 767/1000
1575/1575 - 0s - loss: 0.1139 - accuracy: 0.9511
Epoch 768/1000
1575/1575 - 0s - loss: 0.1135 - accuracy: 0.9524
Epoch 769/1000
1575/1575 - 0s - loss: 0.1137 - accuracy: 0.9511
Epoch 770/1000
1575/1575 - 0s - loss: 0.1132 - accuracy: 0.9524
Epoch 771/1000
1575/1575 - 0s - loss: 0.1141 - accuracy: 0.9543
Epoch 772/1000
1575/1575 - 0s - loss: 0.1136 - accuracy: 0.9543
Epoch 773/1000
1575/1575 - 0s - loss: 0.1134 - accuracy: 0.9530
Epoch 774/1000
1575/1575 - 0s - loss: 0.1131 - accuracy: 0.9530
Epoch 775/1000
1575/1575 - 0s - loss: 0.1140 - accuracy: 0.9537
Epoch 776/1000
1575/1575 - 0s - loss: 0.1136 - accuracy: 0.9524
Epoch 777/1000
1575/1575 - 0s - loss: 0.

1575/1575 - 0s - loss: 0.1124 - accuracy: 0.9537
Epoch 891/1000
1575/1575 - 0s - loss: 0.1120 - accuracy: 0.9549
Epoch 892/1000
1575/1575 - 0s - loss: 0.1141 - accuracy: 0.9511
Epoch 893/1000
1575/1575 - 0s - loss: 0.1119 - accuracy: 0.9549
Epoch 894/1000
1575/1575 - 0s - loss: 0.1121 - accuracy: 0.9549
Epoch 895/1000
1575/1575 - 0s - loss: 0.1125 - accuracy: 0.9543
Epoch 896/1000
1575/1575 - 0s - loss: 0.1118 - accuracy: 0.9549
Epoch 897/1000
1575/1575 - 0s - loss: 0.1135 - accuracy: 0.9549
Epoch 898/1000
1575/1575 - 0s - loss: 0.1129 - accuracy: 0.9524
Epoch 899/1000
1575/1575 - 0s - loss: 0.1122 - accuracy: 0.9537
Epoch 900/1000
1575/1575 - 0s - loss: 0.1120 - accuracy: 0.9511
Epoch 901/1000
1575/1575 - 0s - loss: 0.1127 - accuracy: 0.9524
Epoch 902/1000
1575/1575 - 0s - loss: 0.1120 - accuracy: 0.9530
Epoch 903/1000
1575/1575 - 0s - loss: 0.1117 - accuracy: 0.9537
Epoch 904/1000
1575/1575 - 0s - loss: 0.1130 - accuracy: 0.9505
Epoch 905/1000
1575/1575 - 0s - loss: 0.1121 - accuracy

In [20]:
# Load the model
from tensorflow.keras.models import load_model

# NOTE(review): this replaces the network trained in the previous cell with a
# model saved to disk, so the candidates listed below come from the saved file.
# An alternative saved model, "rookie_trained.h5", can be loaded instead.
deep_model = load_model("rookie_trained_trad.h5")

# Sequential.predict_classes was removed in TensorFlow 2.6. The logic below
# reproduces what it did: argmax over a multi-unit (softmax) output, or a 0.5
# threshold on a single-unit (sigmoid) output.
probabilities = deep_model.predict(Xr_test_scaled)
if probabilities.shape[-1] > 1:
    ynew = probabilities.argmax(axis=-1)
else:
    ynew = (probabilities > 0.5).astype("int32").ravel()

# Predictions are positionally aligned with the rows of rookies_2019_20_df.
# Fields are read by column name rather than by hard-coded position
# (0 / 31 / 24).
for i, predicted_label in enumerate(ynew):
    if predicted_label != 1:
        continue
    rookie = rookies_2019_20_df.iloc[i]
    print(i, rookie["Player"], rookie["Tm"], rookie["season"])
    name_list.append(rookie["Player"])
    season_list.append(rookie["season"])
    team_list.append(rookie["Tm"])
    model_list.append("Basic Stats Deep Learning")


1 RJ Barrett NYK 2019-20
16 Brandon Clarke MEM 2019-20
35 Rui Hachimura WAS 2019-20
72 Ja Morant MEM 2019-20
78 Kendrick Nunn MIA 2019-20
82 Eric Paschall GSW 2019-20
105 P.J. Washington CHO 2019-20
112 Zion Williamson NOP 2019-20


In [21]:
# Save the model
#deep_model.save("rookie_trained_trad.h5")

In [22]:
#df = pd.DataFrame({'Player': name_list, 'season': season_list, 'Tm': team_list, 'model': model_list})
#df.to_sql(name='roy_predictions', con=engine, if_exists='append', index=False)

In [23]:
# Feature importances of the random forest `rf` (the directly fitted forest,
# not the grid-searched one).
imp = rf.feature_importances_
feature_list=[]
weight_list=[]
model_list=[]

# The printed label now matches the label that is stored ("RF Basic");
# it previously printed "RF Advanced".
for feature_name, value in zip(X.columns, imp):
    print(feature_name, value, "RF Basic")
    feature_list.append(feature_name)
    weight_list.append(value)
    model_list.append("RF Basic")

MP.1 0.26192929783227975 RF Advanced
PTS.1 0.25307647498919295 RF Advanced
TRB.1 0.1538103188175549 RF Advanced
AST.1 0.09854956758033666 RF Advanced
BLKPG 0.09277085958311923 RF Advanced
STLPG 0.1398634811975167 RF Advanced


In [24]:
from sklearn.inspection import permutation_importance

# KNN has no built-in importances, so permutation importance is used instead.
# random_state fixes the shuffles so the weights are reproducible.
results = permutation_importance(
    knn, X_train_scaled, encoded_y_train, scoring='accuracy', random_state=42
)
# get importance
importance = results.importances_mean

# Normalise by the total so the weights sum to 1
sumImportances = sum(importance)
print(sumImportances)

# summarize feature importance
# The stored label is "KNN Basic", matching the printed one. It was previously
# recorded as "KNN Advanced", mislabelling these rows in the database.
for feature_name, v in zip(X.columns, importance):
    weight = v / sumImportances
    print(feature_name, weight, "KNN Basic")
    feature_list.append(feature_name)
    weight_list.append(weight)
    model_list.append("KNN Basic")

0.06146031746031748
MP.1 0.11570247933884296 KNN Basic
PTS.1 0.3202479338842981 KNN Basic
TRB.1 0.202479338842975 KNN Basic
AST.1 0.21900826446280985 KNN Basic
BLKPG 0.07851239669421516 KNN Basic
STLPG 0.06404958677685897 KNN Basic


In [25]:
# One row per (feature, model) pair collected by the two importance cells
feature_weights = {
    'feature': feature_list,
    'weight': weight_list,
    'model': model_list,
}
df = pd.DataFrame(feature_weights)

# if_exists='append' adds to the existing table, so re-running this cell
# inserts the same rows again.
df.to_sql('roy_feature_weights', engine, if_exists='append', index=False)