In [41]:
# Set the seed value for the notebook so the results are reproducible

from numpy.random import seed
seed(1)

In [42]:
# import necessary libraries

import warnings
warnings.simplefilter('ignore')

import numpy as np

import pandas as pd

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import make_classification

In [43]:
# load pandas DataFrame
ncaaDF = pd.read_csv("NCAA_data.csv")
ncaaDF.head()

Unnamed: 0,player_id,name,class,college,assists,blocks,effective_field_goal_percentage,field_goal_percentage,free_throw_attempt_rate,free_throw_percentage,...,turnover_percentage,turnovers,win_shares_per_40_minutes,assists_per_40,blocks_per_40,points_per_40,steals_per_40,three_pointers_per_40,total_rebounds_per_40,turnovers_per_40
0,kenyon-martin-1,Kenyon Martin,Above Avg,cincinnati,142,292,0.587,0.586,0.494,0.581,...,14.3,139.0,0.346,2.781587,5.719882,25.053869,2.428991,0.039177,17.022527,2.722821
1,aj-guyton-1,A.J. Guyton,Below Avg,indiana,403,52,0.541,0.455,0.249,0.79,...,13.6,218.0,0.145,4.944785,0.638037,25.766871,1.582822,3.472393,5.263804,2.674847
2,jake-voskuhl-1,Jake Voskuhl,Below Avg,uconn,124,193,0.542,0.542,0.555,0.656,...,23.0,160.0,0.237,2.271062,3.534799,15.805861,1.575092,0.0,16.117216,2.930403
3,khalid-el-amin-1,Khalid El-Amin,Bust,uconn,479,10,0.486,0.416,0.257,0.822,...,15.6,188.0,0.158,8.915775,0.186133,30.711959,3.462075,3.629595,5.937645,3.499302
4,mike-smith-1,Mike Smith,Bust,louisiana monroe,147,42,0.507,0.428,0.357,0.769,...,20.0,241.0,0.165,3.034056,0.866873,21.919505,1.857585,2.683179,9.267286,4.9742


In [44]:
nbaDF.dtypes

player_id                           object
name                                object
class                               object
college                             object
assists                              int64
blocks                               int64
effective_field_goal_percentage    float64
field_goal_percentage              float64
free_throw_attempt_rate            float64
free_throw_percentage              float64
games_played                         int64
height                               int64
minutes_played                     float64
points                               int64
steals                               int64
three_point_percentage             float64
three_pointers                     float64
total_rebounds                       int64
true_shooting_percentage           float64
turnover_percentage                float64
turnovers                          float64
win_shares_per_40_minutes          float64
assists_per_40                     float64
blocks_per_

In [45]:
# Split the x variables (WITHOUT Player, Team, and College Columns) from the y variable

X = ncaaDF.drop(["class","player_id","name","college"], axis=1)
y = ncaaDF["class"]
print(f"Number of Rows and Independent(X) Variable: {X.shape}, \n Number of Rows (One Dependent(Y) variable): {y.shape}")

Number of Rows and Independent(X) Variable: (853, 25), 
 Number of Rows (One Dependent(Y) variable): (853,)


In [46]:
# Label encoding for Dependent(Y) Variable

label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

for label, original_class in zip(encoded_y, y):
    print('Original Class: ' + str(original_class))
    print('Encoded Label: ' + str(label))
    print('-' * 12)

Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Star
Encoded Label: 3
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Label: 2
------------
Original Class: Bust
Encoded Lab

Original Class: Star
Encoded Label: 3
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Star
Encoded Label: 3
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Star
Encoded Label: 3
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Above Avg
Encoded Label: 0
------------
Original Class: Below Avg
Encoded Label: 1
------------
Original Class: Star
Encoded Label: 3
------------
Original Cla

In [47]:
# Use train_test_split to create training and testing data

X_train, X_test, y_train, y_test = train_test_split(X, encoded_y, test_size=float(0.2), random_state=1)

In [48]:
X_scaler = StandardScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [49]:
# Make sure the dependent variable categories are set up

# y_train_categorical = to_categorical(y_train)
# y_test_categorical = to_categorical(y_test)

In [50]:
# Create a normal neural network with 12 inputs (12 independent variables), 6 hidden nodes, and 4 outputs
# input_dim refers to number of independent(X) variables
# units is output shape i.e. number of categories in the Dependent Variable Column
# In this example, final units=4 because Four Rankings: Star, Above Average, Below Average, Bust

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(units=6, activation='relu'))
model.add(Dense(units=len(y.unique()), activation='softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 6)                 156       
_________________________________________________________________
dense_16 (Dense)             (None, 6)                 42        
_________________________________________________________________
dense_17 (Dense)             (None, 4)                 28        
Total params: 226
Trainable params: 226
Non-trainable params: 0
_________________________________________________________________


In [51]:
# Compile the model

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])



In [52]:
# Fit the model

model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    batch_size=10,
    shuffle=True,
    verbose=2
)

Epoch 1/100
 - 0s - loss: 1.6473 - acc: 0.2639
Epoch 2/100
 - 0s - loss: 1.5069 - acc: 0.2874
Epoch 3/100
 - 0s - loss: 1.4271 - acc: 0.3226
Epoch 4/100
 - 0s - loss: 1.3822 - acc: 0.3255
Epoch 5/100
 - 0s - loss: 1.3560 - acc: 0.3416
Epoch 6/100
 - 0s - loss: 1.3368 - acc: 0.3607
Epoch 7/100
 - 0s - loss: 1.3233 - acc: 0.3695
Epoch 8/100
 - 0s - loss: 1.3124 - acc: 0.3812
Epoch 9/100
 - 0s - loss: 1.3031 - acc: 0.3871
Epoch 10/100
 - 0s - loss: 1.2956 - acc: 0.3871
Epoch 11/100
 - 0s - loss: 1.2893 - acc: 0.3900
Epoch 12/100
 - 0s - loss: 1.2837 - acc: 0.3930
Epoch 13/100
 - 0s - loss: 1.2791 - acc: 0.4047
Epoch 14/100
 - 0s - loss: 1.2749 - acc: 0.4047
Epoch 15/100
 - 0s - loss: 1.2721 - acc: 0.3974
Epoch 16/100
 - 0s - loss: 1.2685 - acc: 0.4062
Epoch 17/100
 - 0s - loss: 1.2659 - acc: 0.4091
Epoch 18/100
 - 0s - loss: 1.2618 - acc: 0.4120
Epoch 19/100
 - 0s - loss: 1.2585 - acc: 0.4091
Epoch 20/100
 - 0s - loss: 1.2558 - acc: 0.4003
Epoch 21/100
 - 0s - loss: 1.2525 - acc: 0.4076
E

<tensorflow.python.keras.callbacks.History at 0x29578498b00>

In [53]:
# Save the model

model.save("Draft_Machine_Model.h5")

In [55]:
# Evaluate the model using the testing data

model = load_model("Draft_Machine_Model.h5")
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

 - 0s - loss: 1.2887 - acc: 0.3684
Loss: 1.28871070223245, Accuracy: 0.3684210479259491
