In [3]:
import warnings
# Install a global "ignore" filter so that no warning of any category is shown for the
# rest of the session (presumably to keep the training output below uncluttered).
# NOTE(review): this also hides deprecation notices raised by the imported libraries;
# consider narrowing the filter to specific categories/modules.
warnings.filterwarnings('ignore')

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

In [10]:
# Read the CSV at Data/cleaned_NBA_stats.csv into a DataFrame. The path is relative, so it
# resolves against the notebook's current working directory.
# The modelling cell below selects the columns G, PTS, MP, AST, TRB, FG%, FGA, FT% and Share.
player_stats = pd.read_csv("Data/cleaned_NBA_stats.csv")

## Building the Model 

In [14]:
# Baseline model: predict the 'Share' column from per-player box-score statistics.
#
# Produces (and later cells rely on): X_train, X_test, y_train, y_test, scaler, X_scaled,
# model, loss, accuracy and the one-row summary frame `model_original`.

# Features and target for the Stats vs MVP/Non-MVP model.
# (The earlier comment said "top 12 stats", but exactly these 8 columns are used.)
features = ['G', 'PTS', 'MP', 'AST', 'TRB', 'FG%', 'FGA','FT%']

X = player_stats[features]
# NOTE(review): 'Share' looks like a continuous value rather than a 0/1 MVP flag. With a
# sigmoid output + binary_crossentropy the loss is still well defined, but the 'accuracy'
# metric compares thresholded predictions against the raw target values - confirm that this
# is the intended label/metric pairing.
y = player_stats['Share']

# Seed the Python, NumPy and TensorFlow random generators so that weight initialisation and
# batch shuffling (and therefore the reported metrics) are reproducible on Restart & Run All.
set_random_seed(42)

# Split BEFORE scaling. The row partition is identical to splitting the scaled matrix,
# because train_test_split only depends on the number of rows and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, random_state = 42)

# Fit the scaler on the training rows only, then apply the same transform to the test rows.
# Fitting on the full dataset would leak test-set mean/variance into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for backward compatibility with any cell that still reads X_scaled: the full feature
# matrix transformed with the training-set statistics.
X_scaled = scaler.transform(X)

# Build the model: 3 ReLU hidden layers (60/40/20 units) and one sigmoid output unit
inputs = X_train.shape[1]
hidden_nodes_l1 = 60
hidden_nodes_l2 = 40
hidden_nodes_l3 = 20
model = Sequential()

# First Hidden Layer (also declares the input width)
model.add(Dense(units = hidden_nodes_l1, activation = 'relu', input_dim = inputs))

# Second Hidden Layer
model.add(Dense(units = hidden_nodes_l2, activation = 'relu'))

# Third Hidden Layer
model.add(Dense(units = hidden_nodes_l3, activation = 'relu'))

# Output Layer
model.add(Dense(units = 1, activation = 'sigmoid'))

# Check model structure
model.summary()

# Compile model
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train model (15 epochs, batches of 25). The test split doubles as validation data, so the
# per-epoch validation numbers are not an independent estimate.
model.fit(X_train, y_train, epochs = 15, batch_size = 25, validation_data = (X_test, y_test))

# Evaluate the model using the test data
loss, accuracy = model.evaluate(X_test ,y_test)
print(f"Test Loss: {round(loss,2)*100}%, Test Accuracy: {round(accuracy,2)*100}%")

# One-row summary (rounded to 2 decimals) used by the final comparison table
model_original = pd.DataFrame({'Loss': [round(loss,2)], 'Accuracy': [round(accuracy,2)]})

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 60)                540       
                                                                 
 dense_1 (Dense)             (None, 40)                2440      
                                                                 
 dense_2 (Dense)             (None, 20)                820       
                                                                 
 dense_3 (Dense)             (None, 1)                 21        
                                                                 
Total params: 3821 (14.93 KB)
Trainable params: 3821 (14.93 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 1

In [15]:
# First tuning attempt: wider hidden layers and fewer training epochs

# Layer sizes; the input width is read from the training matrix
inputs = X_train.shape[1]
hidden_nodes_l1, hidden_nodes_l2, hidden_nodes_l3 = 100, 60, 30

# Assemble the whole stack at once: three ReLU hidden layers, then one sigmoid output unit
model = Sequential([
    Dense(hidden_nodes_l1, activation='relu', input_dim=inputs),
    Dense(hidden_nodes_l2, activation='relu'),
    Dense(hidden_nodes_l3, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Print the layer / parameter table
model.summary()

# Training configuration: Adam, binary cross-entropy, accuracy metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs in mini-batches of 25; the test split is passed as validation data
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=25,
    epochs=10,
)

# Score on the test split and report both figures as percentages
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {round(loss,2)*100}%, Test Accuracy: {round(accuracy,2)*100}%")

# Rounded metrics as a one-row frame for the final comparison table
Optimizing_1 = pd.DataFrame([{'Loss': round(loss, 2), 'Accuracy': round(accuracy, 2)}])
Optimizing_1

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 100)               900       
                                                                 
 dense_5 (Dense)             (None, 60)                6060      
                                                                 
 dense_6 (Dense)             (None, 30)                1830      
                                                                 
 dense_7 (Dense)             (None, 1)                 31        
                                                                 
Total params: 8821 (34.46 KB)
Trainable params: 8821 (34.46 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 2.0%, Test Accuracy: 97.0%


Unnamed: 0,Loss,Accuracy
0,0.02,0.97


In [16]:
# Second tuning attempt: one extra hidden layer (four in total)

# Layer sizes; the input width is read from the training matrix
inputs = X_train.shape[1]
hidden_nodes_l1, hidden_nodes_l2 = 100, 80
hidden_nodes_l3, hidden_nodes_l4 = 60, 40

# Assemble the whole stack at once: four ReLU hidden layers, then one sigmoid output unit
model = Sequential([
    Dense(hidden_nodes_l1, activation='relu', input_dim=inputs),
    Dense(hidden_nodes_l2, activation='relu'),
    Dense(hidden_nodes_l3, activation='relu'),
    Dense(hidden_nodes_l4, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Print the layer / parameter table
model.summary()

# Training configuration: Adam, binary cross-entropy, accuracy metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 10 epochs in mini-batches of 25; the test split is passed as validation data
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=25,
    epochs=10,
)

# Score on the test split and report both figures as percentages
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {round(loss,2)*100}%, Test Accuracy: {round(accuracy,2)*100}%")

# Rounded metrics as a one-row frame for the final comparison table
Optimizing_2 = pd.DataFrame([{'Loss': round(loss, 2), 'Accuracy': round(accuracy, 2)}])
Optimizing_2

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 100)               900       
                                                                 
 dense_9 (Dense)             (None, 80)                8080      
                                                                 
 dense_10 (Dense)            (None, 60)                4860      
                                                                 
 dense_11 (Dense)            (None, 40)                2440      
                                                                 
 dense_12 (Dense)            (None, 1)                 41        
                                                                 
Total params: 16321 (63.75 KB)
Trainable params: 16321 (63.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2

Unnamed: 0,Loss,Accuracy
0,0.02,0.97


In [17]:
# Third Attempt (different hidden-layer widths with an explicitly configured Adam optimizer)
# Note: epochs stay at 10 and learning_rate=0.001 is Adam's documented default, so in
# practice only the layer widths (100/80/60) differ from the earlier attempts.
# `Adam` comes from the notebook's top-level tensorflow.keras imports, the same namespace
# as Sequential/Dense, instead of the standalone `keras` package.

# Build the model
inputs = X_train.shape[1]
hidden_nodes_l1 = 100
hidden_nodes_l2 = 80
hidden_nodes_l3 = 60
model = Sequential()

# First Hidden Layer (also declares the input width)
model.add(Dense(units = hidden_nodes_l1, activation = 'relu', input_dim = inputs))

# Second Hidden Layer
model.add(Dense(units = hidden_nodes_l2, activation = 'relu'))

# Third Hidden Layer
model.add(Dense(units = hidden_nodes_l3, activation = 'relu'))

# Output Layer
model.add(Dense(units = 1, activation = 'sigmoid'))

# Check model structure
model.summary()

# Compile model
# The configured optimizer object is now actually handed to compile(); previously the
# string 'adam' was passed and the `optimizer` variable was silently ignored.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train model (10 epochs, batches of 25; the test split doubles as validation data)
model.fit(X_train, y_train, epochs = 10, batch_size = 25, validation_data = (X_test, y_test))

# Evaluate the model using the test data
loss, accuracy = model.evaluate(X_test ,y_test)
print(f"Test Loss: {round(loss,2)*100}%, Test Accuracy: {round(accuracy,2)*100}%")

# One-row summary (rounded to 2 decimals) used by the final comparison table
Optimizing_3 = pd.DataFrame({'Loss': [round(loss,2)], 'Accuracy': [round(accuracy,2)]})
Optimizing_3

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_13 (Dense)            (None, 100)               900       
                                                                 
 dense_14 (Dense)            (None, 80)                8080      
                                                                 
 dense_15 (Dense)            (None, 60)                4860      
                                                                 
 dense_16 (Dense)            (None, 1)                 61        
                                                                 
Total params: 13901 (54.30 KB)
Trainable params: 13901 (54.30 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 2.0%, Test Accuracy: 97.0%


Unnamed: 0,Loss,Accuracy
0,0.02,0.97


In [18]:
# Optimizing (Using different learning rate, layers and hidden nodes)
# Final Attempt: five hidden layers, Adam with learning_rate=0.0001, 16 epochs, batch size 10.
# `Adam` comes from the notebook's top-level tensorflow.keras imports so that the optimizer
# and the Sequential/Dense classes share one Keras namespace (the cell previously
# re-imported it from the standalone `keras` package).

# Build the model
inputs = X_train.shape[1]
hidden_nodes_l1 = 100
hidden_nodes_l2 = 80
hidden_nodes_l3 = 60
hidden_nodes_l4 = 40
hidden_nodes_l5 = 30
model = Sequential()

# First Hidden Layer (also declares the input width)
model.add(Dense(units = hidden_nodes_l1, activation = 'relu', input_dim = inputs))

# Second Hidden Layer
model.add(Dense(units = hidden_nodes_l2, activation = 'relu'))

# Third Hidden Layer
model.add(Dense(units = hidden_nodes_l3, activation = 'relu'))

# Fourth Hidden Layer
model.add(Dense(units = hidden_nodes_l4, activation = 'relu'))

# Fifth Hidden Layer
model.add(Dense(units = hidden_nodes_l5, activation = 'relu'))

# Output Layer
model.add(Dense(units = 1, activation = 'sigmoid'))

# Check model structure
model.summary()

# Compile model with an explicitly configured optimizer (10x smaller step than Adam's default)
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train model (16 epochs, batches of 10; the test split doubles as validation data)
model.fit(X_train, y_train, epochs = 16, batch_size = 10, validation_data = (X_test, y_test))

# Evaluate the model using the test data
loss, accuracy = model.evaluate(X_test ,y_test)
print(f"Test Loss: {round(loss,2)*100}%, Test Accuracy: {round(accuracy,2)*100}%")

# One-row summary (rounded to 2 decimals) used by the final comparison table
Optimizing_final = pd.DataFrame({'Loss': [round(loss,2)], 'Accuracy': [round(accuracy,2)]})
Optimizing_final

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_17 (Dense)            (None, 100)               900       
                                                                 
 dense_18 (Dense)            (None, 80)                8080      
                                                                 
 dense_19 (Dense)            (None, 60)                4860      
                                                                 
 dense_20 (Dense)            (None, 40)                2440      
                                                                 
 dense_21 (Dense)            (None, 30)                1230      
                                                                 
 dense_22 (Dense)            (None, 1)                 31        
                                                                 
Total params: 17541 (68.52 KB)
Trainable params: 17541



Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
Test Loss: 1.0%, Test Accuracy: 97.0%


Unnamed: 0,Loss,Accuracy
0,0.01,0.97


In [19]:
# Stack the one-row summaries from every run into a single comparison table.
# Rows are renumbered 0..4 by the concat, then each position is relabelled with the
# name of the run it came from.
analysis = pd.concat(
    [model_original, Optimizing_1, Optimizing_2, Optimizing_3, Optimizing_final],
    ignore_index=True,
).rename(
    index=dict(enumerate([
        'Original Model',
        '1st Model Optimization',
        '2nd Model Optimization',
        '3rd Model Optimization',
        'Final Model Optimization',
    ]))
)

analysis

Unnamed: 0,Loss,Accuracy
Original Model,0.02,0.97
1st Model Optimization,0.02,0.97
2nd Model Optimization,0.02,0.97
3rd Model Optimization,0.02,0.97
Final Model Optimization,0.01,0.97
