![](https://cdn.nba.net/nba-drupal-prod/2020-04/Video_T1_Hero_TopVideo.jpg)

# HERO OR ZERO

## Neural Network Machine Learning Model ¶

In [None]:
# Import the neccessary libraries
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
print("All neccessary libraries imported")

In [2]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(42)


In [3]:
# Raw data from Resources directory (change the link as needed)
attribute_data = "Data/ML data.csv"

In [4]:
# Read the  data into a dataframe
df_attributes = pd.read_csv(attribute_data)
# Drop the columns with string values
df_attributes.drop(columns=['Player','Position','Team'],inplace=True)
# Let's see the shape and the first 5 rows of the dataframe
print('\033[1m'+"The shape of the {} dataframe is {}:" .format("attribute",df_attributes.shape)+'\033[0m')
df_attributes.head()

[1mThe shape of the attribute dataframe is (125, 10):[0m


Unnamed: 0,Age,Game,FG%,3P%,2P%,Reb,Ast,Pts,Year,Contract Salary
0,20,78,0.473,0.296,0.531,6.5,1.6,9.2,2015,4171680.0
1,21,80,0.454,0.288,0.528,5.1,1.9,12.7,2016,4351320.0
2,22,58,0.434,0.336,0.497,7.9,2.3,17.6,2017,5504420.0
3,23,78,0.449,0.349,0.499,7.4,3.7,16.0,2018,21590909.0
4,24,62,0.47,0.369,0.457,7.7,3.7,14.4,2019,19863636.0


In [5]:
# Let's see the column names in the dataframe
print(df_attributes.columns.tolist())

['Age', 'Game ', 'FG%', '3P%', '2P%', 'Reb', 'Ast', 'Pts', 'Year ', 'Contract Salary']


In [6]:
# Basic information of the df_fire dataframe by the info() method
df_attributes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Age              125 non-null    int64  
 1   Game             125 non-null    int64  
 2   FG%              125 non-null    float64
 3   3P%              125 non-null    float64
 4   2P%              125 non-null    float64
 5   Reb              125 non-null    float64
 6   Ast              125 non-null    float64
 7   Pts              125 non-null    float64
 8   Year             125 non-null    int64  
 9   Contract Salary  125 non-null    float64
dtypes: float64(7), int64(3)
memory usage: 9.9 KB


## Create a new column for popularity rank

In [26]:
# Create a new column with Rank 0 or 1 
df_attributes["Rank"] = df_attributes["Contract Salary"].apply(lambda x: 1 if x <=7000000 else 0)
# Let's see the new column
df_attributes.head()

Unnamed: 0,Age,Game,FG%,3P%,2P%,Reb,Ast,Pts,Year,Contract Salary,Rank
0,20,78,0.473,0.296,0.531,6.5,1.6,9.2,2015,4171680.0,1
1,21,80,0.454,0.288,0.528,5.1,1.9,12.7,2016,4351320.0,1
2,22,58,0.434,0.336,0.497,7.9,2.3,17.6,2017,5504420.0,1
3,23,78,0.449,0.349,0.499,7.4,3.7,16.0,2018,21590909.0,0
4,24,62,0.47,0.369,0.457,7.7,3.7,14.4,2019,19863636.0,0


## Creating input and output datasets¶

In [8]:
import numpy as np
# Create a copy of df_attributes
df_copy = df_attributes.copy()
# Create X and Y numpy arrays
y = df_copy[['Rank']].to_numpy() # Keep only the rank column
df_copy.drop(columns=['Contract Salary','Rank'],inplace=True) # Keep only the attributes
X = df_copy.to_numpy() # Convert the dataframe to numpy array
print('\033[1m'+"Shape of the X dataset: {}".format(X.shape)+'\033[0m')
print('\033[1m'+"Shape of the y dataset: {}".format(y.shape)+'\033[0m')

[1mShape of the X dataset: (125, 9)[0m
[1mShape of the y dataset: (125, 1)[0m


## Creating train and test splits

In [9]:
# Use train_test_split to create training and testing data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Shape of the train & test set
print ('Train set:', X_train.shape,  y_train.shape)
print ('Test set:', X_test.shape,  y_test.shape)

Train set: (100, 9) (100, 1)
Test set: (25, 9) (25, 1)


## Data Preprocessing¶

In [10]:
# Importing StandardScaler from sklearn library
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler().fit(X_train)

In [11]:
# Scaling both test and train data for X dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
# Install tensorflow module (if not available) by uncommenting below
#!pip install keras
#!pip install --upgrade tensorflow

## One hot encoding of labels¶

In [13]:
from tensorflow.keras.utils import to_categorical
# Converting the labels (y_train and y_test) to categorical values
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

## Defining the model architecture

In [14]:
from tensorflow.keras.models import Sequential
# Creating a sequential model
model = Sequential()

## Defining the input of the model

In [15]:
from tensorflow.keras.layers import Dense
number_inputs = 9
number_hidden_nodes = 27
model.add(Dense(units=number_hidden_nodes,
                activation='tanh', input_dim=number_inputs))

## Defining the input of the model

In [16]:
# Defining the output layer
number_classes = 2 # Labels we are trying to predict (either 'Hit' or 'Miss')
model.add(Dense(units=number_classes, activation='softmax'))

## Summary of the model

In [17]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 27)                270       
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 56        
Total params: 326
Trainable params: 326
Non-trainable params: 0
_________________________________________________________________


## Compile the model

In [18]:
# Use categorical crossentropy for categorical data and mean squared error for regression
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

## Training the model

In [19]:
# Fit (train) the model
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100, # Hundred iterations or loops
    shuffle=True,
    verbose=2
)

Epoch 1/100
4/4 - 0s - loss: 0.7239 - accuracy: 0.6100
Epoch 2/100
4/4 - 0s - loss: 0.6912 - accuracy: 0.6200
Epoch 3/100
4/4 - 0s - loss: 0.6615 - accuracy: 0.6400
Epoch 4/100
4/4 - 0s - loss: 0.6345 - accuracy: 0.6600
Epoch 5/100
4/4 - 0s - loss: 0.6099 - accuracy: 0.6600
Epoch 6/100
4/4 - 0s - loss: 0.5883 - accuracy: 0.6700
Epoch 7/100
4/4 - 0s - loss: 0.5654 - accuracy: 0.6900
Epoch 8/100
4/4 - 0s - loss: 0.5471 - accuracy: 0.7100
Epoch 9/100
4/4 - 0s - loss: 0.5300 - accuracy: 0.7100
Epoch 10/100
4/4 - 0s - loss: 0.5126 - accuracy: 0.7600
Epoch 11/100
4/4 - 0s - loss: 0.4997 - accuracy: 0.7900
Epoch 12/100
4/4 - 0s - loss: 0.4847 - accuracy: 0.7900
Epoch 13/100
4/4 - 0s - loss: 0.4719 - accuracy: 0.7900
Epoch 14/100
4/4 - 0s - loss: 0.4610 - accuracy: 0.8000
Epoch 15/100
4/4 - 0s - loss: 0.4504 - accuracy: 0.8100
Epoch 16/100
4/4 - 0s - loss: 0.4404 - accuracy: 0.8100
Epoch 17/100
4/4 - 0s - loss: 0.4322 - accuracy: 0.8200
Epoch 18/100
4/4 - 0s - loss: 0.4244 - accuracy: 0.8300
E

<tensorflow.python.keras.callbacks.History at 0x7f37f1706a50>

## Evaluating the model using the test data¶

In [20]:
# Evaluate the deep model using the testing data
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1/1 - 0s - loss: 0.4052 - accuracy: 0.8400
Deep Neural Network - Loss: 0.40516364574432373, Accuracy: 0.8399999737739563
