
# This jupyter notebook was done as a refresher to using Multi-Layer Perceptron neural network from Scikit-learn. This notebook also has notes on applying grid search cross-validation on the MLP classifier.
## The dataset used is the UCI heart disease from kaggle.com
## Note that the results in this notebook are not optimal, but this notebook was done as practice using the classifier.


In [21]:

# importing important libraries
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import metrics
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [22]:

# reading the heart disease csv file
heart_df = pd.read_csv(r"C:\Users\KennoHead\Desktop\Data Science and Machine Learning Refresher\heart.csv")



In [23]:


# setting up the training and testing sets

heart_features_cols = list(heart_df.columns)
heart_features_cols.remove('target')

heart_features = heart_df[heart_features_cols]
heart_labels = heart_df['target']

features_training, features_testing, labels_training, labels_testing = train_test_split(heart_features,
                                                            heart_labels, test_size = 0.3, random_state = 0)


In [65]:

# notes
# hidden layer sizes: the ith element is the number of neurons for that ith layer
# in this case, 1st layers have 48 neurons, the 2nd layer has 24 neurons, 
# the 3rd hash 12 neurons, and the 4th has 6

# activation: the activation function should be non linear, logistic refers to the sigmoid function
# similar to the function used in logistic regression
# solver: the kind of minimization problem, in this case 'adam' refers to stochastic gradient descent
# alpha: the regularization term, or the penalty term to add into the error function
# learning rate init: the learning rate
# verbose : boolean whether to print progress or not
# tol: tolerance for the optimization, when the loss/score is not improving by at least the tol value
# in consecutive iterations

a_MLP_ANN = MLPClassifier(hidden_layer_sizes = (48, 24, 12, 6 ), activation = 'logistic',
                         solver = 'adam', alpha = 1e-5, random_state = 0,
                         learning_rate_init = 0.1, verbose = True, tol = 1e-6)


In [66]:

# to train the MLP ANN
a_MLP_ANN.fit(features_training, labels_training)


Iteration 1, loss = 0.69796975
Iteration 2, loss = 0.69378513
Iteration 3, loss = 0.70869283
Iteration 4, loss = 0.72662323
Iteration 5, loss = 0.70431848
Iteration 6, loss = 0.69034534
Iteration 7, loss = 0.68824715
Iteration 8, loss = 0.68688709
Iteration 9, loss = 0.68757769
Iteration 10, loss = 0.68773387
Iteration 11, loss = 0.68643943
Iteration 12, loss = 0.69038136
Iteration 13, loss = 0.69605791
Iteration 14, loss = 0.69865772
Iteration 15, loss = 0.69492893
Iteration 16, loss = 0.68885509
Iteration 17, loss = 0.68750973
Iteration 18, loss = 0.68696555
Iteration 19, loss = 0.68708253
Iteration 20, loss = 0.68740208
Iteration 21, loss = 0.68891778
Iteration 22, loss = 0.68871070
Training loss did not improve more than tol=0.000001 for 10 consecutive epochs. Stopping.


MLPClassifier(activation='logistic', alpha=1e-05,
              hidden_layer_sizes=(48, 24, 12, 6), learning_rate_init=0.1,
              random_state=0, tol=1e-06, verbose=True)

In [67]:

# a_MLP_ANN.coefs_, use this to get the weight matrix corresponding to the layer i.
# a_MLP_ANN.intercepts_, use this to get the bias vector for the ith layer

# .predict to predict the labels of the test features
MLP_ANN_predictions = a_MLP_ANN.predict(features_testing)

MLP_ANN_acc = accuracy_score(labels_testing, MLP_ANN_predictions)

print(MLP_ANN_acc)

# .predict_proba to get probabilities of each class/label
MLP_ANN_probabilities = a_MLP_ANN.predict_proba(features_testing)

print (MLP_ANN_probabilities)

0.5164835164835165
[[0.46199918 0.53800082]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.46199919 0.53800081]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.46199918 0.53800082]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.4619992  0.5380008 ]
 [0.46

In [68]:

# using confusion matrices
# note that with 2 labels, there should be 2 classes
# so the matrix will be 2x2 in dimensions

Con_Matrix = metrics.confusion_matrix(labels_testing, MLP_ANN_predictions)

print(Con_Matrix)


[[ 0 44]
 [ 0 47]]


In [69]:

FPR, TPR, thresholds = metrics.roc_curve(labels_testing, MLP_ANN_probabilities[:,1], pos_label = 1 )

MLP_ANN_AUC = metrics.auc(FPR, TPR)

print(MLP_ANN_AUC)


0.44825918762088973
