In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [0]:
!pip install python-mnist

In [0]:
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from mnist import MNIST
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

## We load the MNIST data into the training and testing sets

In [0]:
mndata = MNIST('/content/drive/My Drive/SMAI_Assignment3_Dataset/dataset_q3')
mndata.gz = True
X_train, y_train = mndata.load_training()

In [125]:
X_test, y_test = mndata.load_testing()
print(len(X_test))
print(len(y_test))
print(len(X_test[0]))
y_test = list(y_test)

10000
10000
784


In [126]:
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)
print(X_train.shape)
print(y_train.shape)

(60000, 784)
(60000,)


# Support Vector Machine (SVM)

## SVM model 1 using C=2 and the default kernel = 'rbf'

In [0]:
from sklearn.svm import SVC
clf = SVC(C=2.0)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [128]:
a_svm1 = accuracy
f1_svm1 = f1_score(y_test, y_pred, average='weighted')
p_svm1 = precision_score(y_test, y_pred, average='weighted')
r_svm1 = recall_score(y_test, y_pred, average='weighted')
print("Accuracy: ", a_svm1)
print("f1 score: ",f1_svm1)
print("Precision score: ",p_svm1)
print("Recall score: ",r_svm1)
print("Confusion matrix: \n", confusion_matrix(y_test, y_pred))

Accuracy:  0.9831
f1 score:  0.9830916493531652
Precision score:  0.9830983682809287
Recall score:  0.9831
Confusion matrix: 
 [[ 973    0    1    0    0    3    0    1    2    0]
 [   0 1127    3    1    0    1    1    1    1    0]
 [   6    1 1012    0    1    0    1    7    3    1]
 [   0    0    1  997    0    3    0    4    3    2]
 [   0    0    4    0  966    0    3    0    1    8]
 [   2    0    0    6    1  876    3    0    3    1]
 [   4    2    0    0    2    3  946    0    1    0]
 [   0    5   10    2    1    0    0 1005    0    5]
 [   3    0    2    3    4    3    1    2  951    5]
 [   2    2    0    7   10    1    1    7    1  978]]


## SVM model 2 using C=2 and kernel = 'poly'

In [0]:
clf = SVC(C=2.0, kernel='poly')
clf.fit(X_train, y_train)
y_pred1 = clf.predict(X_test)
accuracy1 = accuracy_score(y_test, y_pred1)

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [130]:
a_svm2 = accuracy1
f1_svm2 = f1_score(y_test, y_pred1, average='weighted')
p_svm2 = precision_score(y_test, y_pred1, average='weighted')
r_svm2 = recall_score(y_test, y_pred1, average='weighted')
print("Accuracy: ", a_svm2)
print("f1 score: ",f1_svm2)
print("Precision score: ",p_svm2)
print("Recall score: ",r_svm2)
print("Confusion matrix: \n", confusion_matrix(y_test, y_pred1))

Accuracy:  0.9785
f1 score:  0.9784814949100687
Precision score:  0.9785009643261748
Recall score:  0.9785
Confusion matrix: 
 [[ 971    0    1    1    0    3    1    1    2    0]
 [   0 1128    2    1    0    0    3    0    1    0]
 [   7    3 1006    0    2    0    4    8    2    0]
 [   0    2    2  986    0    6    0    5    5    4]
 [   2    0    2    0  965    0    4    0    0    9]
 [   2    0    1    9    1  867    4    1    5    2]
 [   4    5    2    0    3    4  938    0    2    0]
 [   0   11    8    1    1    0    0  999    0    8]
 [   3    0    1    4    4    3    1    3  953    2]
 [   2    6    1    4   12    5    1    4    2  972]]


## SVM model 3 using LinearSVC module and C=1. 
### The maximum number of iterations allowed is set as 10000 because of which  we see a drop in the accuracy as it did not converge in 10000 iterations.

In [131]:
from sklearn.svm import LinearSVC
clf_c1 = LinearSVC(C=1.0, max_iter=10000)
clf_c1.fit(X_train, y_train)
y_pred_c1 = clf_c1.predict(X_test)
accuracy_c1 = accuracy_score(y_test, y_pred_c1)



### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [132]:
a_svm3 = accuracy_c1
f1_svm3 = f1_score(y_test, y_pred_c1, average='weighted')
p_svm3 = precision_score(y_test, y_pred_c1, average='weighted')
r_svm3 = recall_score(y_test, y_pred_c1, average='weighted')
print("Accuracy: ", a_svm3)
print("f1 score: ",f1_svm3)
print("Precision score: ",p_svm3)
print("Recall score: ",r_svm3)
print("Confusion matrix: \n", confusion_matrix(y_test, y_pred_c1))

Accuracy:  0.8782
f1 score:  0.8786973405507397
Precision score:  0.8847569307948001
Recall score:  0.8782
Confusion matrix: 
 [[ 905    1   15    2    2   19    7    9   10   10]
 [   0 1102   15    2    0    1    4    0   11    0]
 [   3   13  937   12   13    3   11   11   25    4]
 [   3    4   49  850    3   28    2    8   35   28]
 [   0    5   11    1  906    0    3    1    4   51]
 [   4    6   10   29   21  750   17    4   31   20]
 [   8    5   43    1    6   20  867    0    7    1]
 [   0   17   29    2   17    2    1  788    4  168]
 [   3    8   52   22   15   32   14   10  758   60]
 [   1   10   11    8   39    7    0    6    8  919]]


# Summary of the SVM models

In [146]:
svm_l1 = ["SVM model 1", a_svm1, f1_svm1, p_svm1, r_svm1]
svm_l2 = ["SVM model 2", a_svm2, f1_svm2, p_svm2, r_svm2]
svm_l3 = ["SVM model 3", a_svm3, f1_svm3, p_svm3, r_svm3]
data = [svm_l1, svm_l2, svm_l3]
df1 = pd.DataFrame(data, columns = ['Model', 'Accuracy', 'F1 score', 'Precision score', 'Recall score'])
df1

Unnamed: 0,Model,Accuracy,F1 score,Precision score,Recall score
0,SVM model 1,0.9831,0.983092,0.983098,0.9831
1,SVM model 2,0.9785,0.978481,0.978501,0.9785
2,SVM model 3,0.8782,0.878697,0.884757,0.8782


# Multi Layer Perceptron (MLP)

### First we change our labels to categorical data.

In [0]:
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

## MLP model 1 with 1 hidden layer of 30 nodes using activation function 'sigmoid'
### We use the 'sgd' optimizer.

In [100]:
model1 = Sequential()
model1.add(Dense(30, activation = "sigmoid", input_shape=(784,)))
model1.add(Dense(10, activation="softmax"))
model1.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=['accuracy'])
model1.fit(X_train, y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f606e923b70>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [104]:
score1 = model1.evaluate(X_test, y_test)
print('Loss = ', score1[0], ' Accuracy = ', score1[1])
a_nn1 = score1[1]

Loss =  0.26132565671801566  Accuracy =  0.9248


In [117]:
y_pred1 = model1.predict(X_test)
pred1 = []
for i in range(len(y_pred1)):
    pred1.append(np.argmax(y_pred1[i]))

f1_nn1 = f1_score(y_test, pred1, average='weighted')
p_nn1 = precision_score(y_test, pred1, average='weighted')
r_nn1 = recall_score(y_test, pred1, average='weighted')
print("f1 score: ",f1_nn1)
print("Precision score: ",p_nn1)
print("Recall score: ",r_nn1)
print("Confusion matrix: \n", confusion_matrix(y_test, pred1))

f1 score:  0.924704902567081
Precision score:  0.9249559572143476
Recall score:  0.9248
Confusion matrix: 
 [[ 953    0    1    3    0    5    9    5    3    1]
 [   0 1114    3    4    0    2    2    0   10    0]
 [  13    3  924   18   10    1   19   16   24    4]
 [   5    1   23  914    0   20    3   12   23    9]
 [   3    1    2    0  902    1    7    8    6   52]
 [   8    1    3   43    7  784   12    8   18    8]
 [  16    3    6    1    9   11  898    3   11    0]
 [   3   12   19    8    6    2    0  948    9   21]
 [   3   11    3   16    5   15   10   11  890   10]
 [   5    7    2    7   24    6    1   21   15  921]]


## MLP model 2 with 1 hidden layer of 100 nodes using activation function 'sigmoid'
### We use the 'sgd' optimizer.
### Thus in this model we test by increasing the number of nodes in the hidden layer.

In [105]:
model2 = Sequential()
model2.add(Dense(100, activation = "sigmoid", input_shape=(784,)))
model2.add(Dense(10, activation="softmax"))
model2.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=['accuracy'])
model2.fit(X_train, y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f60652cedd8>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [106]:
score2 = model2.evaluate(X_test, y_test)
print('Loss = ', score2[0], ' Accuracy = ', score2[1])
a_nn2 = score2[1]

Loss =  0.1761124474093318  Accuracy =  0.9472


In [118]:
y_pred2 = model2.predict(X_test)
pred2 = []
for i in range(len(y_pred2)):
    pred2.append(np.argmax(y_pred2[i]))

f1_nn2 = f1_score(y_test, pred2, average='weighted')
p_nn2 = precision_score(y_test, pred2, average='weighted')
r_nn2 = recall_score(y_test, pred2, average='weighted')
print("f1 score: ",f1_nn2)
print("Precision score: ",p_nn2)
print("Recall score: ",r_nn2)
print("Confusion matrix: \n", confusion_matrix(y_test, pred2))

f1 score:  0.9471341905677948
Precision score:  0.9472052295536872
Recall score:  0.9472
Confusion matrix: 
 [[ 965    0    0    1    0    4    6    2    2    0]
 [   0 1115    2    3    1    1    4    2    7    0]
 [   6    1  965   12    8    6    9   11   10    4]
 [   2    2   12  935    1   31    0   11   12    4]
 [   1    0    3    1  926    1   12    1    3   34]
 [   4    1    4   26    1  825   14    4    7    6]
 [  10    2    1    2    5    8  926    0    4    0]
 [   1    7   21    4    1    0    0  976    2   16]
 [   7    5    5   14    8   14   14   10  892    5]
 [   6    5    1    6   15    5    1   17    6  947]]


## MLP model 3 with 1 hidden layer of 100 nodes using activation function 'relu'
### We use the 'adam' optimizer.
### In this model, we test by changing our activation function as well as the optimizer.


In [107]:
model3 = Sequential()
model3.add(Dense(100, activation = "relu", input_shape=(784,)))
model3.add(Dense(10, activation="softmax"))
model3.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
model3.fit(X_train, y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f606512c780>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [108]:
score3 = model3.evaluate(X_test, y_test)
print('Loss = ', score3[0], ' Accuracy = ', score3[1])
a_nn3 = score3[1]

Loss =  0.4869805608151073  Accuracy =  0.9459


In [119]:
y_pred3 = model3.predict(X_test)
pred3 = []
for i in range(len(y_pred3)):
    pred3.append(np.argmax(y_pred3[i]))

f1_nn3 = f1_score(y_test, pred3, average='weighted')
p_nn3 = precision_score(y_test, pred3, average='weighted')
r_nn3 = recall_score(y_test, pred3, average='weighted')
print("f1 score: ",f1_nn3)
print("Precision score: ",p_nn3)
print("Recall score: ",r_nn3)
print("Confusion matrix: \n", confusion_matrix(y_test, pred3))

f1 score:  0.946497887611948
Precision score:  0.948777620415543
Recall score:  0.9459
Confusion matrix: 
 [[ 958    0    2    1    0    2    2    1   14    0]
 [   0 1106    3    2    0    2    1    0   21    0]
 [   5    3  971    5    1    1    3    8   34    1]
 [   1    1   10  952    0   14    0    4   25    3]
 [   2    1    3    0  902    0    3    5   26   40]
 [   3    0    1   28    2  809    7    1   38    3]
 [   7    4    3    0    4   10  904    0   26    0]
 [   0    6   13   12    5    2    0  974    6   10]
 [   3    0    3    8    2    6    3    0  944    5]
 [   3    3    0   12   15    7    0    4   26  939]]


## MLP model 4 with 1 hidden layer of 100 nodes using activation function 'sigmoid'
### We use the 'adam' optimizer.

In [109]:
model4 = Sequential()
model4.add(Dense(100, input_dim=784, activation='sigmoid'))
model4.add(Dense(10, activation = "softmax"))
model4.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model4.fit(X_train, y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f6064e21160>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [110]:
score4 = model4.evaluate(X_test, y_test)
print('Loss = ', score4[0], ' Accuracy = ', score4[1])
a_nn4 = score4[1]

Loss =  0.16905173732340337  Accuracy =  0.9482


In [120]:
y_pred4 = model4.predict(X_test)
pred4 = []
for i in range(len(y_pred4)):
    pred4.append(np.argmax(y_pred4[i]))

f1_nn4 = f1_score(y_test, pred4, average='weighted')
p_nn4 = precision_score(y_test, pred4, average='weighted')
r_nn4 = recall_score(y_test, pred4, average='weighted')
print("f1 score: ",f1_nn4)
print("Precision score: ",p_nn4)
print("Recall score: ",r_nn4)
print("Confusion matrix: \n", confusion_matrix(y_test, pred4))

f1 score:  0.9481685590783149
Precision score:  0.9482358568300188
Recall score:  0.9482
Confusion matrix: 
 [[ 962    0    1    2    0    3    7    2    3    0]
 [   0 1110    3    4    0    2    3    2   11    0]
 [   8    1  979    6    5    3    4   11   13    2]
 [   1    0   13  950    0   16    0   13   14    3]
 [   1    2    6    0  923    1   10    3    3   33]
 [  10    0    1   23    2  824   10    1   19    2]
 [   7    4    5    0    5    7  922    0    8    0]
 [   0    4   27    5    1    0    0  974    2   15]
 [  11    3    4    8    4   10    8    6  908   12]
 [   8    6    4   12   28    5    1    8    7  930]]


## MLP model 5 with 2 hidden layers of 100 nodes each using activation function 'sigmoid'
### We use the 'adam' optimizer.
### In this model we increase the number of hidden layers.

In [111]:
model5 = Sequential()
model5.add(Dense(100, input_dim=784, activation='sigmoid'))
model5.add(Dense(100, input_dim=784, activation='sigmoid'))
model5.add(Dense(10, activation = "softmax"))
model5.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model5.fit(X_train, y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f6064b57470>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [112]:
score5 = model5.evaluate(X_test, y_test)
print('Loss = ', score5[0], ' Accuracy = ', score5[1])
a_nn5 = score5[1]

Loss =  0.14642892222478987  Accuracy =  0.9543


In [121]:
y_pred5 = model5.predict(X_test)
pred5 = []
for i in range(len(y_pred5)):
    pred5.append(np.argmax(y_pred5[i]))

f1_nn5 = f1_score(y_test, pred5, average='weighted')
p_nn5 = precision_score(y_test, pred5, average='weighted')
r_nn5 = recall_score(y_test, pred5, average='weighted')
print("f1 score: ",f1_nn5)
print("Precision score: ",p_nn5)
print("Recall score: ",r_nn5)
print("Confusion matrix: \n", confusion_matrix(y_test, pred5))

f1 score:  0.9542756426779866
Precision score:  0.9544382887107254
Recall score:  0.9543
Confusion matrix: 
 [[ 965    1    1    0    0    3    3    2    2    3]
 [   0 1121    3    4    0    0    2    1    4    0]
 [  10    2  987    8    3    2    4    8    7    1]
 [   1    0   15  950    0   15    0    9   12    8]
 [   1    1    6    0  928    1    4    2    2   37]
 [   5    0    2   25    2  836    7    4    7    4]
 [  10    3    3    0    6   14  917    0    5    0]
 [   2    9   15    8    1    0    0  980    0   13]
 [  11    3   10   14    5   10    4    7  897   13]
 [   1    5    0    7   11    6    1    9    7  962]]


## MLP model 6 with 3 hidden layers of 100 nodes each using activation function 'sigmoid'
### We use the 'adam' optimizer.
### In this model we further increase the number of hidden layers by 1.

In [113]:
model6 = Sequential()
model6.add(Dense(100, input_dim=784, activation='sigmoid'))
model6.add(Dense(100, input_dim=784, activation='sigmoid'))
model6.add(Dense(100, input_dim=784, activation='sigmoid'))
model6.add(Dense(10, activation = "softmax"))
model6.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model6.fit(X_train, y_train, epochs=40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f60647d57f0>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [114]:
score6 = model6.evaluate(X_test, y_test)
print('Loss = ', score6[0], ' Accuracy = ', score6[1])
a_nn6 = score6[1]

Loss =  0.14187228043526412  Accuracy =  0.9527


In [122]:
y_pred6 = model6.predict(X_test)
pred6 = []
for i in range(len(y_pred6)):
    pred6.append(np.argmax(y_pred6[i]))

f1_nn6 = f1_score(y_test, pred6, average='weighted')
p_nn6 = precision_score(y_test, pred6, average='weighted')
r_nn6 = recall_score(y_test, pred6, average='weighted')
print("f1 score: ",f1_nn6)
print("Precision score: ",p_nn6)
print("Recall score: ",r_nn6)
print("Confusion matrix: \n", confusion_matrix(y_test, pred6))

f1 score:  0.952713292371295
Precision score:  0.9528063644041739
Recall score:  0.9527
Confusion matrix: 
 [[ 957    0    0    2    0    1    8    2    4    6]
 [   0 1117    5    1    0    1    1    0    9    1]
 [   7    3  984   14    1    2    3    9    8    1]
 [   0    1    9  960    0   10    0   11   18    1]
 [   2    0    4    0  936    2    7    3    3   25]
 [   6    2    0   23    2  838    9    1    8    3]
 [   6    4    4    0    6   11  921    0    5    1]
 [   1    9   13    4    3    0    0  970    1   27]
 [   0    2    7   21    7    7    4    3  913   10]
 [   7    7    0   10   14    9    1   10   20  931]]


# Summary of the MLP models

In [136]:
nn_l1 = ["MLP model 1", a_nn1, f1_nn1, p_nn1, r_nn1]
nn_l2 = ["MLP model 2", a_nn2, f1_nn2, p_nn2, r_nn2]
nn_l3 = ["MLP model 3", a_nn3, f1_nn3, p_nn3, r_nn3]
nn_l4 = ["MLP model 4", a_nn4, f1_nn4, p_nn4, r_nn4]
nn_l5 = ["MLP model 5", a_nn5, f1_nn5, p_nn5, r_nn5]
nn_l6 = ["MLP model 6", a_nn6, f1_nn6, p_nn6, r_nn6]
data = [nn_l1, nn_l2, nn_l3, nn_l4, nn_l5, nn_l6]
df = pd.DataFrame(data, columns = ['Model', 'Accuracy', 'F1 score', 'Precision score', 'Recall score'])
df

Unnamed: 0,Model,Accuracy,F1 score,Precision score,Recall score
0,MLP model 1,0.9248,0.924705,0.924956,0.9248
1,MLP model 2,0.9472,0.947134,0.947205,0.9472
2,MLP model 3,0.9459,0.946498,0.948778,0.9459
3,MLP model 4,0.9482,0.948169,0.948236,0.9482
4,MLP model 5,0.9543,0.954276,0.954438,0.9543
5,MLP model 6,0.9527,0.952713,0.952806,0.9527


# Covolutional Neural Network (CNN)

In [0]:
!pip install tensorflow==1.14

In [0]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D

In [0]:
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)
# X_train.dtype
X_train = X_train.astype('float32')
# X_train.dtype
X_test = X_test.astype('float32')
X_train = X_train/255
X_test = X_test/255

In [9]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(60000, 28, 28, 1)
(60000, 10)
(10000, 28, 28, 1)
(10000, 10)


## CNN model 1 using 1 Conv2D layer using activation function 'sigmoid' 

In [16]:
model1 = Sequential()
model1.add(Conv2D(28, kernel_size=(3,3), activation = 'sigmoid', input_shape=input_shape))
model1.add(MaxPooling2D(pool_size=(2, 2)))
model1.add(Flatten())
model1.add(Dense(10,activation='softmax'))
model1.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model1.fit(X_train, y_train, batch_size=128, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6067fc9470>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [71]:
score1 = model1.evaluate(X_test, y_test)
print('Loss = ', score1[0], ' Accuracy = ', score1[1])
a_cnn1 = score1[1]

Loss =  0.18071984765529633  Accuracy =  0.948


In [65]:
y_pred1 = model1.predict(X_test)
pred1 = []
for i in range(len(y_pred1)):
    pred1.append(np.argmax(y_pred1[i]))

f1_cnn1 = f1_score(y_test, pred1, average='weighted')
p_cnn1 = precision_score(y_test, pred1, average='weighted')
r_cnn1 = recall_score(y_test, pred1, average='weighted')
print("f1 score: ",f1_cnn1)
print("Precision score: ",p_cnn1)
print("Recall score: ",r_cnn1)
print("Confusion matrix: \n", confusion_matrix(y_test, pred1))

f1 score:  0.9478929253148535
Precision score:  0.9487443580517673
Recall score:  0.948
Confusion matrix: 
 [[ 974    0    1    1    0    1    0    2    1    0]
 [   0 1124    4    1    1    1    1    0    3    0]
 [  10    4  963   10   13    0    1   12   16    3]
 [   2    0   14  944    2   15    0   14   14    5]
 [   3    0    5    0  952    0    0    3    4   15]
 [  10    2    3   12    4  839    2    5   13    2]
 [  25    4   14    0   26   25  855    2    7    0]
 [   3    9   19    5    6    0    0  978    0    8]
 [   7    2    8   10    7    5    0    5  919   11]
 [   9    6    1    5   26    2    0   18   10  932]]


## CNN model 2 using 2 Conv2D layers using activation function 'relu' 
### Thus in this model we test by adding another convolutional layer, increasing the number of nodes in one of the convolutional layers and changing the activation function.

In [19]:
model2 = Sequential()
model2.add(Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=input_shape))
model2.add(Conv2D(64, (3, 3), activation='relu'))
model2.add(MaxPooling2D(pool_size=(2, 2)))
model2.add(Flatten())
model2.add(Dense(10,activation='softmax'))
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model2.fit(X_train, y_train, batch_size=128, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6067c78b38>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [70]:
score2 = model2.evaluate(X_test, y_test)
print('Loss = ', score2[0], ' Accuracy = ', score2[1])
a_cnn2 = score2[1]

Loss =  0.04824989859047273  Accuracy =  0.988


In [64]:
y_pred2 = model2.predict(X_test)
pred2 = []
for i in range(len(y_pred2)):
    pred2.append(np.argmax(y_pred2[i]))

f1_cnn2 = f1_score(y_test, pred2, average='weighted')
p_cnn2 = precision_score(y_test, pred2, average='weighted')
r_cnn2 = recall_score(y_test, pred2, average='weighted')
print("f1 score: ",f1_cnn2)
print("Precision score: ",p_cnn2)
print("Recall score: ",r_cnn2)
print("Confusion matrix: \n", confusion_matrix(y_test, pred2))

f1 score:  0.9880025565099859
Precision score:  0.9880673420408137
Recall score:  0.988
Confusion matrix: 
 [[ 970    0    0    1    0    0    5    0    1    3]
 [   0 1128    1    2    0    0    2    0    2    0]
 [   1    1 1017    6    0    0    1    5    1    0]
 [   0    0    0 1008    0    2    0    0    0    0]
 [   0    1    0    0  976    0    1    0    0    4]
 [   1    0    0   10    0  879    1    0    0    1]
 [   2    2    0    1    1    1  950    0    1    0]
 [   0    4    7    3    0    0    0 1010    1    3]
 [   4    0    3    3    0    2    2    2  956    2]
 [   0    3    0    7    5    3    0    2    3  986]]


## CNN model 3 using 2 Conv2D layers using activation function 'relu' and another layer of 128 nodes.
### Thus in this model we test by adding another layer.

In [21]:
model3 = Sequential()
model3.add(Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=input_shape))
model3.add(Dense(64, activation='relu'))
model3.add(MaxPooling2D(pool_size=(2, 2)))
model3.add(Flatten())
model3.add(Dense(128, activation='relu'))
model3.add(Dense(10,activation='softmax'))
model3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model3.fit(X_train, y_train, batch_size=128, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f60677c9eb8>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [69]:
score3 = model3.evaluate(X_test, y_test)
print('Loss = ', score3[0], ' Accuracy = ', score3[1])
a_cnn3 = score3[1]

Loss =  0.04637024184245347  Accuracy =  0.9887


In [63]:
y_pred3 = model3.predict(X_test)
pred3 = []
for i in range(len(y_pred3)):
    pred3.append(np.argmax(y_pred3[i]))

f1_cnn3 = f1_score(y_test, pred3, average='weighted')
p_cnn3 = precision_score(y_test, pred3, average='weighted')
r_cnn3 = recall_score(y_test, pred3, average='weighted')
print("f1 score: ",f1_cnn3)
print("Precision score: ",p_cnn3)
print("Recall score: ",r_cnn3)
print("Confusion matrix: \n", confusion_matrix(y_test, pred3))

f1 score:  0.9886963829908504
Precision score:  0.9887282417107286
Recall score:  0.9887
Confusion matrix: 
 [[ 979    0    0    0    0    0    0    1    0    0]
 [   0 1131    2    1    0    0    0    0    1    0]
 [   2    2 1019    1    1    0    0    4    3    0]
 [   0    0    1 1005    0    1    0    1    2    0]
 [   1    1    2    0  963    0    2    0    2   11]
 [   1    0    0    6    0  882    2    0    0    1]
 [   7    2    1    0    2    1  944    0    1    0]
 [   0    2    7    4    0    0    0 1012    1    2]
 [   3    0    1    3    0    1    1    1  960    4]
 [   2    1    1    2    3    5    0    1    2  992]]


## CNN model 4 using 2 Conv2D layers using activation function 'relu' and another layer of 128 nodes.
### We also add dropout layers.

In [23]:
model4 = Sequential()
model4.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model4.add(Conv2D(64, (3, 3), activation='relu'))
model4.add(MaxPooling2D(pool_size=(2, 2)))
model4.add(Dropout(0.25))
model4.add(Flatten())
model4.add(Dense(128, activation='relu'))
model4.add(Dropout(0.5))
model4.add(Dense(10, activation='softmax'))
model4.compile(optimizer=keras.optimizers.Adadelta(), loss='categorical_crossentropy', metrics=['accuracy'])
model4.fit(X_train, y_train, batch_size=128, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f60672e0048>

### The accuracy, f1 score, precision score, recall score and confusion matrix obtained by running the model is shown below

In [67]:
score4 = model4.evaluate(X_test, y_test)
print('Loss = ', score4[0], ' Accuracy = ', score4[1])
a_cnn4 = score4[1]

Loss =  0.027261577927844338  Accuracy =  0.9904


In [62]:
y_pred4 = model4.predict(X_test)
pred4 = []
for i in range(len(y_pred4)):
    pred4.append(np.argmax(y_pred4[i]))

f1_cnn4 = f1_score(y_test, pred4, average='weighted')
p_cnn4 = precision_score(y_test, pred4, average='weighted')
r_cnn4 = recall_score(y_test, pred4, average='weighted')
print("f1 score: ",f1_cnn4)
print("Precision score: ",p_cnn4)
print("Recall score: ",r_cnn4)
print("Confusion matrix: \n", confusion_matrix(y_test, pred4))

f1 score:  0.9903989674914703
Precision score:  0.9904144613900989
Recall score:  0.9904
Confusion matrix: 
 [[ 974    0    0    0    0    0    4    1    1    0]
 [   0 1130    1    1    0    1    2    0    0    0]
 [   1    1 1022    1    1    0    2    2    2    0]
 [   0    0    3 1002    0    4    0    0    1    0]
 [   0    0    0    0  974    0    3    0    2    3]
 [   2    0    0    4    0  883    3    0    0    0]
 [   3    1    0    0    1    1  951    0    1    0]
 [   0    2    6    1    0    0    0 1017    1    1]
 [   4    0    1    0    0    0    3    1  962    3]
 [   2    1    0    1    6    5    0    3    2  989]]


# Summary of the CNN models

In [144]:
cnn_l1 = ["CNN model 1", a_cnn1, f1_cnn1, p_cnn1, r_cnn1]
cnn_l2 = ["CNN model 2", a_cnn2, f1_cnn2, p_cnn2, r_cnn2]
cnn_l3 = ["CNN model 3", a_cnn3, f1_cnn3, p_cnn3, r_cnn3]
cnn_l4 = ["CNN model 4", a_cnn4, f1_cnn4, p_cnn4, r_cnn4]
data = [cnn_l1, cnn_l2, cnn_l3, cnn_l4]
df = pd.DataFrame(data, columns = ['Model', 'Accuracy', 'F1 score', 'Precision score', 'Recall score'])
df

Unnamed: 0,Model,Accuracy,F1 score,Precision score,Recall score
0,CNN model 1,0.948,0.947893,0.948744,0.948
1,CNN model 2,0.988,0.988003,0.988067,0.988
2,CNN model 3,0.9887,0.988696,0.988728,0.9887
3,CNN model 4,0.9904,0.990399,0.990414,0.9904


# Overall Summary of all the models

In [147]:
data = [svm_l1, svm_l2, svm_l3, nn_l1, nn_l2, nn_l3, nn_l4, nn_l5, nn_l6, cnn_l1, cnn_l2, cnn_l3, cnn_l4]
df = pd.DataFrame(data, columns = ['Model', 'Accuracy', 'F1 score', 'Precision score', 'Recall score'])
df

Unnamed: 0,Model,Accuracy,F1 score,Precision score,Recall score
0,SVM model 1,0.9831,0.983092,0.983098,0.9831
1,SVM model 2,0.9785,0.978481,0.978501,0.9785
2,SVM model 3,0.8782,0.878697,0.884757,0.8782
3,MLP model 1,0.9248,0.924705,0.924956,0.9248
4,MLP model 2,0.9472,0.947134,0.947205,0.9472
5,MLP model 3,0.9459,0.946498,0.948778,0.9459
6,MLP model 4,0.9482,0.948169,0.948236,0.9482
7,MLP model 5,0.9543,0.954276,0.954438,0.9543
8,MLP model 6,0.9527,0.952713,0.952806,0.9527
9,CNN model 1,0.948,0.947893,0.948744,0.948
