In [2]:
import numpy as np
from mnist import MNIST
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [3]:
pca = PCA(n_components=500)
sc = StandardScaler()

In [4]:
mnist = MNIST('./dataset')
x_train, y_train = mnist.load_training()
x_test, y_test = mnist.load_testing() 

In [5]:
x_train = np.asarray(x_train).astype(np.float32)
y_train = np.asarray(y_train).astype(np.int32)
x_test = np.asarray(x_test).astype(np.float32)
y_test = np.asarray(y_test).astype(np.int32)
x_trainmlp = sc.fit_transform(x_train)
x_testmlp = sc.transform(x_test)
x_train = x_train.reshape(x_train.shape[0], 28, 28,1)
x_test = x_test.reshape(x_test.shape[0], 28, 28,1)

Convolutional Neural Network implemented using Tensorflow, keras

In [5]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models



In [6]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10))

In [7]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model_log = model.fit(x_train, y_train,
          batch_size=128,
          epochs=5,
          verbose=1,
          validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [8]:
y_pred = model.predict_classes(x_test)
print("\nAccuracy score for MLP\n",accuracy_score(y_test, y_pred))
print("\nConfusion matrix for MLP\n",confusion_matrix(y_test,y_pred))
print("\nClassification report for MLP\n",classification_report(y_test,y_pred))


Accuracy score for MLP
 0.9878

Confusion matrix for MLP
 [[ 971    1    1    0    2    1    2    0    1    1]
 [   0 1133    0    0    0    0    1    1    0    0]
 [   3    3 1014    2    1    0    0    4    5    0]
 [   0    1    2  980    0   12    0    3    9    3]
 [   0    0    0    0  976    0    0    0    0    6]
 [   0    0    0    2    1  884    1    1    2    1]
 [   3    2    0    0    2    1  947    0    3    0]
 [   0    4    9    0    0    0    0 1010    1    4]
 [   1    0    1    0    0    0    0    0  970    2]
 [   1    1    0    0    6    4    0    3    1  993]]

Classification report for MLP
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       980
           1       0.99      1.00      0.99      1135
           2       0.99      0.98      0.98      1032
           3       1.00      0.97      0.98      1010
           4       0.99      0.99      0.99       982
           5       0.98      0.99      0.99       892

In [9]:
model1 = models.Sequential()
model1.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model1.add(layers.MaxPooling2D((2, 2)))
model1.add(layers.Conv2D(64, (3, 3), activation='relu'))
model1.add(layers.Dropout(0.25))
model1.add(layers.Flatten())
model1.add(layers.Dense(128, activation='relu'))
model1.add(layers.Dense(10, activation='softmax'))
model1.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model_log = model1.fit(x_train, y_train,
          epochs=5,
          verbose=1,
          validation_data=(x_test, y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [10]:
y_pred = model1.predict_classes(x_test)
print("\nAccuracy score for MLP\n",accuracy_score(y_test, y_pred))
print("\nConfusion matrix for MLP\n",confusion_matrix(y_test,y_pred))
print("\nClassification report for MLP\n",classification_report(y_test,y_pred))


Accuracy score for MLP
 0.0892

Confusion matrix for MLP
 [[   0    0    0    0    0  980    0    0    0    0]
 [   0    0    0    0    0 1135    0    0    0    0]
 [   0    0    0    0    0 1032    0    0    0    0]
 [   0    0    0    0    0 1010    0    0    0    0]
 [   0    0    0    0    0  982    0    0    0    0]
 [   0    0    0    0    0  892    0    0    0    0]
 [   0    0    0    0    0  958    0    0    0    0]
 [   0    0    0    0    0 1028    0    0    0    0]
 [   0    0    0    0    0  974    0    0    0    0]
 [   0    0    0    0    0 1009    0    0    0    0]]

Classification report for MLP
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       980
           1       0.00      0.00      0.00      1135
           2       0.00      0.00      0.00      1032
           3       0.00      0.00      0.00      1010
           4       0.00      0.00      0.00       982
           5       0.09      1.00      0.16       892

  _warn_prf(average, modifier, msg_start, len(result))


The best case for CNN is the first case giving an accuracy of close to 0.9. Here from the 28X28 size, multiple convolution layers of 32/64 filters of size 3X3. Later the model is flattened, densed into 64 classes and later divided into the 10 layers as there are 10 distinct outputs. This is later passed through 5 epochs of batch size 128. But in comparison with the 2nd case, the model has dropped a convolution layer, uses dropout to increase more convergence and is densed into 128 layers before 10. Unfortunately, it gives a poor accuracy. 

MLP Classifier applied for the same data

In [6]:
from sklearn.neural_network import MLPClassifier

In [7]:
mlp = MLPClassifier(hidden_layer_sizes=(20, 3), max_iter=150, alpha=1e-4,
                    solver='sgd',learning_rate_init=.1)

mlp.fit(x_trainmlp, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(20, 3), learning_rate='constant',
              learning_rate_init=0.1, max_fun=15000, max_iter=150, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='sgd', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [8]:
y_pred = mlp.predict(x_testmlp)

In [9]:
print("\nAccuracy score for MLP\n",accuracy_score(y_test, y_pred))
print("\nConfusion matrix for MLP\n",confusion_matrix(y_test,y_pred))
print("\nClassification report for MLP\n",classification_report(y_test,y_pred))


Accuracy score for MLP
 0.8779

Confusion matrix for MLP
 [[ 906    2    2    0    0    3   10    5   50    2]
 [   0 1074   14   22    0    1    1    6   16    1]
 [   6   67  871   34    0    0    6   25   19    4]
 [   0   49   11  854    0   19    0   56   15    6]
 [   5    0    1    0  748    4   14    7    6  197]
 [   7    7    0   45    1  668    8   24   92   40]
 [   7    3    2    0    4   10  898    8   25    1]
 [   0    8    5   14    1    3    0  981    3   13]
 [   4    9    4   10    4   20    6    7  894   16]
 [   4    2    1    4   13   15    1   63   21  885]]

Classification report for MLP
               precision    recall  f1-score   support

           0       0.96      0.92      0.94       980
           1       0.88      0.95      0.91      1135
           2       0.96      0.84      0.90      1032
           3       0.87      0.85      0.86      1010
           4       0.97      0.76      0.85       982
           5       0.90      0.75      0.82       892

In [15]:
mlp = MLPClassifier(hidden_layer_sizes=(30, 5), max_iter=300,
                    solver='sgd', random_state=100,learning_rate = 'adaptive',
                    learning_rate_init=.1)

mlp.fit(x_trainmlp, y_train)
y_pred = mlp.predict(x_testmlp)

In [16]:
print("\nAccuracy score for MLP\n",accuracy_score(y_test, y_pred))
print("\nConfusion matrix for MLP\n",confusion_matrix(y_test,y_pred))
print("\nClassification report for MLP\n",classification_report(y_test,y_pred))


Accuracy score for MLP
 0.4265

Confusion matrix for MLP
 [[   0    0    0    1    0    5    0  966    4    4]
 [   0 1105    1    0    0    1    0    6   22    0]
 [   0    1    5    5    0   18    0  982   14    7]
 [   0    0    9  843    0   31    0   85   39    3]
 [   0    0    0    0    0    0    0  982    0    0]
 [   0    1    0    7    0  453    0  182  241    8]
 [   0    2    0    0    0    3    0  950    3    0]
 [   0    0    1    4    0    8    0 1009    6    0]
 [   0   11    0   10    0   38    0   62  848    5]
 [   0    3    0    6    0   17    0  971   10    2]]

Classification report for MLP
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       980
           1       0.98      0.97      0.98      1135
           2       0.31      0.00      0.01      1032
           3       0.96      0.83      0.89      1010
           4       0.00      0.00      0.00       982
           5       0.79      0.51      0.62       892

  _warn_prf(average, modifier, msg_start, len(result))


MLP Classifier with different max iterations, an adaptive learning rate and different hidden states have been attempted.The best case is the first one, having max iterations being 150, solver being stochastic gradient and the hidden states being (20,3), giving an accuracy of 0.88.

SVM classifier

In [19]:
from sklearn.svm import SVC

In [21]:
x_train = x_train.reshape(x_train.shape[0], 28*28)
x_test = x_test.reshape(x_test.shape[0], 28*28)

In [22]:
x_trainsvm = x_train[0:10000]
y_trainsvm = y_train[0:10000]
x_testsvm = x_test[0:2000]
y_testsvm = y_test[0:2000]

x_trainsvm = pca.fit_transform(x_trainsvm)
x_testsvm = pca.transform(x_testsvm)
x_trainsvm = sc.fit_transform(x_trainsvm)
x_testsvm = sc.transform(x_testsvm)

In [23]:
subclf = SVC(kernel='linear', C=0.001)
subclf.fit(x_trainsvm, y_trainsvm)
y_predsvm = subclf.predict(x_testsvm)

In [24]:
print("\nAccuracy score for SVM\n",accuracy_score(y_testsvm, y_predsvm))
print("\nConfusion matrix for SVM\n",confusion_matrix(y_testsvm, y_predsvm))
print("\nClassification report for SVM\n",classification_report(y_testsvm, y_predsvm))


Accuracy score for SVM
 0.8585

Confusion matrix for SVM
 [[168   0   1   1   2   1   2   0   0   0]
 [  1 228   0   1   1   0   2   0   1   0]
 [  5   9 174   2   0   1   8   4  14   2]
 [  1   0   7 173   0  11   1   6   4   4]
 [  0   2   2   0 195   0   4   1   2  11]
 [  6   2   0   9   2 144   2   3   9   2]
 [  3   3   0   0   1  11 158   0   2   0]
 [  0  10   6   2   5   1   0 165   1  15]
 [  2   5   6   8   5  12   1   2 148   3]
 [  1   1   0   8  10   1   0   4   5 164]]

Classification report for SVM
               precision    recall  f1-score   support

           0       0.90      0.96      0.93       175
           1       0.88      0.97      0.92       234
           2       0.89      0.79      0.84       219
           3       0.85      0.84      0.84       207
           4       0.88      0.90      0.89       217
           5       0.79      0.80      0.80       179
           6       0.89      0.89      0.89       178
           7       0.89      0.80      0.85  

In [25]:
subclf = SVC(kernel='poly')
subclf.fit(x_trainsvm, y_trainsvm)
y_predsvm = subclf.predict(x_testsvm)

In [26]:
print("\nAccuracy score for SVM\n",accuracy_score(y_testsvm, y_predsvm))
print("\nConfusion matrix for SVM\n",confusion_matrix(y_testsvm, y_predsvm))
print("\nClassification report for SVM\n",classification_report(y_testsvm, y_predsvm))


Accuracy score for SVM
 0.199

Confusion matrix for SVM
 [[  4 171   0   0   0   0   0   0   0   0]
 [  0 233   0   0   0   0   1   0   0   0]
 [  0 188  30   0   1   0   0   0   0   0]
 [  0 198   0   8   0   0   0   1   0   0]
 [  0 207   1   1   7   0   0   0   0   1]
 [  0 160   1   0   0  18   0   0   0   0]
 [  0 140   0   0   0   1  37   0   0   0]
 [  0 168   0   1   0   0   0  34   0   2]
 [  0 190   1   0   0   0   0   0   1   0]
 [  0 166   0   0   0   0   0   2   0  26]]

Classification report for SVM
               precision    recall  f1-score   support

           0       1.00      0.02      0.04       175
           1       0.13      1.00      0.23       234
           2       0.91      0.14      0.24       219
           3       0.80      0.04      0.07       207
           4       0.88      0.03      0.06       217
           5       0.95      0.10      0.18       179
           6       0.97      0.21      0.34       178
           7       0.92      0.17      0.28   

SVM with  multiple kernels is tested. The best case is with kernel = linear and c = 0.001. The best SVM accuracy is 0.8.