Use MNIST dataset to create a classifier for all the 10 digits. First implement the
classifier by squeezing the image into a vector and then using a MLP. Now, try the same
task using a different machine learning classifier such as an SVM to check the gain in
performance by using Perceptrons as compared to conventional machine learning
techniques.

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

In [3]:
train = pd.read_csv('mnist_train/mnist_train.csv')
test = pd.read_csv('mnist_test/mnist_test.csv')

In [4]:
train.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
X_train = train.values[:,1:]
y_train = train.values[:,0]
X_test = test.values[:,1:]
y_test = test.values[:,0]

In [6]:
y_train = np.matrix(y_train).T
y_test = np.matrix(y_test).T

In [7]:
onehotencoder = OneHotEncoder(categories='auto')
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
y_train = onehotencoder.fit_transform(y_train).toarray()
X_test = scaler.fit_transform(X_test)
y_test = onehotencoder.fit_transform(y_test).toarray()

In [8]:
input_size = X_train.shape[1]

In [9]:
hidden_size = 1000

In [10]:
input_weights = np.random.normal(size=[input_size,hidden_size])
biases = np.random.normal(size=[hidden_size])

In [11]:
def relu(x):
    return np.maximum(x, 0)

In [12]:
def hidden_nodes(X):
    G = np.dot(X, input_weights)
    G = G + biases
    H = relu(G)
    return H

In [13]:
output_weights = np.dot(np.linalg.pinv(hidden_nodes(X_train)), y_train)

In [14]:
def predict(X):
    out = hidden_nodes(X)
    out = np.dot(out, output_weights)
    return out

In [15]:
prediction = predict(X_test)
correct = 0
total = X_test.shape[0]
for i in range(total):
    predicted = np.argmax(prediction[i])
    actual = np.argmax(y_test[i])
    correct += 1 if predicted == actual else 0
accuracy = correct/total
print('Accuracy for ', hidden_size, ' hidden nodes: ', accuracy)

Accuracy for  1000  hidden nodes:  0.9433


In [26]:
from sklearn import svm

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

model = svm.SVC()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
predictions = [value for value in y_pred]
ac3 = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (ac3 * 100.0))

Accuracy: 33.33%
