In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow.keras as keras
import numpy as np
path = '/home/srenan/workspace/jupyter/data/mnist.npz'
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data(path)
print(train_images.shape)
print(train_labels.shape)
fig, axs = plt.subplots(2,2)
for ii in np.arange(0,2):
    for jj in np.arange(0,2):
        axs[ii,jj].imshow(train_images[ii*2+jj,:,:])
        axs[ii,jj].title.set_text(train_labels[ii*2+jj])
##print(range(0,2))
##plt.show()

(60000, 28, 28)
(60000,)


In [2]:
from keras import models
from keras import layers
network = models.Sequential()
network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28,)))
network.add(layers.Dense(10, activation='softmax'))

Using TensorFlow backend.


In [3]:
network.compile(optimizer='rmsprop',
loss='categorical_crossentropy',
metrics=['accuracy'])

In [4]:
train_images = train_images.reshape((60000, 28 * 28)) #Reshape in a vector
train_images = train_images.astype('float32') / 255 #Standardize 0-1 using the range of the data 0-254
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype('float32') / 255

In [5]:
from keras.utils import to_categorical
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

In [6]:
network.fit(train_images, train_labels, epochs=5, batch_size=128)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7fb3e0119390>

In [7]:
test_loss, test_acc = network.evaluate(test_images, test_labels)
print(test_acc)

0.9781000018119812


In [8]:
# Dataset subsetting/reshaping
sstraini = train_images[0:100]
sstrainl = train_labels[0:100]
# SVM/AdaBoost do not use OHE for the labels -> coerce into ints
sstrainl_int = [np.where(r==1)[0][0] for r in sstrainl]
trainl_int = [np.where(r==1)[0][0] for r in train_labels]
testl_int = [np.where(r==1)[0][0] for r in test_labels]

In [9]:
# Random forrest
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators = 10, random_state = 42)
rf.fit(sstraini, sstrainl)
predRF = rf.predict(test_images)

In [10]:
# SVC: Support Vector Classification
from sklearn import svm
svc = svm.SVC()
svc.fit(sstraini, sstrainl_int)
predSVC = svc.predict(test_images)

In [11]:
# AdaBoost
from sklearn.ensemble import AdaBoostClassifier
abc = AdaBoostClassifier(n_estimators = 50, learning_rate = 1)
abc.fit(train_images, trainl_int)
predADA = abc.predict(test_images)

In [12]:
# Summary
from sklearn.metrics import r2_score
accNN = round(test_acc, 3)
accRF = round(r2_score(test_labels, predRF), 3) #score method works but throws warnings
accSVM = round(svc.score(test_images, testl_int), 3)
accABC = round(abc.score(test_images, testl_int), 3)
print("Neural Net:", accNN)
print("Random Forest:", accRF)
print("Multi-class SVM:", accSVM)
print("AdaBoost:", accABC)

Neural Net: 0.978
Random Forest: 0.332
Multi-class SVM: 0.656
AdaBoost: 0.73


In [13]:
# Attempt to see where errors are
index0 = [ i for i in range(len(testl_int)) if testl_int[i] == 0 ]
index1 = [ i for i in range(len(testl_int)) if testl_int[i] == 1 ]
index2 = [ i for i in range(len(testl_int)) if testl_int[i] == 2 ]
index3 = [ i for i in range(len(testl_int)) if testl_int[i] == 3 ]
index4 = [ i for i in range(len(testl_int)) if testl_int[i] == 4 ]
index5 = [ i for i in range(len(testl_int)) if testl_int[i] == 5 ]
index6 = [ i for i in range(len(testl_int)) if testl_int[i] == 6 ]
index7 = [ i for i in range(len(testl_int)) if testl_int[i] == 7 ]
index8 = [ i for i in range(len(testl_int)) if testl_int[i] == 8 ]
index9 = [ i for i in range(len(testl_int)) if testl_int[i] == 9 ]
label_dict = {'i0':index0, 'i1':index1, 'i2':index2, 'i3':index3, 'i4':index4, 'i5':index5, 'i6':index6, 'i7':index7, 'i8':index8, 'i9':index9}
# Predictions
rf_dict = {'i0' : list(), 'i1' : list(), 'i2' : list(), 'i3' : list(), 'i4' : list(), 'i5' : list(), 'i6' : list(), 'i7' : list(), 'i8' : list(), 'i9' : list()}
svc_dict = {'i0' : list(), 'i1' : list(), 'i2' : list(), 'i3' : list(), 'i4' : list(), 'i5' : list(), 'i6' : list(), 'i7' : list(), 'i8' : list(), 'i9' : list()}
ada_dict = {'i0' : list(), 'i1' : list(), 'i2' : list(), 'i3' : list(), 'i4' : list(), 'i5' : list(), 'i6' : list(), 'i7' : list(), 'i8' : list(), 'i9' : list()}

for i in ['i0','i1','i2','i3','i4','i5','i6','i7','i8','i9']:
    for j in range(0,10):
        rf_dict[i].append(sum(predRF[label_dict[i]] == j))
        svc_dict[i].append(sum(predSVC[label_dict[i]] == j))
        ada_dict[i].append(sum(predADA[label_dict[i]] == j))


In [14]:
import pandas as pd
# Column sums are total number of true_labels
# Row sums are total predicted for each label
print("Random Forest:")
#pd.DataFrame.from_dict(rf_dict)

Random Forest:


In [15]:
print("SVM:")
pd.DataFrame.from_dict(svc_dict)

SVM:


Unnamed: 0,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9
0,876,0,14,16,1,57,19,1,58,13
1,1,1112,230,34,16,33,34,61,71,13
2,4,0,511,17,0,3,7,9,51,1
3,2,4,24,881,2,385,5,15,145,30
4,54,3,195,16,828,235,258,45,128,398
5,1,0,0,0,0,61,1,0,0,0
6,24,2,7,1,13,21,631,0,12,1
7,4,0,16,5,1,14,0,725,4,26
8,1,14,25,24,0,17,0,16,410,3
9,13,0,10,16,121,66,3,156,95,524


In [16]:
print("AdaBoost:")
pd.DataFrame.from_dict(ada_dict)

AdaBoost:


Unnamed: 0,i0,i1,i2,i3,i4,i5,i6,i7,i8,i9
0,883,0,30,28,4,29,20,7,40,9
1,0,1070,35,32,2,32,10,16,48,11
2,25,3,596,19,17,6,35,23,11,23
3,3,8,32,678,14,122,6,8,91,32
4,4,3,18,2,708,27,26,14,15,161
5,28,1,8,92,16,526,32,7,34,18
6,23,4,208,30,10,22,822,1,22,1
7,3,25,25,33,80,19,1,804,18,169
8,3,21,75,74,35,71,6,20,661,34
9,8,0,5,22,96,38,0,128,34,551
