## Load MNIST on Python 3.x

In [1]:
import numpy as np
from urllib import request
import gzip
import pickle
import pandas as pd
from PIL import Image
import os
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from tqdm import tqdm_notebook
from keras.utils import np_utils
%matplotlib inline

Using TensorFlow backend.


In [2]:
#MNIST Data download and processing
filename = [
["training_images","train-images-idx3-ubyte.gz"],
["test_images","t10k-images-idx3-ubyte.gz"],
["training_labels","train-labels-idx1-ubyte.gz"],
["test_labels","t10k-labels-idx1-ubyte.gz"]
]

def download_mnist():
    base_url = "http://yann.lecun.com/exdb/mnist/"
    for name in filename:
        print("Downloading "+name[1]+"...")
        request.urlretrieve(base_url+name[1], name[1])
    print("Download complete.")

def save_mnist():
    mnist = {}
    for name in filename[:2]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28*28)
    for name in filename[-2:]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8)
    with open("mnist.pkl", 'wb') as f:
        pickle.dump(mnist,f)
    print("Save complete.")

def init():
    download_mnist()
    save_mnist()

def load():
    with open("mnist.pkl",'rb') as f:
        mnist = pickle.load(f)
        print(mnist.keys())
    #return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]
    return mnist

if __name__ == '__main__':
    init()


Downloading train-images-idx3-ubyte.gz...
Downloading t10k-images-idx3-ubyte.gz...
Downloading train-labels-idx1-ubyte.gz...
Downloading t10k-labels-idx1-ubyte.gz...
Download complete.
Save complete.


## Load USPS on Python 3.x

In [3]:
USPSMat  = []
USPSTar  = []
curPath  = 'proj3_images/Numerals'
savedImg = [] # why have u given a separate folder for test, we cnt figure out the target in that. should we just take some data from numerls?

for j in range(0,10):
    curFolderPath = curPath + '/' + str(j)
    imgs =  os.listdir(curFolderPath)
    for img in imgs:
        curImg = curFolderPath + '/' + img
        if curImg[-3:] == 'png':
            img = Image.open(curImg,'r')
            img = img.resize((28, 28))
            savedImg = img
            imgdata = (255-np.array(img.getdata()))/255
            USPSMat.append(imgdata)
            USPSTar.append(j)

In [4]:
#MNIST Data
MNist_Dataset = load()
Mnist_TrainingData = MNist_Dataset["training_images"][:50000]
Mnist_TrainingTarget = MNist_Dataset["training_labels"][:50000]

#MNIST TestData
Mnist_TestingData = MNist_Dataset["training_images"][50000:60000]
Mnist_TestingTarget = MNist_Dataset["training_labels"][50000:60000]

#USPS Data
USPS_TestingData = pd.DataFrame(USPSMat)
USPS_TargetData = pd.DataFrame(USPSTar)

dict_keys(['test_images', 'training_labels', 'test_labels', 'training_images'])


In [5]:
#Logistic regression

In [8]:
#Logistic regression
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))


def model(X, w):
    return tf.matmul(X, w) # notice we use the same model as linear regression, this is because there is a baked in cost function which performs softmax and cross entropy

#mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
trX, trY, teX, teY = Mnist_TrainingData, np_utils.to_categorical(np.array(Mnist_TrainingTarget),10), Mnist_TestingData, np_utils.to_categorical(np.array(Mnist_TestingTarget),10)

X = tf.placeholder("float", [None, 784]) # create symbolic variables
Y = tf.placeholder("float", [None, 10])

w = init_weights([784, 10]) # like in linear regression, we need a shared variable weight matrix for logistic regression

py_x = model(X, w)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y)) # compute mean cross entropy (softmax is applied internally)
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost) # construct optimizer
predict_op = tf.argmax(py_x, 1) # at predict time, evaluate the argmax of the logistic regression

# Launch the graph in a session
with tf.Session() as sess:
    # you need to initialize all variables
    tf.global_variables_initializer().run()
    for i in range(1000):
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX)+1, 128)):
            #end = start + 128

            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})
 
        training_accuracy = []
        training_accuracy.append(np.mean(np.argmax(teY, axis=1) ==
                         sess.run(predict_op, feed_dict={X: teX})))
    predictedTestLabelMNIST = sess.run(predict_op, feed_dict={X: teX})
    predictedTestLabelUSPS  = sess.run(predict_op, feed_dict={X: USPS_TestingData})
#Testing The Model for MNIST
    wrong   = 0
    right   = 0


    for i,j in zip(teY,predictedTestLabelMNIST):

        if np.argmax(i) == j:
            right = right + 1
        else:
            wrong = wrong + 1

    print("For MNIST Dataset")
    print("Errors: " + str(wrong), " Correct :" + str(right))

    print("Testing Accuracy: " + str(right/(right+wrong)*100))
    print('Confusion Matrix MNIST')
    print(confusion_matrix(Mnist_TestingTarget,predictedTestLabelMNIST))
    
#Testing The Model for USPS
    wrong   = 0
    right   = 0
  
    for i,j in zip(np_utils.to_categorical(np.array(USPS_TargetData),10),predictedTestLabelUSPS):

        if np.argmax(i) == j:
            right = right + 1
        else:
            wrong = wrong + 1

    print("For USPS Dataset")
    print("Errors: " + str(wrong), " Correct :" + str(right))

    print("Testing Accuracy: " + str(right/(right+wrong)*100))
    print('Confusion Matrix USPS')
    print(confusion_matrix(USPS_TargetData, predictedTestLabelUSPS))

For MNIST Dataset
Errors: 1089  Correct :8911
Testing Accuracy: 89.11
Confusion Matrix MNIST
[[ 926    0    3    9    1   22   18    2    7    3]
 [   0 1014    2   11    0    4    3    6   22    2]
 [   5   24  746   70    6   14   32   22   69    2]
 [   1    1    6  952    1   32    3    5   19   10]
 [   5   15    5    3  847    3   29   13    7   56]
 [   9    2    4   46    4  802   22    5   20    1]
 [   5    2    2    0    3   18  933    0    4    0]
 [   6    4    1   17    4    2    0 1024    3   29]
 [   0   13    2   46    1   69   12   14  833   19]
 [   4    6    0   12   15   23    2   59    6  834]]
For USPS Dataset
Errors: 15143  Correct :4856
Testing Accuracy: 24.281214060703036
Confusion Matrix USPS
[[317   6 367 320 108 261 169 318  21 113]
 [ 49 118 552 184 159 228 103 367 161  79]
 [114  78 848 160  24 365 237  72  69  32]
 [ 39  69 396 506  12 720  45  87  86  40]
 [ 57  56 211  57 569 249 119 429 158  95]
 [ 59  42 552 146  29 935 104  87  33  13]
 [144  30 615

In [9]:
# SVM & RandomForest
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

X_train_MNIST, y_train_MNIST = Mnist_TrainingData, Mnist_TrainingTarget

X_test_MNIST, y_test_MNIST = Mnist_TestingData, Mnist_TestingTarget
X_test_USPS, y_test_USPS = USPS_TestingData, USPS_TargetData
# SVM
classifier1 = SVC(kernel='poly', C=3, gamma =0.05);
classifier1.fit(X_train_MNIST, y_train_MNIST)
predicted_SVM_USPS = classifier1.predict(X_test_USPS)
predicted_SVM_MNIST = classifier1.predict(X_test_MNIST)
target_names = ['class 0', 'class 1', 'class 2','class 3', 'class 4', 'class 5','class 6', 'class 7', 'class 8','class 9']

# get the accuracy
print("SVM:")
print("Accuracy USPS")
print(classification_report(y_test_USPS, predicted_SVM_USPS, target_names=target_names))
print('Confusion Matrix USPS')
print(confusion_matrix(y_test_USPS, predicted_SVM_USPS))

print("Accuracy MNIST")
print(classification_report(y_test_MNIST, predicted_SVM_MNIST, target_names=target_names))
print('Confusion Matrix MNIST')
print(confusion_matrix(y_test_MNIST, predicted_SVM_MNIST))
#print accuracy_score(y_test_MNIST, predicted_SVM_MNIST)


#RandomForestClassifier
classifier2 = RandomForestClassifier(n_estimators=5);#less number of estimator, inlcude more parameters
classifier2.fit(X_train_MNIST, y_train_MNIST)
predicted_USPS = classifier2.predict(X_test_USPS)
predicted_MNIST = classifier2.predict(X_test_MNIST)

# get the accuracy
print("Random Forest:")
print("Accuracy USPS")
print(classification_report(y_test_USPS, predicted_USPS, target_names=target_names))
print('Confusion Matrix USPS')
print(confusion_matrix(y_test_USPS, predicted_USPS))

print("Accuracy MNIST")
print(classification_report(y_test_MNIST, predicted_MNIST, target_names=target_names))
print('Confusion Matrix MNIST')
print(confusion_matrix(y_test_MNIST, predicted_MNIST))
#############################

SVM:
Accuracy USPS


  'precision', 'predicted', average, warn_for)


             precision    recall  f1-score   support

    class 0       0.00      0.00      0.00      2000
    class 1       0.10      1.00      0.18      2000
    class 2       0.00      0.00      0.00      1999
    class 3       0.00      0.00      0.00      2000
    class 4       0.00      0.00      0.00      2000
    class 5       0.00      0.00      0.00      2000
    class 6       0.00      0.00      0.00      2000
    class 7       0.00      0.00      0.00      2000
    class 8       0.00      0.00      0.00      2000
    class 9       0.00      0.00      0.00      2000

avg / total       0.01      0.10      0.02     19999

Confusion Matrix USPS
[[   0 2000    0    0    0    0    0    0    0    0]
 [   0 2000    0    0    0    0    0    0    0    0]
 [   0 1999    0    0    0    0    0    0    0    0]
 [   0 2000    0    0    0    0    0    0    0    0]
 [   0 2000    0    0    0    0    0    0    0    0]
 [   0 2000    0    0    0    0    0    0    0    0]
 [   0 2000    0    0

In [10]:
# Neural Network
import keras
from keras.datasets import mnist
from keras.layers import Dense
from keras.layers import Activation
from keras.models import Sequential

x_train_Mnist = Mnist_TrainingData
y_train_Mnist = Mnist_TrainingTarget
x_test_Mnist = Mnist_TestingData
y_test_Mnist = Mnist_TestingTarget
x_test_USPS = USPS_TestingData
y_test_USPS = USPS_TargetData
num_classes=10

y_train_Mnist = keras.utils.to_categorical(y_train_Mnist, num_classes)
y_test_Mnist = keras.utils.to_categorical(y_test_Mnist, num_classes)
y_test_USPS = keras.utils.to_categorical(y_test_USPS, num_classes)
image_size = 784
model = Sequential()
model.add(Dense(units=250, input_shape=(image_size,)))
model.add(Activation(tf.nn.softmax))
model.add(Dense(units=num_classes))
model.add(Activation(tf.nn.softmax))
model.compile(optimizer='sgd', loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(x_train_Mnist, y_train_Mnist, batch_size=250, epochs=2000,verbose=False,validation_split=.1)
#MNIST Testing
loss,accuracy = model.evaluate(x_test_Mnist, y_test_Mnist, verbose=False)
print("MNIST Dataset:")
print("accuracy:")
print(accuracy)
print("loss:")
print(loss)
#USPS Testing
loss,accuracy = model.evaluate(x_test_USPS, y_test_USPS, verbose=False)
print("USPS Dataset:")
print("accuracy:")
print(accuracy)
print("loss:")
print(loss)

MNIST Dataset:
accuracy:
0.7553
loss:
0.632573749542
USPS Dataset:
accuracy:
0.325466273305
loss:
2.4758614878


In [None]:
from sklearn import tree
import statistics 
from sklearn.linear_model import LogisticRegression

#Logistic regression
model1= LogisticRegression()
model1.fit(Mnist_TrainingData, Mnist_TrainingTarget)
pred1=model1.predict(USPS_TestingData)

#RandomForest
classifier1 = RandomForestClassifier(n_estimators=5);
classifier1.fit(Mnist_TrainingData, Mnist_TrainingTarget)
pred2 = classifier1.predict(USPS_TestingData)

classifier1 = RandomForestClassifier(class_weight=None, criterion='gini',
            max_depth=2, max_features='auto', max_leaf_nodes=None,n_estimators=3);
classifier1.fit(Mnist_TrainingData, Mnist_TrainingTarget)
pred3 = classifier1.predict(USPS_TestingData)

#SVM
classifier2 = SVC(kernel='poly', C=3);
classifier2.fit(Mnist_TrainingData, Mnist_TrainingTarget)
pred4 = classifier2.predict(USPS_TestingData)

classifier2 = SVC(kernel='linear', C=3);
classifier2.fit(Mnist_TrainingData, Mnist_TrainingTarget)
pred5 = classifier2.predict(USPS_TestingData)

#Neural Network
model = Sequential()
model.add(Dense(units=250, input_shape=(784,)))
model.add(Activation(tf.nn.softmax))
model.add(Dense(units=10))
model.add(Activation(tf.nn.softmax))
model.compile(optimizer='sgd', loss='categorical_crossentropy',metrics=['accuracy'])
model.fit(Mnist_TrainingData, Mnist_TrainingTarget, batch_size=250, epochs=2000,verbose=False,validation_split=.1)
pred6=model.predict(USPS_TestingData)

final_pred = np.array([])
for i in range(0,len(USPS_TestingData)):
    final_pred = np.append(final_pred, statistics.mode([pred1[i], pred2[i], pred3[i], pred4[i], pred5[i], pred6[i]]))
print(final_pred)