Charalampos Kaidos & Ioannis Papantonis

In the same directory as this notebook, create a folder named "mnist" and put inside the mnist data that where shipped with the project anouncement. In a separate folder called "cifar-10-batches-py" put the cifar10 pickled dataset as provided for python in the CIFAR website.

The functions bellow load MNIST data set from the files provided in the excercise. Also there are functions to save the data in pickled format and load them from pickled format which is much faster than loading them from the texr files.

In [1]:
from os import listdir
from os.path import isfile, join

import numpy
import pickle

def load_mnist(path):
    print('Reading MNIST dataset from storage')

    trainfiles = [join(path, f) for f in listdir(path) if isfile(join(path, f)) and 'train' in f]
    train_data, train_target = load_mnist_from_files(trainfiles)

    testfiles = [join(path, f) for f in listdir(path) if isfile(join(path, f)) and 'test' in f]
    test_data, test_target = load_mnist_from_files(testfiles)

    print('Finished reading MNIST')
    return train_data, train_target, test_data, test_target

def load_mnist_from_files(files):
    data = None
    target = None

    for file in files:
        print('Reading file {}'.format(file))
        temp_data = numpy.loadtxt(file)
        if (numpy.any(data) == None):
            data = temp_data
        else:
            data = numpy.append(data, temp_data, axis=0)

        rows, columns = temp_data.shape
        temp_target = numpy.zeros((rows, 10))
        temp_target[:, int(file[-5])] = numpy.ones(rows)
        if (numpy.any(target) == None):
            target = temp_target
        else:
            target = numpy.append(target, temp_target, axis=0)

        print('Read {} rows, {} columns from {}'.format(rows, columns, file))

    return data, target

def pickle_mnist(traindata, traintarget, testdata, testtarget):
    f=open('./mnist/trdata', mode='wb+')
    pickle.dump(traindata, f)
    f.close()
    f = open('./mnist/trtarget', mode='wb+')
    pickle.dump(traintarget, f)
    f.close()
    f = open('./mnist/tedata', mode='wb+')
    pickle.dump(testdata, f)
    f.close
    f = open('./mnist/tetarget', mode='wb+')
    pickle.dump(testtarget, f)
    f.close()

def unpickle_mnist():
    fo = open('./mnist/trdata', 'rb')
    traindata = pickle.load(fo)
    fo.close()
    fo = open('./mnist/trtarget', 'rb')
    traintarget = pickle.load(fo)
    fo.close()
    fo = open('./mnist/tedata', 'rb')
    testdata = pickle.load(fo)
    fo.close()
    fo = open('./mnist/tetarget', 'rb')
    testtarget = pickle.load(fo)
    fo.close()
    return traindata, traintarget, testdata, testtarget

The functions bellow load the CIFAR-10 data set from the pickled format provided on the CIFAR website.

In [2]:
from os import listdir
from os.path import isfile, join

import pickle
import numpy

def load_cifar(path):
    print('Reading CIFAR-10 dataset from storage')

    dicts = []

    trainfiles = [join(path, f) for f in listdir(path) if isfile(join(path, f)) and 'data_batch' in f]
    for f in trainfiles:
        dicts.append(unpickle(f))

    data = numpy.vstack(tuple([x['data'] for x in dicts]))
    ones = numpy.ones((data.shape[0], 1))
    data = numpy.hstack((ones,data))

    temp_target = numpy.hstack(tuple([x['labels'] for x in dicts]))
    target = numpy.zeros((temp_target.shape[0], 10))
    for r in range(temp_target.shape[0]):
        target[r, temp_target[r]] = 1

    test_data = unpickle(join(path, 'test_batch'))
    test_target = numpy.zeros((len(test_data['labels']), 10))
    for r in range(test_target.shape[0]):
        test_target[r, test_data['labels'][r]] = 1
    test_data = test_data['data']
    ones = numpy.ones((test_data.shape[0], 1))
    test_data = numpy.hstack((ones, test_data))

    return data, target, test_data, test_target

def unpickle(file):
    fo = open(file, 'rb')
    dictionary = pickle.load(fo, encoding='latin1')
    fo.close()
    return dictionary

The functions bellow implement the neural network.

In [3]:
import numpy as np
import os, struct
import time

def indicators(t):     # returns array with indicator vectors as rows
    (N,)=t.shape
    if np.min(t)>0:
        categories=np.max(t) 
        ind=np.zeros((N,categories))
        for n in range(0,N):
            ind[n,t[n]-1]=1
    else:
        categories=np.max(t)+1 
        ind=np.zeros((N,categories))
        for n in range(0,N):
            ind[n,t[n]]=1
    return ind

def softmax(A):  # definition of softmax function, operating on each row of matrix A
    A = A - np.amax(A, axis = 1).reshape(A.shape[0],1)
    A = np.exp(A)
    A = A / np.sum(A, axis=1).reshape(A.shape[0],1)
    return A

def sigmoid(A):  # definition of the sigmoid function 
    return 1 / (np.exp(-A) + 1)

def activation(A):   # returns the outputs of the hidden layer
    return np.log(1 + np.exp(A))

def output(X, W2):  # returns the activations of the output neurons
    ones = np.ones((X.shape[0], 1))
    X = np.hstack((ones,X))
    A = np.dot(X, W2)
    return softmax(A)

def middle_deltas(X,W,out_deltas):  # helper function that calculates the deltas regarding the hidden layer
    [W1, W2] = W
    A = np.dot(X, W1)
    no_bias = np.delete(W2, 0, axis=0)  # we discard the bias parameters
    deriv_act = sigmoid(A)   #  the derivative of the hidden layer function, is the sigmoid 
    sums = np.dot(out_deltas, no_bias.T)  
    return sums * deriv_act

def middle_grad(X,W,out_deltas):  # returns the gradient of the parameters connecting the input with the hidden layer
    d2 = middle_deltas(X, W, out_deltas)
    grad = np.dot(X.T, d2)
    return grad

def forward_pass(X,W):  #  returns the outputs of the neural network
    [W1,W2]=W
    act=activation(np.dot(X, W1))
    out=output(act,W2)
    return out

def activations_outputs(X,W): #the same as above, but also returns the activations
    [W1,W2] = W
    act=activation(np.dot(X,W1))
    out=output(act,W2)
    return [act,out]

def gradient(X,T,W):  # returns a list with the gradient of the hidden layer parameters as first element, and the output's layer as second
    [act, out] = activations_outputs(X, W)
    D1 = T-out
    mid_grad = middle_grad(X, W, D1)
    ones = np.ones((act.shape[0],1))
    act = np.hstack((ones, act))  # for the bias
    out_grad = np.dot(act.T, D1)
    return [mid_grad, out_grad]

def cost(X,T,W,l):   #the cost function
    Y = forward_pass(X, W)
    [W1, W2] = W
    return np.sum(T * np.log(Y + np.finfo(float).eps)) - l / 2 * (np.sum(np.linalg.norm(W1, axis=0) ** 2) + np.sum(np.linalg.norm(W2, axis=0) ** 2))

def computed_gradient(X,T,W,l):  # gradient check
    [w1,w2]=W
    (M,D)=w1.shape
    (K,N)=w2.shape
    gr1=np.zeros(w1.shape)
    gr2=np.zeros(w2.shape)
    for m in range(0,M):
        for d in range(0,D):
            plus=np.copy(w1)
            minus=np.copy(w1)
            plus[m][d]=plus[m][d]+1e-06
            minus[m][d]=minus[m][d]-1e-06
            gr1[m,d]=(cost(X,T,[plus,w2],l)-cost(X,T,[minus,w2],l))/2*1e-06
    for k in range(0,k):
        for n in range(0,N):
            plus=np.copy(w2)
            minus=np.copy(w2)
            plus[k][n]=plus[k][n]+1e-06
            minus[k][n]=minus[k][n]-1e-06
            gr2[k,n]=(cost(X,T,[w1,plus],l)-cost(X,T,[w1,minus],l))/2*1e-06
    return [gr1,gr2]

def train(X,T,init,l,etta,iterations): #X array (N,D+1), T array (N,K), init is the initial guess for the weights, l regularization parameter
    (N,D) = X.shape                    # etta is the learning rate, iterations is the maximum number of iterations
    E_old=-np.inf
    E_new=cost(X,T,init,l)
    W=init
    [W1, W2] = W
    i = 0
    time_elapsed = 0
    while(np.abs(E_new-E_old)>0.001):
        start = time.time()
        print('Starting iteration {}'.format(i))
        E_old=E_new
        (grad1, grad2) = gradient(X, T, W)
        #t = computed_gradient(X,T,W,l)
        grad1=grad1-l*W1   
        grad2=grad2-l*W2
        W1=W1+etta*grad1   # gradient ascent update
        W2=W2+etta*grad2   # gradient ascent update
        W=[W1,W2]
        E_new=cost(X,T,W,l)
        print(E_new)
        time_elapsed += (time.time() - start)
        print('Elapsed: {:.3f}s, Remaining: {:.3f}s'.format(time_elapsed,(time_elapsed / (i + 1)) * (iterations - i + 1)))
        i += 1
        if(i > iterations):
            break
    return W

def predict(X,W):   #X must have 1s as first column, returns predicted labels
    (N,D)=X.shape
    pred=np.zeros(N)
    Y = forward_pass(X, W)
    pred = np.argmax(Y, axis=1)
    return pred

Bellow we train a neural network on the MNIST data set. We have executed 10 iterations here. On 3000 iterations we got accuracy 87%. Here we have disabled gradient check for performance reasons.

    Starting iteration 2995
    -38232.0481124
    Elapsed: 2781.969s, Remaining: 5.571s
    Starting iteration 2996
    -38177.6512299
    Elapsed: 2782.919s, Remaining: 4.643s
    Starting iteration 2997
    -38224.8117385
    Elapsed: 2783.854s, Remaining: 3.714s
    Starting iteration 2998
    -38170.4619134
    Elapsed: 2784.780s, Remaining: 2.786s
    Starting iteration 2999
    -38217.5836823
    Elapsed: 2785.715s, Remaining: 1.857s
    Starting iteration 3000
    -38163.2808416
    Elapsed: 2786.652s, Remaining: 0.929s
    Predicted correct 8680 out of 10000

In [7]:
traindata, traintarget, testdata, testtarget = load_mnist('./mnist')

def init_weights():  # initial guess for the weights
    M = 50
    np.random.seed(1)
    W1 = np.random.randn(traindata.shape[1], M)
    W2 = np.random.randn(M+1, traintarget.shape[1])
    W = [W1, W2]
    return W

W = train(traindata/255, traintarget, init_weights(), 0.0001, 0.000001,10)
Y = predict(testdata/255, W)
res = Y - np.argmax(testtarget, axis=1)
zeros = 0
for row in range(res.shape[0]):
    if(res[row] == 0):
        zeros = zeros +1
print('Predicted correct {} out of 10000'.format(zeros))

Reading MNIST dataset from storage
Reading file ./mnist/train5.txt
Read 5421 rows, 784 columns from ./mnist/train5.txt
Reading file ./mnist/train9.txt
Read 5949 rows, 784 columns from ./mnist/train9.txt
Reading file ./mnist/train7.txt
Read 6265 rows, 784 columns from ./mnist/train7.txt
Reading file ./mnist/train1.txt
Read 6742 rows, 784 columns from ./mnist/train1.txt
Reading file ./mnist/train2.txt
Read 5958 rows, 784 columns from ./mnist/train2.txt
Reading file ./mnist/train8.txt
Read 5851 rows, 784 columns from ./mnist/train8.txt
Reading file ./mnist/train6.txt
Read 5918 rows, 784 columns from ./mnist/train6.txt
Reading file ./mnist/train0.txt
Read 5923 rows, 784 columns from ./mnist/train0.txt
Reading file ./mnist/train3.txt
Read 6131 rows, 784 columns from ./mnist/train3.txt
Reading file ./mnist/train4.txt
Read 5842 rows, 784 columns from ./mnist/train4.txt
Reading file ./mnist/test0.txt
Read 980 rows, 784 columns from ./mnist/test0.txt
Reading file ./mnist/test5.txt
Read 892 rows

Bellow we train a neural network on the CIFAR data set. We have executed 10 iterations here. On 3000 iterations we got accuracy 30%. Here we have disabled gradient check for performance reasons.

    Starting iteration 2997
    -116214.946819
    Elapsed: 6168.088s, Remaining: 8.230s
    Starting iteration 2998
    -116214.507111
    Elapsed: 6170.337s, Remaining: 6.172s
    Starting iteration 2999
    -116214.067669
    Elapsed: 6172.802s, Remaining: 4.115s
    Starting iteration 3000
    -116213.62849
    Elapsed: 6175.156s, Remaining: 2.058s
    Predicted correct 3026 out of 10000

In [8]:
traindata, traintarget, testdata, testtarget = load_cifar('./cifar-10-batches-py')

def init_weights():  # initial guess for the weights
    M = 50
    np.random.seed(1)
    W1 = np.random.randn(traindata.shape[1], M)
    W2 = np.random.randn(M+1, traintarget.shape[1])
    W = [W1, W2]
    return W

W = train(traindata/255, traintarget, init_weights(), 0.0001, 0.000001,10)
Y = predict(testdata/255, W)
res = Y - np.argmax(testtarget, axis=1)
zeros = 0
for row in range(res.shape[0]):
    if(res[row] == 0):
        zeros = zeros +1
print('Predicted correct {} out of 10000'.format(zeros))

Reading CIFAR-10 dataset from storage
Starting iteration 0
-1540563.48165
Elapsed: 2.301s, Remaining: 25.308s
Starting iteration 1
-1494516.60519
Elapsed: 4.468s, Remaining: 22.338s
Starting iteration 2
-1469674.16252
Elapsed: 6.455s, Remaining: 19.366s
Starting iteration 3
-1351975.72548
Elapsed: 8.536s, Remaining: 17.072s
Starting iteration 4
-1171866.99918
Elapsed: 10.517s, Remaining: 14.724s
Starting iteration 5
-747319.826937
Elapsed: 12.620s, Remaining: 12.620s
Starting iteration 6
-637967.095752
Elapsed: 14.593s, Remaining: 10.424s
Starting iteration 7
-560894.989355
Elapsed: 16.567s, Remaining: 8.284s
Starting iteration 8
-503081.853704
Elapsed: 18.561s, Remaining: 6.187s
Starting iteration 9
-458231.340626
Elapsed: 20.620s, Remaining: 4.124s
Starting iteration 10
-422539.193236
Elapsed: 22.599s, Remaining: 2.054s
Predicted correct 1176 out of 10000
