In [324]:
from zipfile import ZipFile
import numpy as np

# Seed NumPy's global RNG so the random weight initialisation (np.random.normal)
# and the random mini-batch sampling (np.random.randint) used further down
# repeat from run to run.
np.random.seed(0)

'''load your data here'''

class DataLoader(object):
    """Reads the zipped image/label files and draws random mini-batches.

    The data directory is expected to contain '<mode>_labels.zip' and
    '<mode>_images.zip', each holding a single member named like the archive
    without the '.zip' suffix.
    """

    def __init__(self, data_dir = '../data/'):
        """
        data_dir: directory that holds the zip archives. Defaults to the
                  location the notebook has always used.
        """
        # Paths are built by plain concatenation, so guarantee a trailing '/'.
        if not data_dir.endswith('/'):
            data_dir = data_dir + '/'
        self.data_dir = data_dir

    def load_data(self, mode = 'train'):
        """Return (images, labels) for the requested split.

        mode: prefix of the archive names; 'train' (default) or 'test'.

        Returns
            images: uint8 array of shape (len(labels), 784), one row per image.
            labels: 1-D uint8 array.

        The first 8 bytes of the label file and the first 16 bytes of the
        image file are skipped as headers (presumably the IDX/MNIST layout --
        confirm against the data source). Both arrays are read-only views
        onto the decompressed bytes, as produced by np.frombuffer.
        """
        label_filename = mode + '_labels'
        image_filename = mode + '_images'
        label_zip = self.data_dir + label_filename + '.zip'
        image_zip = self.data_dir + image_filename + '.zip'
        with ZipFile(label_zip, 'r') as lblzip:
            labels = np.frombuffer(lblzip.read(label_filename), dtype=np.uint8, offset=8)
        with ZipFile(image_zip, 'r') as imgzip:
            raw = np.frombuffer(imgzip.read(image_filename), dtype=np.uint8, offset=16)
        # The label count fixes the number of rows; 784 features per image.
        images = raw.reshape(len(labels), 784)
        return images, labels

    def create_batches(self, img, lab, s = 10):
        """Draw one random mini-batch of s examples.

        img: array of examples, indexed along axis 0.
        lab: array of labels aligned with img.
        s:   batch size.

        Row indices are sampled uniformly WITH replacement from NumPy's
        global RNG, so the same example can appear more than once in a batch.
        Returns (img[idx], lab[idx]).
        """
        r = np.random.randint(img.shape[0], size = s)
        return img[r], lab[r]

In [414]:
def ReLU(x):
    """Rectified linear unit: element-wise max(x, 0).

    x: array-like of numbers.

    Returns a new array with every negative entry replaced by 0. The
    argument itself is not modified, so it is safe to pass an array that is
    still needed afterwards.
    """
    return np.maximum(x, 0)

class NN(object):
    """Fully connected classifier: input -> ReLU hidden layer -> softmax.

    Biases are folded into the weight matrices as an extra last row; a column
    of ones is appended to each layer's input to match. Results of the most
    recent calls are kept as attributes:
        pred         softmax output of the last forward()
        inter        intermediate activations of the last forward()
        crl/regl/totl  losses from the last loss()/backward()
        W1der/W2der  gradients applied by the last backward()
        acc          accuracy (percent) from the last classacc()
    """

    # Probabilities are kept inside [_EPS, 1 - _EPS] wherever they are passed
    # to log() or divided by, so a saturated softmax cannot yield inf/nan.
    _EPS = 1e-12

    def __init__(self, h, inp, op):
        """Randomly initialise the weights from N(0, 0.01).

        h:   number of hidden units
        inp: number of input features
        op:  number of output classes
        """
        self.inp = inp
        self.hn = h
        self.op = op
        # +1 row in each matrix holds the bias.
        self.W1 = np.random.normal(0, 0.01, (inp+1, h))
        self.W2 = np.random.normal(0, 0.01, (h+1, op))

    def forward(self, inp):
        """Forward pass; stores class probabilities in self.pred.

        inp: 2-D array, one example per row, self.inp columns.

        self.inter ends up as
            [input, input+bias column, hidden activations,
             hidden+bias column, output logits, softmax probabilities]
        which backward() indexes by position.
        """
        self.inter = []
        self.inter.append(inp)
        h = np.append(inp, np.ones((inp.shape[0], 1)), axis = 1)
        self.inter.append(h)

        # hidden layer
        r = ReLU(np.matmul(h, self.W1))
        self.inter.append(r)
        v = np.append(r, np.ones((r.shape[0], 1)), axis = 1)
        self.inter.append(v)

        # output layer
        t = np.matmul(v, self.W2)
        self.inter.append(t)

        # softmax; subtracting the row maximum leaves the result unchanged
        # mathematically but keeps exp() from overflowing on large logits
        y = np.exp(t - np.max(t, axis = 1, keepdims = True))
        y = y / np.sum(y, axis = 1, keepdims = True)
        self.inter.append(y)

        self.pred = y

    def printloss(self):
        """Print the losses computed by the most recent loss()/backward()."""
        print('The cross entropy loss is: %f' % self.crl)
        print('The regularization loss is: %f' % self.regl)
        print('The total loss is: %f' % self.totl)

    def loss(self, lab, beta = 1, outp = False):
        """Compute the losses for the last forward pass.

        lab:  1-D integer array of class labels, one per row of self.pred
        beta: L2 regularization coefficient
        outp: print the losses when True

        Sets
            self.exp   one-hot encoding of lab
            self.crl   per-class binary cross entropy, averaged over every
                       (example, class) entry
            self.regl  0.5 * beta * (sum of squared non-bias weights)
            self.totl  crl + regl
        """
        # one-hot targets
        u = np.zeros((lab.shape[0], self.op))
        u[np.arange(lab.shape[0]), lab] = 1
        self.exp = u

        # cross entropy loss (clipped so log(0) never occurs)
        p = np.clip(self.pred, self._EPS, 1 - self._EPS)
        self.crl = - np.mean((u * np.log(p)) + ((1 - u) * np.log(1 - p)))

        # L2 regularization loss over the non-bias rows of both matrices.
        # Both terms are sums so that the value matches the beta * W gradient
        # applied in backward().
        self.regl = 0.5 * beta * (np.sum(np.square(self.W1[:-1,:])) + np.sum(np.square(self.W2[:-1,:])))

        # total loss
        self.totl = self.crl + self.regl

        # print if required
        if outp:
            self.printloss()

    def backward(self, lab, beta = 1, alpha = 0.1, outp = False):
        """One gradient-descent step on the batch used by the last forward().

        lab:   1-D integer array of class labels for that batch
        beta:  L2 regularization coefficient
        alpha: learning rate
        outp:  print the losses (evaluated before the update) when True

        The data gradient is summed over the batch, not averaged. The applied
        gradients are left in self.W1der / self.W2der.
        """
        # loss calculation (also builds the one-hot targets in self.exp)
        self.loss(lab, beta, outp)

        # backprop over cross entropy loss: d/dp of -[u log p + (1-u) log(1-p)]
        p = np.clip(self.pred, self._EPS, 1 - self._EPS)
        d_pred = (self.pred - self.exp) / (p * (1 - p))

        # backprop over softmax
        d_logits = self.pred * (d_pred - np.sum(self.pred * d_pred, axis = 1, keepdims = True))

        # backprop over output layer
        # weights derivative
        d_W2 = np.matmul(self.inter[3].T, d_logits)
        # input derivative; the last column belongs to the constant bias input
        d_hidden = np.matmul(d_logits, self.W2.T)[:,:-1]

        # backprop over hidden layer (ReLU passes gradient where it was active)
        d_W1 = np.matmul(self.inter[1].T, d_hidden * (self.inter[2] > 0))

        # L2 derivatives; the bias row is not regularized, hence the zero row
        reg_W2 = beta * np.append(self.W2[:-1,:], np.zeros((1, self.op)), axis = 0)
        reg_W1 = beta * np.append(self.W1[:-1,:], np.zeros((1, self.hn)), axis = 0)

        # final total derivatives
        self.W1der = d_W1 + reg_W1
        self.W2der = d_W2 + reg_W2

        # changing weights accordingly
        self.W1 = self.W1 - alpha * self.W1der
        self.W2 = self.W2 - alpha * self.W2der

    def classacc (self, lab):
        """Print the classification accuracy of the last forward pass.

        lab: 1-D array of true class labels, one per row of self.pred.

        The accuracy in percent is also stored in self.acc.
        """
        h = (np.argmax(self.pred, axis = 1) == lab)
        count = int(np.sum(h))
        total = h.shape[0]
        # 100.0 forces float division; plain count/total truncates to 0 under
        # Python 2 integer division.
        self.acc = 100.0 * count / total if total else 0.0
        print('The classification accuracy is: %d / %d = %f %%' % (count, total, self.acc))


In [415]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def minisgd (hlsize = 50, alpha = 10**-5, beta = 10, bsize = 10, epoch = 5000):
    """Train the network with mini-batch SGD and report the results.

    hlsize: hidden layer size
    alpha:  learning rate
    beta:   L2 regularization coefficient
    bsize:  batch size for SGD
    epoch:  number of SGD iterations (one random batch each), at least 1

    Prints the losses of the first and last iteration, the accuracy on the
    full training set, and the losses and accuracy on the test set; plots the
    per-iteration loss curves. Returns the trained NN so later cells can
    inspect it.

    Raises ValueError if epoch < 1.
    """
    if epoch < 1:
        raise ValueError('epoch must be at least 1, got %r' % (epoch,))

    dl = DataLoader()
    img, lab = dl.load_data()
    # 784 input features and 10 classes, matching the loaded data.
    u = NN(hlsize, 784, 10)

    crl_history = []
    regl_history = []
    totl_history = []

    for _ in range(epoch):
        ti, tl = dl.create_batches(img, lab, bsize)
        u.forward(ti)
        u.backward(tl, beta, alpha)
        # backward() evaluates the losses on this batch before updating the
        # weights, so entry k is the loss seen at the start of iteration k.
        crl_history.append(u.crl)
        regl_history.append(u.regl)
        totl_history.append(u.totl)

    print('Initially')
    print('The cross entropy loss was: %f' % crl_history[0])
    print('The regularization loss was: %f' % regl_history[0])
    print('The total loss was: %f' % totl_history[0])
    print('\nAfter %d passes' % epoch)
    print('The cross entropy loss is: %f' % crl_history[-1])
    print('The regularization loss is: %f' % regl_history[-1])
    print('The total loss is: %f' % totl_history[-1])

    print('\nOn the train set:')
    u.forward(img)
    u.classacc(lab)

    plt.plot(crl_history, 'b-', label = 'cross entropy')
    plt.plot(regl_history, 'g-', label = 'regularization')
    plt.plot(totl_history, 'r-', label = 'total')
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.legend()
    plt.show()

    imgte, labte = dl.load_data(mode = 'test')
    u.forward(imgte)
    print('\nOn the test set:')
    u.loss(labte, beta, True)
    u.classacc(labte)

    return u
    
    

array([9, 2, 1, ..., 8, 1, 5], dtype=uint8)

In [413]:
# Count how many predictions agree with the test labels.
# NOTE(review): `u` and `labte` are only assigned inside minisgd() in the
# visible source, so this cell works only if both already exist in the kernel
# namespace -- confirm where they come from before relying on Run All.
h = (np.argmax(u.pred, axis = 1) == labte)
count = int(np.count_nonzero(h))
print(count)
print(h.shape[0])

8105
10000
