Assignment 2 - MNIST Classification



In [0]:
#sigmoid function for activation
def sigmoid(z):
    s = 1 / (1 + np.exp(-z))
    return s

#function to one hot encode the array given - output
def one_hot_encoding(output, num_labels=10):
  one_h = np.zeros((output.shape[0], num_labels))
  for x, y in enumerate(output):
    one_h[x,y] = 1.0
  return one_h


      

In [0]:
#importing the dataset
from sklearn.datasets import load_digits, fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm 


x,y = fetch_openml('mnist_784', version=1, return_X_y=True)
#training - 60000x784 input, 60000x1 output
#testing - 10000x784 input, 10000x1 output
#splitting the testing and training data
xtrain, xtest, ytrain, ytest = train_test_split(x,y, train_size=60000, test_size=10000, shuffle=False)

xtrain = xtrain.astype(np.float)
xtest = xtest.astype(np.float)

#normalizing the data so don't compute with high numbers
xtrain = xtrain/255
xtest = xtest/255

#from string to integer for output
ytrain = ytrain.astype(np.int)
ytest = ytest.astype(np.int)
    
#transform output into one hot encoding for both test and training sets
ytrain = one_hot_encoding(ytrain)
yt = one_hot_encoding(ytest)

In [0]:
class Perceptron(object):

    #define the attirubutes for the function
    def __init__(self, num_hidden, epoch=22, c=0.01):
        self.epoch = epoch #iteration
        self.c = c #learning rate
        self.input_size = 784 #inputs
        self.output_size = 10 #output
        self.hidden = num_hidden #neurons in the hidden layer
        
        print('Initialization with random weight')
        print('-----')
        self.weight1 = np.random.normal(0, 0.2, [self.input_size, self.hidden]) #weights in the first layer between input and hidden
        self.bias1 = np.zeros((1, self.hidden)) # bias for weight 1
        self.weight2 = np.random.normal(0, 0.2, [self.hidden, self.output_size]) #weights in the second layer between hidden and output
        self.bias2 = np.zeros((1, self.output_size))  # bias for weight 2
      
    #Used to train the network - takes the training input and output, and batch value
    def train(self, training_inputs, training_outputs, batch):
      
        print('Network training with a '+str(batch)+' batch size')
        print('-----')
        
        #momentum value
        beta = 0.4
        #intializing previous gradients 
        prev_grad_w2 = 0
        prev_grad_w1 = 0
        
        #for the number of epochs specified
        pbar = tqdm(range(self.epoch))
        for k in pbar:
          it = 0
          avg_error = 0
          while it < training_inputs.shape[0]:
                
                #take the batch size of inputs and outputs
                tx = training_inputs[it:it+batch]
                ty = training_outputs[it:it+batch]
                
                # forward pass
                z1 = np.dot(tx, self.weight1) + self.bias1
                a1 = sigmoid(z1)
                z2 = np.dot(a1, self.weight2) + self.bias2
                y = sigmoid(z2)
                
                #backpropogation
                dy2 = y*(1-y) #derivative of final output
                dy1 = a1*(1-a1) #derivative of hidden layer output
                output_error = np.subtract(ty,y) #error in output layer
                avg_error += np.mean(output_error)
                output_delta = output_error * dy2 #element wise mulitplication
                hidden_error = np.dot(output_delta, self.weight2.T)
                hidden_layer_delta = hidden_error * dy1 #element wise multiplication
                
                #momentum
                momentum_factor_w1 = beta * prev_grad_w1
                momentum_factor_w2 = beta * prev_grad_w2
                
                #backpropogation calculation of each weight and bias
                dW2 = np.dot(a1.T, output_delta) # forward * backward
                db2 = np.sum(output_delta, axis = 0, keepdims = True)
                dW1 = np.dot(tx.T, hidden_layer_delta)
                db1 = np.sum(hidden_layer_delta, axis = 0, keepdims = True)
                
                
                #update weights and bias
                self.weight2 += (1./batch * self.c * dW2) + momentum_factor_w2
                self.bias2 +=  1./batch * self.c * db2
                self.weight1 +=  (1./batch * self.c * dW1) + momentum_factor_w1 
                self.bias1 +=  1./batch * self.c * db1
                
                #save the graident to use in next batch iteration
                prev_grad_w1 = 1./batch * self.c * dW1
                prev_grad_w2 = 1./batch * self.c * dW2
                
                #input("Wait")
                
                #increment to access next set of inputs
                it = it + batch
          avg_error /= float(training_inputs.shape[0])
          pbar.set_description("Error: %.4f" %(avg_error))
        

  # Used to test the neural network - accepts the x or input values of the test data
    def test(self, test_input, test_output):
      #forward feed
      z1 = np.dot(test_input, self.weight1) + self.bias1
      a1 = sigmoid(z1)
      z2 = np.dot(a1, self.weight2) + self.bias2
      y = sigmoid(z2)
      
      a = np.zeros((10000, 1))
     
      # To see how accurate the neural network is compared to the correct values
      acc = 0.0
      for i in range(10000):
        if np.argmax(y[i]) == np.argmax(test_output[i]):
          acc += 1
          a[i] = np.argmax(y[i])
      print(acc / 10000 * 100, "%")
      
      a = a.astype(np.int)
      
      return a

    def output_data_txt(self, true_output, predictions):
      #outputs predicated and weights into a file on the drive - need to give it access
      from google.colab import drive
      drive.mount('/content/gdrive', force_remount=True)
      
      #print final weights into a text file
      with open('/content/gdrive/My Drive/Output/finalweight.txt', 'w') as f:
        np.savetxt(f, self.weight1, fmt='%.5f', delimiter=',', header='Weight1')
        np.savetxt(f, self.bias1, fmt='%.5f', delimiter=',', header='Bias1')
        np.savetxt(f, self.weight2, fmt='%.5f', delimiter=',', header='Weight2')
        np.savetxt(f, self.bias2, fmt='%.5f', delimiter=',', header='Bias2')
      
      #combine the predictions and true value into 1 matrix (so can see side beside)
      true_output = np.reshape(true_output, (10000, 1))
      final =  np.concatenate( [ predictions, true_output ] , axis = 1)
      #print predications an true value into an output file on google drive
      with open('/content/gdrive/My Drive/Output/outputs.txt', 'w') as d:
        np.savetxt(d, final, fmt='%s',  header = "Predictions vs True Value")
        
      

In [0]:
#Create nueral network (intialize)
per = Perceptron(64)
#Train network
per.train(xtrain,ytrain, 32)
#Test network
prediction = per.test(xtest, yt)
#Output final weights and predicated values into a text file
per.output_data_txt(ytest, prediction)



  0%|          | 0/22 [00:00<?, ?it/s][A[A

Initialization with random weight
-----
Network training with a 32 batch size
-----




Error: -0.0008:   0%|          | 0/22 [00:02<?, ?it/s][A[A

Error: -0.0008:   5%|▍         | 1/22 [00:02<00:48,  2.29s/it][A[A

Error: -0.0007:   5%|▍         | 1/22 [00:04<00:48,  2.29s/it][A[A

Error: -0.0007:   9%|▉         | 2/22 [00:04<00:46,  2.31s/it][A[A

Error: -0.0010:   9%|▉         | 2/22 [00:06<00:46,  2.31s/it][A[A

Error: -0.0010:  14%|█▎        | 3/22 [00:06<00:43,  2.29s/it][A[A

Error: -0.0011:  14%|█▎        | 3/22 [00:09<00:43,  2.29s/it][A[A

Error: -0.0011:  18%|█▊        | 4/22 [00:09<00:41,  2.28s/it][A[A

Error: -0.0011:  18%|█▊        | 4/22 [00:11<00:41,  2.28s/it][A[A

Error: -0.0011:  23%|██▎       | 5/22 [00:11<00:38,  2.28s/it][A[A

Error: -0.0010:  23%|██▎       | 5/22 [00:13<00:38,  2.28s/it][A[A

Error: -0.0010:  27%|██▋       | 6/22 [00:13<00:36,  2.27s/it][A[A

Error: -0.0010:  27%|██▋       | 6/22 [00:15<00:36,  2.27s/it][A[A

Error: -0.0010:  32%|███▏      | 7/22 [00:16<00:34,  2.29s/it][A[A

Error: -0.0009:  32%|███▏ 

89.37 %
Mounted at /content/gdrive
(10000, 1)
(10000, 1)
