In [None]:
#Loading Data
import numpy as np
from keras.datasets import mnist
# mnist.load_data() returns a (train, test) pair, each an (inputs, targets) pair.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
# Show the dimensions of each raw array returned by the loader.
for split_name, split_array in (("X_train", X_train), ("y_train", y_train),
                                ("X_test", X_test), ("y_test", y_test)):
  print (split_name + " shape: ", split_array.shape)

X_train shape:  (60000, 28, 28)
y_train shape:  (60000,)
X_test shape:  (10000, 28, 28)
y_test shape:  (10000,)


In [None]:
def one_hot_enco(data, num_classes=None):
  '''Convert a 1-D sequence of integer class labels into a one-hot matrix.

  Args:
    data: 1-D sequence/array of non-negative integer labels.
    num_classes: number of columns in the result. When None (default) it is
      taken as ``max(data) + 1`` so that every label has a valid column even
      if some classes are absent from ``data``.

  Returns:
    Float array of shape (len(data), num_classes) with a single 1 per row,
    placed in the column given by that row's label.

  Raises:
    ValueError: if a label is negative or not smaller than ``num_classes``.
  '''
  labels = np.asarray(data)
  if labels.size == 0:
    # Nothing to encode; keep the requested width (0 when unspecified).
    return np.zeros((0, num_classes or 0))
  if num_classes is None:
    # Counting unique values would under-size the matrix whenever a class
    # is missing from this sample, so derive the width from the max label.
    num_classes = int(labels.max()) + 1
  if labels.min() < 0 or labels.max() >= num_classes:
    raise ValueError("labels must lie in [0, num_classes)")
  one_hot_data = np.zeros((len(labels), num_classes))
  # Set column `labels[i]` of row `i` to 1 for every row in one step.
  one_hot_data[np.arange(len(labels)), labels] = 1
  return one_hot_data

In [None]:
# Keep the first 1000 training images, flatten each 28x28 image into one
# row of 784 values and divide every pixel by 255.
images = X_train[:1000].reshape(1000, 28 * 28) / 255

# One-hot encode the labels that belong to those 1000 images.
labels = one_hot_enco(y_train[:1000])

In [None]:
# Flatten every test image into a 784-value row (pixels divided by 255)
# and one-hot encode the test labels.
test_images = X_test.reshape(X_test.shape[0], 28 * 28) / 255
test_labels = one_hot_enco(y_test)


In [None]:
# Print train/test shapes side by side: first the inputs, then the labels.
for train_array, test_array in ((images, test_images), (labels, test_labels)):
  print (train_array.shape, test_array.shape)

(1000, 784) (10000, 784)
(1000, 10) (10000, 10)


In [None]:
#Defining some functions to introduce non-linearity

def tanh(x):
  '''Hyperbolic-tangent activation: applies np.tanh element-wise to x.'''
  activated = np.tanh(x)
  return activated

def softmax(x):
  '''Row-wise softmax of a 2-D array.

  Args:
    x: array of shape (rows, classes) holding raw scores.

  Returns:
    Array of the same shape whose rows are non-negative and sum to 1.
  '''
  # Subtracting each row's maximum leaves the result unchanged
  # mathematically but keeps np.exp from overflowing on large scores.
  shifted = x - np.max(x, axis = 1, keepdims = True)
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis = 1, keepdims = True)

def tanh_deriv(x):
  '''Derivative of tanh expressed through its output.

  Computes 1 - x**2, i.e. `x` is expected to already be a tanh
  activation (tanh'(z) = 1 - tanh(z)**2), not the pre-activation input.
  '''
  squared_activation = x ** 2
  return 1 - squared_activation

In [None]:
# Random starting weights:
#   W01 (784 inputs -> 100 hidden units), values drawn from [-0.01, 0.01)
#   W12 (100 hidden units -> 10 outputs), values drawn from [-0.1, 0.1)
W01 = np.random.random((784, 100)) * 0.02 - 0.01
W12 = np.random.random((100, 10)) * 0.2 - 0.1

In [None]:
import sys
alpha, iterations, batch_size = 0.005, 300, 100
# Class index of every test sample; it never changes, so compute it once.
test_targets = np.argmax(test_labels, axis = 1)
for j in range(iterations):
  correct_cnt = 0
  # Mini-batch gradient descent: one weight update per `batch_size` rows.
  for i in range(len(images) // batch_size):
    batch_start, batch_end = (i * batch_size), ((i+1) * batch_size)
    # A batch of `batch_size` flattened images (784 pixels each)
    X = images[batch_start: batch_end]
    batch_labels = labels[batch_start: batch_end]

    #Applying forward propagation
    hidden = tanh(np.dot(X, W01)) #Hidden activations before dropout
    #Dropout regularisation: a random 0/1 mask switches off about half of
    #the hidden units; survivors are doubled to keep the expected sum.
    dropout = np.random.randint(2, size = hidden.shape)
    layer_1 = hidden * dropout * 2
    layer_2 = softmax(np.dot(layer_1, W12))

    # Count the rows whose predicted class matches the label.
    correct_cnt += int(np.sum(np.argmax(layer_2, axis = 1)
                              == np.argmax(batch_labels, axis = 1)))

    #Applying backward propagation
    layer_2_delta = (batch_labels - layer_2) / batch_size
    # The gradient passes back through the same dropout scaling used in the
    # forward pass, and the tanh derivative must be evaluated on the plain
    # tanh output (`hidden`), not on the dropout-scaled `layer_1`.
    layer_1_delta = layer_2_delta.dot(W12.T) * (dropout * 2) * tanh_deriv(hidden)

    W12_delta = layer_1.T.dot(layer_2_delta)
    W01_delta = X.T.dot(layer_1_delta)

    #Updating weight parameters
    W01 += alpha * W01_delta
    W12 += alpha * W12_delta

  #Report accuracy for both training and test data every 10th iteration
  if j % 10 == 0:
    # Forward pass over the whole test set with the current weights and no
    # dropout. Softmax is skipped because it does not change the argmax.
    # Distinct names are used so the raw `X_test` array is not overwritten.
    test_hidden = tanh(np.dot(test_images, W01))
    test_scores = np.dot(test_hidden, W12)
    test_correct_cnt = int(np.sum(np.argmax(test_scores, axis = 1) == test_targets))
    sys.stdout.write ("\n" + "Iterations: " + str(j) + " Training Accuracy: " +\
                      str(correct_cnt / float(len(images))) +\
                      " Testing Accuracy: " + \
                      str(test_correct_cnt / float(len(test_images))))


Iterations: 0 Training Accuracy: 0.882 Testing Accuracy: 0.8297
Iterations: 10 Training Accuracy: 0.885 Testing Accuracy: 0.8308
Iterations: 20 Training Accuracy: 0.884 Testing Accuracy: 0.8318
Iterations: 30 Training Accuracy: 0.879 Testing Accuracy: 0.8345
Iterations: 40 Training Accuracy: 0.883 Testing Accuracy: 0.8359
Iterations: 50 Training Accuracy: 0.889 Testing Accuracy: 0.8372
Iterations: 60 Training Accuracy: 0.893 Testing Accuracy: 0.8384
Iterations: 70 Training Accuracy: 0.885 Testing Accuracy: 0.8391
Iterations: 80 Training Accuracy: 0.883 Testing Accuracy: 0.8402
Iterations: 90 Training Accuracy: 0.897 Testing Accuracy: 0.8415
Iterations: 100 Training Accuracy: 0.897 Testing Accuracy: 0.8421
Iterations: 110 Training Accuracy: 0.899 Testing Accuracy: 0.843
Iterations: 120 Training Accuracy: 0.892 Testing Accuracy: 0.8436
Iterations: 130 Training Accuracy: 0.898 Testing Accuracy: 0.8444
Iterations: 140 Training Accuracy: 0.899 Testing Accuracy: 0.8458
Iterations: 150 Train

In [None]:
# NOTE(review): this cell contains only the literal 0, so running it just
# echoes 0 -- looks like a leftover scratch cell; confirm and consider removing.
0