<a href="https://colab.research.google.com/github/LokeshVadlamudi/DeepLearningClass/blob/master/dl1mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Importing the libraries

In [0]:
import numpy as np

In [0]:
from keras.datasets import mnist

#Loading the dataset

In [0]:
from keras.preprocessing.image import ImageDataGenerator

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test = X_test.reshape((X_test.shape[0], 28, 28, 1))


X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

#Data Augmentation

In [0]:
X_train = X_train[0:10000]
Y_train = Y_train[0:10000]

#preparing data for rotation 
a=X_train[0:1000]
b=Y_train[0:1000]

datagen = ImageDataGenerator(rotation_range=90)
datagen.fit(X_train)
complete= datagen.flow(a, b,batch_size=len(a))

#now lets concatenate both the augmented data and original data to training input and output set

In [0]:
for images,labels in complete:
  x_train = np.concatenate((X_train,images),axis = 0)
  y_train = np.concatenate((Y_train,labels),axis = 0)
  break


#Normalizing the data inputs

In [0]:
inputImgs, outputlabels = (x_train[0:11000].reshape(11000,28*28))/255,y_train[0:11000]

#One Hot encoding the output labels

In [0]:
hotLabels = np.zeros((len(outputlabels),10))


In [0]:
for i,l in enumerate(outputlabels):
  hotLabels[i][l] = 1
outputlabels = hotLabels

In [0]:
test_images = x_test.reshape(len(x_test),28*28)/255
test_labels = np.zeros((len(y_test),10))


In [0]:
for i,l in enumerate(y_test):
  test_labels[i][l] = 1

#Used Different Activation functions ( added relu , tanh and softmax)

In [0]:
def tanh(x):
  return np.tanh(x)

In [0]:
def tanh2deriv(output):
  return 1 - (output ** 2)

In [0]:
def relu(x):
  return (x>=0)*x

In [0]:
def relu2deriv(x):
  return x>=0

In [0]:
def softmax(x):
  temp = np.exp(x)
  return temp / np.sum(temp,axis = 1, keepdims = True)

#Number of iterations, and different hidden layer sizes

In [0]:
iterations , hidden_size, hidden_size1 = (100,200,100)

#different learning rates for different layers

In [0]:
alpha,beta,gamma = 2,3,1

#Initial input and number of output labels

In [0]:
inputPixels , num_labels = (784,10)

#batch size for stochastic mini-batch gradient descent

In [0]:
batch_size = 100

#Weights between layer_0 and layer_1

In [0]:
weights_0_1 = 0.2*np.random.random((inputPixels,hidden_size))-0.1

#weights between layer_1 and layer_2

In [0]:
weights_1_2 = 0.2*np.random.random((hidden_size,hidden_size1))-0.1

#Added additional weights for extra layer

In [0]:
weights_2_3 = 0.2 * np.random.random((hidden_size1,num_labels)) - 0.1

#Added an extra layer to make it three layer Neural Network.

In [224]:
for j in range(iterations):
  correct_cnt = 0


  for i in range(int(len(inputImgs) / batch_size)):


    batch_start, batch_end = ((i * batch_size), ((i + 1)*batch_size))
    layer_0 = inputImgs[batch_start:batch_end]

    layer_1 = tanh(np.dot(layer_0,weights_0_1))

    #drop out mask on layer 1
    dropout_mask = np.random.randint(2,size=layer_1.shape)
    layer_1 *= dropout_mask*2

    layer_2 = relu(np.dot(layer_1,weights_1_2))

    #drop out mask on layer 2
    dropout_mask = np.random.randint(2,size=layer_2.shape)
    layer_2 *= dropout_mask*2

    layer_3 = softmax(np.dot(layer_2,weights_2_3))

    for k in range(batch_size):
      correct_cnt += int(np.argmax(layer_3[k:k+1])== np.argmax(outputlabels[batch_start+k:batch_start+k+1]))
    
    layer_3_delta = (outputlabels[batch_start:batch_end] - layer_3) / (batch_size * layer_3.shape[0])
    layer_2_delta = layer_3_delta.dot(weights_2_3.T) * relu2deriv(layer_2)
    layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)


    weights_2_3 += gamma * layer_2.T.dot(layer_3_delta)
    weights_1_2 += beta * layer_1.T.dot(layer_2_delta)
    weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

  test_correct_cnt = 0
  
  for i in range(len(test_images)):
    Ypred = []
    Ylabels = []
    layer_0 = test_images[i:i+1]
    layer_1 = tanh(np.dot(layer_0,weights_0_1))
    layer_2 = relu(np.dot(layer_1,weights_1_2))
    layer_3 = np.dot(layer_2,weights_2_3)
    

    test_correct_cnt += int(np.argmax(layer_3) == np.argmax(test_labels[i:i+1]))
    Ypred += [int(np.argmax(layer_3))]
    Ylabels += [np.argmax(test_labels[i:i+1])]
  
  if(j % 10 == 0):
    print(str(j) + " test_acc" + str(test_correct_cnt/float(len(test_images))) + " Train_acc" + str(correct_cnt/float(len(inputImgs))))







0 test_acc0.6793 Train_acc0.2801818181818182
10 test_acc0.8813 Train_acc0.7899090909090909
20 test_acc0.9016 Train_acc0.8271818181818181
30 test_acc0.913 Train_acc0.839
40 test_acc0.923 Train_acc0.849909090909091
50 test_acc0.9263 Train_acc0.8492727272727273
60 test_acc0.9254 Train_acc0.853
70 test_acc0.9256 Train_acc0.842
80 test_acc0.9178 Train_acc0.8174545454545454
90 test_acc0.883 Train_acc0.7203636363636363


#lets us create confusion matrix

In [0]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [226]:
confusion_matrix(Ylabels, Ypred)


array([[1]])

In [227]:
accuracy_score(Ylabels, Ypred)*100

100.0