# **3 Layer Network on EMNIST**

In [32]:
import sys, numpy as np
import pandas as pd

train = pd.read_csv("./input/emnist-balanced-train.csv",delimiter = ',')
test = pd.read_csv("./input/emnist-balanced-test.csv", delimiter = ',')
mapp = pd.read_csv("./input/emnist-balanced-mapping.txt", delimiter = ' ', \
                   index_col=0, header=None, squeeze=True)

# Constants
HEIGHT = 28
WIDTH = 28

# Split x and y
train_x = train.iloc[:1000,1:]
train_y = train.iloc[:1000,0]
del train

test_x = test.iloc[:1000,1:]
test_y = test.iloc[:1000,0]
del test

def rotate(image):
    image = image.reshape([HEIGHT, WIDTH])
    image = np.fliplr(image)
    image = np.rot90(image)
    return image

# Flip and rotate image
train_x = np.asarray(train_x)
train_x = np.apply_along_axis(rotate, 1, train_x)
test_x = np.asarray(test_x)
test_x = np.apply_along_axis(rotate, 1, test_x)

# Normalise
train_x = train_x.astype('float32')
train_x /= 255
test_x = test_x.astype('float32')
test_x /= 255

#reshape rotated images again
images = train_x.reshape(1000,28*28)
labels = train_y

one_hot_labels = np.zeros((len(labels), 47))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = test_x.reshape(len(test_x),28*28)
test_labels = np.zeros((len(test_y), 47))
for i,l in enumerate(test_y):
    test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 750, 40, 784, 47)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(labels[i:i+1]))

        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)\
                                    * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    sys.stdout.write("\r I:"+str(j)+ \
                     " Train-Err:" + str(error/float(len(images)))[0:5] +\
                     " Train-Acc:" + str(correct_cnt/float(len(images))))

 I:749 Train-Err:0.432 Train-Acc:0.726

In [33]:
if(j % 10 == 0 or j == iterations-1):
    error, correct_cnt = (0.0, 0)

    for i in range(len(test_images)):

        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(test_labels[i:i+1]))
    sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +\
                     " Test-Acc:" + str(correct_cnt/float(len(test_images))) + "\n")
    print()

 Test-Err:4.511 Test-Acc:0.187



In [35]:
import sys, numpy as np

train = pd.read_csv("./input/emnist-balanced-train.csv",delimiter = ',')
test = pd.read_csv("./input/emnist-balanced-test.csv", delimiter = ',')
mapp = pd.read_csv("./input/emnist-balanced-mapping.txt", delimiter = ' ', \
                   index_col=0, header=None, squeeze=True)

# Constants
HEIGHT = 28
WIDTH = 28

# Split x and y
train_x = train.iloc[:1000,1:]
train_y = train.iloc[:1000,0]
del train

test_x = test.iloc[:1000,1:]
test_y = test.iloc[:1000,0]
del test

def rotate(image):
    image = image.reshape([HEIGHT, WIDTH])
    image = np.fliplr(image)
    image = np.rot90(image)
    return image

# Flip and rotate image
train_x = np.asarray(train_x)
train_x = np.apply_along_axis(rotate, 1, train_x)
test_x = np.asarray(test_x)
test_x = np.apply_along_axis(rotate, 1, test_x)

# Normalise
train_x = train_x.astype('float32')
train_x /= 255
test_x = test_x.astype('float32')
test_x /= 255

#reshape rotated images again
images = train_x.reshape(1000,28*28)
labels = train_y

one_hot_labels = np.zeros((len(labels), 47))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = test_x.reshape(len(test_x),28*28)
test_labels = np.zeros((len(test_y), 47))
for i,l in enumerate(test_y):
    test_labels[i][l] = 1


np.random.seed(1)
relu = lambda x:(x>=0) * x # returns x if x > 0, return 0 otherwise
relu2deriv = lambda x: x>=0 # returns 1 for input > 0, return 0 otherwise
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 750, 40, 784, 47)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == \
                                        np.argmax(labels[i:i+1]))

        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)\
                                    * relu2deriv(layer_1)
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    sys.stdout.write("\r I:"+str(j)+ \
                     " Train-Err:" + str(error/float(len(images)))[0:5] +\
                     " Train-Acc:" + str(correct_cnt/float(len(images))))
    
    if(j % 10 == 0 or j == iterations-1):
        error, correct_cnt = (0.0, 0)

        for i in range(len(test_images)):

            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1,weights_1_2)

            error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            correct_cnt += int(np.argmax(layer_2) == \
                                            np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Test-Err:" + str(error/float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(correct_cnt/float(len(test_images))))
        print()

 I:0 Train-Err:1.136 Train-Acc:0.078 Test-Err:1.035 Test-Acc:0.129
 I:10 Train-Err:0.817 Train-Acc:0.466 Test-Err:0.890 Test-Acc:0.317
 I:20 Train-Err:0.757 Train-Acc:0.504 Test-Err:0.868 Test-Acc:0.337
 I:30 Train-Err:0.729 Train-Acc:0.509 Test-Err:0.870 Test-Acc:0.331
 I:40 Train-Err:0.707 Train-Acc:0.531 Test-Err:0.877 Test-Acc:0.326
 I:50 Train-Err:0.696 Train-Acc:0.544 Test-Err:0.886 Test-Acc:0.323
 I:60 Train-Err:0.685 Train-Acc:0.555 Test-Err:0.895 Test-Acc:0.318
 I:70 Train-Err:0.675 Train-Acc:0.564 Test-Err:0.906 Test-Acc:0.306
 I:80 Train-Err:0.662 Train-Acc:0.585 Test-Err:0.920 Test-Acc:0.315
 I:90 Train-Err:0.652 Train-Acc:0.584 Test-Err:0.935 Test-Acc:0.312
 I:100 Train-Err:0.645 Train-Acc:0.589 Test-Err:0.949 Test-Acc:0.308
 I:110 Train-Err:0.637 Train-Acc:0.59 Test-Err:0.959 Test-Acc:0.299
 I:120 Train-Err:0.627 Train-Acc:0.588 Test-Err:0.969 Test-Acc:0.287
 I:130 Train-Err:0.618 Train-Acc:0.594 Test-Err:0.981 Test-Acc:0.28
 I:140 Train-Err:0.608 Train-Acc:0.611 Test-Err

# **Dropout In Code**

In [0]:
i = 0
layer_0 = images[i:i+1]
dropout_mask = np.random.randint(2,size=layer_1.shape)

layer_1 *= dropout_mask * 2
layer_2 = np.dot(layer_1, weights_1_2)

error += np.sum((labels[i:i+1] - layer_2) ** 2)

correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i+i+1]))

layer_2_delta = (labels[i:i+1] - layer_2)
layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

layer_1_delta *= dropout_mask

weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

In [37]:
import numpy, sys
np.random.seed(1)
def relu(x):
    return (x >= 0) * x # returns x if x > 0
                        # returns 0 otherwise

def relu2deriv(output):
    return output >= 0 #returns 1 for input > 0

alpha, iterations, hidden_size = (0.005, 750, 100)
pixels_per_image, num_labels = (784, 47)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0,0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1] - layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:1.017 Test-Acc:0.068 Train-Err:1.417 Train-Acc:0.039
I:10 Test-Err:0.914 Test-Acc:0.214 Train-Err:0.917 Train-Acc:0.235
I:20 Test-Err:0.884 Test-Acc:0.27 Train-Err:0.871 Train-Acc:0.258
I:30 Test-Err:0.867 Test-Acc:0.316 Train-Err:0.843 Train-Acc:0.286
I:40 Test-Err:0.856 Test-Acc:0.321 Train-Err:0.830 Train-Acc:0.306
I:50 Test-Err:0.851 Test-Acc:0.329 Train-Err:0.826 Train-Acc:0.31
I:60 Test-Err:0.847 Test-Acc:0.33 Train-Err:0.814 Train-Acc:0.321
I:70 Test-Err:0.849 Test-Acc:0.32 Train-Err:0.803 Train-Acc:0.321
I:80 Test-Err:0.851 Test-Acc:0.321 Train-Err:0.807 Train-Acc:0.321
I:90 Test-Err:0.852 Test-Acc:0.334 Train-Err:0.805 Train-Acc:0.317
I:100 Test-Err:0.848 Test-Acc:0.337 Train-Err:0.791 Train-Acc:0.34
I:110 Test-Err:0.852 Test-Acc:0.328 Train-Err:0.781 Train-Acc:0.357
I:120 Test-Err:0.857 Test-Acc:0.337 Train-Err:0.793 Train-Acc:0.335
I:130 Test-Err:0.860 Test-Acc:0.321 Train-Err:0.796 Train-Acc:0.321
I:140 Test-Err:0.862 Test-Acc:0.324 Train-Err:0.812 Train-Acc:0

# **Batch Gradient Descent**

In [40]:
np.random.seed(1)

def relu(x):
    return (x >= 0) * x # returns x if x > 0

def relu2deriv(output):
    return output >= 0 # returns 1 for input > 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 47, 100)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))

        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

            layer_2_delta = (labels[batch_start:batch_end]-layer_2)/batch_size
            layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)
            layer_1_delta *= dropout_mask

            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
            
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:1.164 Test-Acc:0.031 Train-Err:2.519 Train-Acc:0.019
I:10 Test-Err:0.981 Test-Acc:0.137 Train-Err:1.016 Train-Acc:0.121
I:20 Test-Err:0.957 Test-Acc:0.18 Train-Err:0.974 Train-Acc:0.159
I:30 Test-Err:0.942 Test-Acc:0.203 Train-Err:0.946 Train-Acc:0.223
I:40 Test-Err:0.929 Test-Acc:0.224 Train-Err:0.927 Train-Acc:0.235
I:50 Test-Err:0.916 Test-Acc:0.236 Train-Err:0.915 Train-Acc:0.234
I:60 Test-Err:0.907 Test-Acc:0.258 Train-Err:0.900 Train-Acc:0.245
I:70 Test-Err:0.898 Test-Acc:0.266 Train-Err:0.891 Train-Acc:0.27
I:80 Test-Err:0.893 Test-Acc:0.271 Train-Err:0.880 Train-Acc:0.251
I:90 Test-Err:0.887 Test-Acc:0.277 Train-Err:0.866 Train-Acc:0.275
I:100 Test-Err:0.883 Test-Acc:0.291 Train-Err:0.862 Train-Acc:0.291
I:110 Test-Err:0.877 Test-Acc:0.294 Train-Err:0.853 Train-Acc:0.313
I:120 Test-Err:0.875 Test-Acc:0.32 Train-Err:0.851 Train-Acc:0.291
I:130 Test-Err:0.874 Test-Acc:0.316 Train-Err:0.860 Train-Acc:0.263
I:140 Test-Err:0.870 Test-Acc:0.316 Train-Err:0.849 Train-Acc