In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

In [2]:
# Load the EMNIST-letters train/test CSVs; the files carry no header row.
data1 = pd.read_csv('emnist-letters-train.csv', header=None)
data2 = pd.read_csv('emnist-letters-test.csv', header=None)

# Column 0 is split off as the target (presumably the class label - confirm
# against the dataset description); every remaining column is a feature.
# The open-ended slice `1:` is deliberate: iloc slices are end-exclusive, so a
# bound such as `1:784` would stop at column 783 and drop the last feature.
M = data1.iloc[:, 1:].values
N = data1.iloc[:, 0].values
K = data2.iloc[:, 1:].values
N_test = data2.iloc[:, 0].values

In [3]:
def tanh(x):
    """Element-wise hyperbolic tangent of ``x`` (thin wrapper over ``np.tanh``)."""
    activated = np.tanh(x)
    return activated
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    floor = 0
    return np.maximum(x, floor)
def softmax(x):
    """Softmax of ``x`` normalised along axis 0.

    For a 2-D input every column of the result sums to 1.

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged (the common factor cancels in the
    ratio) but keeps ``np.exp`` from overflowing to ``inf`` on large inputs,
    which would otherwise turn the output into ``nan``.
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    expX = np.exp(shifted)
    return expX / np.sum(expX, axis=0)

In [4]:
def d_tanh(x):
    """Return ``1 - x**2`` element-wise.

    NOTE(review): this equals the tanh derivative only when ``x`` is already
    the tanh *output* (the activation), not the pre-activation - confirm at
    the call sites.
    """
    squared = np.power(x, 2)
    return 1 - squared
def d_relu(x):
    """ReLU gradient mask: 1.0 where ``x > 0`` and 0.0 elsewhere, as float32."""
    positive_mask = x > 0
    return np.asarray(positive_mask, dtype=np.float32)

In [5]:
def initialize_parameters(n_x, n_h, n_y):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by 0.01;
    biases start at zero.

    Parameters
    ----------
    n_x : int
        Number of input features.
    n_h : int
        Number of hidden units.
    n_y : int
        Number of output units.

    Returns
    -------
    dict
        ``"w1"`` of shape ``(n_h, n_x)``, ``"b1"`` of shape ``(n_h, 1)``,
        ``"w2"`` of shape ``(n_y, n_h)`` and ``"b2"`` of shape ``(n_y, 1)``.
    """
    # Shapes follow the forward pass, which computes np.dot(w1, x) + b1 and
    # np.dot(w2, a1) + b2: each weight matrix is (units_out, units_in) and
    # each bias is a column vector with one row per output unit of its layer.
    w1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    w2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    parameters = {
        "w1": w1,
        "b1": b1,
        "w2": w2,
        "b2": b2,
    }
    return parameters

In [6]:
def fwrd_prop(x, parameters):
    """Run one forward pass through the two-layer network.

    The hidden layer applies ``relu`` to ``np.dot(w1, x) + b1``; the output
    layer applies ``softmax`` to ``np.dot(w2, a1) + b2``.

    Parameters
    ----------
    x : array
        Network input; must be shaped so that ``np.dot(w1, x)`` is defined.
    parameters : dict
        Must contain the keys ``"w1"``, ``"b1"``, ``"w2"`` and ``"b2"``.

    Returns
    -------
    dict
        Pre-activations and activations of both layers under the keys
        ``"z1"``, ``"a1"``, ``"z2"`` and ``"a2"``.
    """
    w1, b1, w2, b2 = (parameters[key] for key in ('w1', 'b1', 'w2', 'b2'))

    # Hidden layer.
    hidden_pre = np.dot(w1, x) + b1
    hidden_act = relu(hidden_pre)

    # Output layer.
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = softmax(output_pre)

    return {
        "z1": hidden_pre,
        "a1": hidden_act,
        "z2": output_pre,
        "a2": output_act,
    }

In [7]:
def cost_function(a2, y):
    """Cross-entropy cost ``-(1/m) * sum(y * log(a2))``.

    Parameters
    ----------
    a2 : array
        Predicted probabilities, same shape as ``y``.
    y : 2-D array
        Target values; ``m`` is taken from ``y.shape[1]``.

    Returns
    -------
    float
        The cost averaged over the ``m`` columns.
    """
    m = y.shape[1]
    # Keep probabilities strictly positive before the log: an exact 0 gives
    # log(0) = -inf, and 0 * -inf (a zero target) would make the sum nan.
    eps = 1e-12
    safe_a2 = np.clip(a2, eps, None)
    cost = -(1 / m) * np.sum(y * np.log(safe_a2))
    return cost

In [8]:
def bkwd_prop(x, y, parameters, fwrd_cache):
    """Compute the gradients of the cost with respect to all parameters.

    Parameters
    ----------
    x : array
        Network input; ``m`` (the averaging factor) is taken from
        ``x.shape[1]``.
    y : array
        Targets, same shape as the cached output activation ``a2``.
    parameters : dict
        Only ``"w2"`` is read here.
    fwrd_cache : dict
        Must contain the activations ``"a1"`` and ``"a2"``.

    Returns
    -------
    dict
        Gradients under the keys ``"dw1"``, ``"db1"``, ``"dw2"`` and
        ``"db2"``.
    """
    w2 = parameters['w2']

    a1 = fwrd_cache['a1']
    a2 = fwrd_cache['a2']

    m = x.shape[1]

    # Output layer.
    dz2 = a2 - y
    dw2 = (1 / m) * np.dot(dz2, a1.T)
    db2 = (1 / m) * np.sum(dz2, axis=1, keepdims=True)

    # Hidden layer. dz1 is left unscaled: the 1/m averaging is applied once,
    # in dw1/db1 below, exactly as for the output layer. The ReLU mask is
    # evaluated on the cached activation a1.
    dz1 = np.dot(w2.T, dz2) * d_relu(a1)
    dw1 = (1 / m) * np.dot(dz1, x.T)
    db1 = (1 / m) * np.sum(dz1, axis=1, keepdims=True)

    gradients = {
        "dw1": dw1,
        "db1": db1,
        "dw2": dw2,
        "db2": db2,
    }
    return gradients

In [9]:
def update_parameters(parameters, gradients, lr):
    """Apply one gradient-descent step and return the updated parameters.

    Parameters
    ----------
    parameters : dict
        Current values under the keys ``"w1"``, ``"b1"``, ``"w2"``, ``"b2"``.
    gradients : dict
        Matching gradients under ``"dw1"``, ``"db1"``, ``"dw2"``, ``"db2"``.
    lr : float
        Learning rate (step size).

    Returns
    -------
    dict
        A new dict with the same keys as ``parameters``; the input arrays are
        not modified in place.
    """
    # Each gradient lives in `gradients` under the parameter's name prefixed
    # with "d" (w1 -> dw1, ...).
    return {
        name: parameters[name] - lr * gradients["d" + name]
        for name in ("w1", "b1", "w2", "b2")
    }

In [10]:
def NeuralNetwork(x, y, n_h, lr, iter):
    """Train the two-layer network with full-batch gradient descent.

    Parameters
    ----------
    x : array
        Training inputs; ``x.shape[0]`` is used as the input size.
    y : array
        Training targets; ``y.shape[0]`` is used as the output size.
    n_h : int
        Number of hidden units.
    lr : float
        Learning rate passed to ``update_parameters``.
    iter : int
        Number of gradient-descent iterations. (The name shadows the builtin
        ``iter``; it is kept so existing keyword calls continue to work.)

    Returns
    -------
    dict
        The trained parameters (``"w1"``, ``"b1"``, ``"w2"``, ``"b2"``).
    """
    n_x = x.shape[0]
    n_y = y.shape[0]

    parameters = initialize_parameters(n_x, n_h, n_y)

    # Report roughly ten times over the run. An integer interval (at least 1)
    # avoids a float modulus, which reports irregularly or not at all when
    # `iter` is not a multiple of 10.
    report_every = max(1, iter // 10)

    for i in range(iter):
        fwrd_cache = fwrd_prop(x, parameters)

        # The cost is only needed for the progress report; it is evaluated
        # on the pre-update parameters of iteration i.
        if i % report_every == 0:
            cost = cost_function(fwrd_cache['a2'], y)
            print("cost after", i, "iteration is :", cost)

        gradients = bkwd_prop(x, y, parameters, fwrd_cache)
        parameters = update_parameters(parameters, gradients, lr)

    return parameters