In [1]:
#create an artificial neural network from scratch to predict letters/digits(handwritten)
#from the emnist(balanced) dataset

#importing the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import sklearn
from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv("/kaggle/input/emnist/emnist-balanced-train.csv")
data = df.to_numpy()
data_train,data_test = train_test_split(data,test_size = 0.2)
#there are 47 classes in the dataset,each mapped to a particular ascii value

70837615
17709600


In [29]:
#print(data_train.shape) = (90239,785)
#print(data_test.shape) = (22560,785)
Xtrain = (data_train[:,1:].T)/(28*28)
Ytrain = data_train[:,0].T
X_test = data_test[:,1:].T
Y_test = data_test[:,0].T

In [49]:
Ytrain.reshape((1,data_train.shape[0]))
Y_test.reshape((1,data_test.shape[0]))

array([[33,  9, 34, ..., 13, 13, 43]])

In [58]:
m = np.shape(Ytrain)[0]
Y_train_new = np.zeros((47,m))
for i in range(m):
    Y_train_new[Ytrain[i],i] = 1
print(Y_train_new.shape)
print(Y_train_new)

(47, 90239)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]


In [76]:
def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    save = Z
    return A,save
def relu(Z):
    A = Z.copy()
    A[A < 0] = 0
    save = Z
    return A,save
def sigmoid_back(dA,act_cache):
    Z = act_cache
    A,save = sigmoid(Z)
    dZ = dA*A*(1-A)
    return dZ
def relu_back(dA,act_cache):
    Z = act_cache + np.exp(-5)
    Z = (Z>0).astype(int)
    dZ = dA*Z
    return dZ    

In [60]:
#initializing the parameters
np.random.seed(1)
def initialize_params(layer_dim):
    n = len(layer_dim)
    params = {}
    for i in range(1,n):
        params['w'+str(i)] = np.random.randn(layer_dim[i],layer_dim[i-1])*0.01
        params['b'+str(i)] = np.zeros((layer_dim[i],1))
    return params

In [61]:
def forward(A_prev,w,b):
    z = np.dot(w,A_prev)+b
    cache = (A_prev,w,b)
    return z,cache

In [62]:
def forward_with_act(A_prev,w,b,key):
    z,raw_cache = forward(A_prev,w,b)
    if(key == 'sigmoid'):
        A,act_cache = sigmoid(z)
    if(key == 'relu'):
        A,act_cache = relu(z)
    store = (raw_cache,act_cache)
    return A,store
    

In [63]:
def forward_complete(X,params):
    caches = []
    A_prev = X
    L = int(len(params)/2)
    for i in range(1,L):
        A,store = forward_with_act(A_prev,params['w'+str(i)],params['b'+str(i)],'relu')
        A_prev = A
        caches.append(store)
    Al,store = forward_with_act(A_prev,params["w"+str(L)],params["b"+str(L)],"sigmoid")
    caches.append(store)
    return Al, caches
        

In [64]:
def get_cost(Al,Y):
    m = np.shape(Y)[0]
    cost = (-1/m)*(np.dot(Y,np.log(Al).T) + np.dot(1-Y,np.log(1-Al).T))
    cost = np.squeeze(cost)
    return cost

In [81]:
def backward(dZ,cache):
    m = np.shape(dZ)[1]
    #raw_cache,act_cache = cache
    A_prev,w,b = cache
    dw = (1/m)*np.dot(dZ,A_prev.T)
    db = (1/m)*np.sum(dZ,axis = 1,keepdims = True)
    dA_prev = np.dot(w.T,dZ)
    return dw,db,dA_prev
    

In [66]:
def backward_with_act(dA,cache,key):
    raw_cache,act_cache = cache
    if(key == 'relu'):
        dZ = relu_back(dA,act_cache)
        dA_prev,dw,db = backward(dZ,raw_cache)
    if(key == 'sigmoid'):
        dZ = sigmoid_back(dA,act_cache)
        dA_prev,dw,db = backward(dZ,raw_cache)
    return dA_prev,dw,db
     

In [85]:
def backward_complete(Al,Y,caches):
    gradients = {}
    L = len(caches)
    m = Al.shape[1]
    Y = Y.reshape(Al.shape)
    dAl = -(np.divide(Y,Al) - np.divide(1-Y,1-Al))
    dw,db,dA_prev = backward_with_act(dAl,caches[L-1],'sigmoid')
    gradients['dw'+str(L)] = dw
    gradients['db'+str(L)] = db
    gradients['dA'+str(L-1)] = dA_prev
    for i in range(L-1,0,-1):
        cache = caches[i]
        dw,db,dA_prev = backward_with_act(dA_prev,cache,'relu')
        gradients['dw'+str(i)] = dw
        gradients['db'+str(i)] = db
        gradients['dA'+str(i-1)] = dA_prev
        return gradients

In [68]:
def update_params(params,gradients,learning_rate):
    params1 = params.copy()
    L = len(params)
    for i in range(L):
        params['w'+str(l+1)] = params['w'+str(l+1)] - learning_rate*gradients['dw'+str(l+1)]
        params['b'+str(l+1)] = params['b'+str(l+1)] - learning_rate*gradient['db'+str(l+1)]
    return params

In [69]:
 def L_Layer_NN_Model(X_train,Y,layers_dimensions,learning_rate = 0.01,iterations = 1000,print_cost = True):
    np.random.seed(1)
    costs = []
    params = initialize_params(layers_dimensions)
    for i in range(iterations):
        Al,caches = forward_complete(X_train,params)
        cost = get_cost(Al,Y)
        gradients = backward_complete(Al,Y,caches)
        for l in range(1,len(layers_dimensions)):
            params['w'+str(l)] = params['w'+str(l)] - learning_rate*gradients['dw'+str(l)]
            params['b'+str(l)] = params['b'+str(l)] - learning_rate*gradients['db'+str(l)]
        if (print_cost and i%100 == 0) or i == num_iterations - 1:
            print("cost after iteration{} : {}".format(i,np.squeeze(cost)))
        if i%100 == 0 or i == num_iterations:
            costs.append(cost)
    return params,costs


In [70]:
def predict(X,Y,params):
    Al,caches = forward_complete(X,params)
    m = X.shape[1]
    pred = np.zeros(Y.shape)
    for i in range(m):
        index = np.argmax(Al[:,i])
        pred[i] = index
    return pred
        
    

In [71]:
def get_accuracy(Y,pred):
    count_ones = list[(Y == pred).astype(int)].count(1)
    accuracy = count_ones/Y.shape[1]
    return accuracy
    

In [86]:
layer_dimensions = [784,415,207,47]
params,costs = L_Layer_NN_Model(Xtrain,Y_train_new,layer_dimensions)
print("cost after first iteration: " + str(costs[0]))


ValueError: operands could not be broadcast together with shapes (207,90239) (47,90239) 

In [None]:
pred = predict(X_test,Y_test,params)
accuracy = get_accuracy(Y_test,pred)