In [1]:
import numpy as np
import pandas as pd

In [2]:
emnist_train = pd.read_csv("/Users/jaladisaran/Downloads/emnist-letters-train.csv")
emnist_test = pd.read_csv("/Users/jaladisaran/Downloads/emnist-letters-test.csv")

In [3]:
train = emnist_train.to_numpy()
test = emnist_test.to_numpy()

In [4]:
X_train = train[:,1:]
X_test = test[:,1:]

In [5]:
Y_train = train[:,0]
Y_test = test[:,0]

In [6]:
X_train=X_train/255
X_test=X_test/255

In [7]:
Z_train= np.zeros((X_train.shape[0],len(np.unique(Y_train))))
Z_train.shape

(88799, 26)

In [8]:
for i in range(X_train.shape[0]):
    Z_train[i][Y_train[i]-1] = 1

Z_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [55]:
def function(X,W,b):
    z = np.dot(X,W)+b
    a = 1/(1+np.exp(-z))
    return a

def forward_prop(X,W1,b1,W2,b2):
    H1 = function(X,W1,b1)
    A  = function(H1,W2,b2)
    return A,H1

def err(A,Y):
    err = A-Y
    return err

def dsig(A):
    z = np.multiply(A,1-A)
    return z

def loss_function(X,Y,W1,b1,W2,b2):
    m=X.shape[0]
    a = forward_prop(X,W1,b1,W2,b2)[0]
    f1 = np.log(a)
    f2 = np.log(1-a)
    loss = -np.sum(np.multiply(Y,f1)+np.multiply((1-Y),f2))
    loss = loss/m
    return loss

def gradient_compute(X,Y,W1,b1,W2,b2):
    A,H1 = forward_prop(X,W1,b1,W2,b2)
    
    m = X.shape[0]
    dj_dw2 = np.dot(H1.T,np.multiply(err(A,Y),dsig(A)))/m
    dj_db2 = np.sum(np.multiply(err(A,Y),dsig(A)),axis =0)/m
    dj_dw1 = np.dot(X.T,np.multiply(np.dot(dj_db2,W2.T),dsig(H1)))/m
    dj_db1 = np.sum(np.multiply(np.dot(dj_db2,W2.T),dsig(H1)),axis=0)/m
    
    return dj_dw2,dj_db2,dj_dw1,dj_db1

def gradient_descent(X,Y,W1,b1,W2,b2,L,n):
    
    for i in range(n):
        
        dj_dw2 = gradient_compute(X,Y,W1,b1,W2,b2)[0]
        dj_db2 = gradient_compute(X,Y,W1,b1,W2,b2)[1]
        dj_dw1 = gradient_compute(X,Y,W1,b1,W2,b2)[2]
        dj_db1 = gradient_compute(X,Y,W1,b1,W2,b2)[3]
        
        W2 = W2 - L*dj_dw2
        W1 = W1 - L*dj_dw1
        b2 = b2 - L*dj_db2
        b1 = b1 - L*dj_db1
        
        loss = loss_function(X,Y,W1,b1,W2,b2)
        print(i,loss)
        
    return W1,b1,W2,b2

In [129]:
n=26
W1 = np.random.rand(X_train.shape[1],n)-0.5
b1 = np.random.rand(n)-0.5
W2 = np.random.rand(n,Z_train.shape[1])-0.5
b2 = np.random.rand(Z_train.shape[1])-0.5

In [130]:
W1_final,b1_final,W2_final,b2_final = gradient_descent(X_train,Z_train,W1,b1,W2,b2,0.001,100)

0 22.48596217845759
1 22.446854778798713
2 22.407828537371344
3 22.368883500584115
4 22.330019712976416
5 22.291237217224126
6 22.252536054145377
7 22.213916262706523
8 22.17537788002821
9 22.136920941391455
10 22.098545480243995
11 22.060251528206624
12 22.022039115079448
13 21.983908268848854
14 21.94585901569371
15 21.907891379992414
16 21.87000538432967
17 21.83220104950338
18 21.794478394531755
19 21.756837436660376
20 21.719278191369458
21 21.681800672381137
22 21.64440489166686
23 21.607090859454917
24 21.569858584237956
25 21.53270807278062
26 21.495639330127364
27 21.458652359610102
28 21.421747162856374
29 21.384923739797074
30 21.34818208867454
31 21.311522206050785
32 21.274944086815633
33 21.238447724194952
34 21.20203310975901
35 21.165700233430943
36 21.129449083495196
37 21.093279646606074
38 21.057191907796277
39 21.02118585048576
40 20.98526145649028
41 20.94941870603031
42 20.913657577739816
43 20.87797804867528
44 20.8423800943246
45 20.806863688616115
46 20.7714288

In [131]:
NN_output = forward_prop(X_test,W1_final,b1_final,W2_final,b2_final)[0]
NN_output.shape

(14799, 26)

In [132]:
max = NN_output.max(axis=1)
for i in range(X_test.shape[0]):
    for j in range(len(np.unique(Y_train))):
        if NN_output[i][j] == max[i]:
            NN_output[i][j] = 1
        else:
            NN_output[i][j] = 0
            
NN_output

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [133]:
NN_predicted = np.empty_like(Y_test)
for i in range(X_test.shape[0]):
    for j in range(len(np.unique(Y_train))):
        if NN_output[i][j]==1:
            NN_predicted[i]=j+1
            
NN_predicted


array([6, 6, 6, ..., 6, 6, 6])

In [134]:
def accuracy(yout,ytest):
    m = yout.shape[0]
    count=0
    for i in range(m):
        if yout[i]==ytest[i]:
            count+=1
            
    acc = count/m*100
    return acc

In [135]:
accuracy(NN_predicted,Y_test)

5.405770660179742

In [136]:
count = 0
for i in NN_predicted:
    if i==12:
        count+=1

In [137]:
count

0

In [138]:
Z_train.shape

(88799, 26)