In [1]:
import numpy as np
import pandas as pd


In [2]:

df_train = pd.read_csv('emnist-letters-train.csv')
df_test = pd.read_csv('emnist-letters-test.csv')

In [3]:

X=df_train.drop(['Y'], axis=1)
X_train=X.to_numpy().astype('float64')
X_train = X_train/255.0
m,n=X_train.shape
X_train = X_train.T
Y_train=df_train['Y'].to_numpy().astype('int').reshape(1, m)


In [4]:
print(X_train)
print(Y_train)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
[[23  7 16 ...  1 23 12]]


In [5]:

X=df_test.drop(['Y'], axis=1)
X_test=X.to_numpy().astype('float64')
X_test = X_test/255.0
mt,nt=X_test.shape
X_test = X_test.T
Y_test=df_test['Y'].to_numpy().astype('int').reshape(1, mt)

In [6]:
def init_params():
    W1 = np.random.rand(26, 784) - 0.5
    b1 = np.random.rand(26, 1) - 0.5
    W2 = np.random.rand(26, 26) - 0.5
    b2 = np.random.rand(26, 1) - 0.5
    return W1, b1, W2, b2


In [7]:

def ReLU(Z):
    return np.maximum(Z, 0)


In [8]:

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A


In [9]:
    
def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2


In [10]:

def ReLU_deriv(Z):
    return Z > 0


In [11]:

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() ))
    one_hot_Y[np.arange(Y.size), Y-1] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y


In [12]:

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2


In [13]:

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1    
    W2 = W2 - alpha * dW2  
    b2 = b2 - alpha * db2    
    return W1, b1, W2, b2

In [14]:
def get_predictions(A2):
    return (np.argmax(A2, 0) + 1)


In [15]:

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size


In [16]:

def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2 = init_params()
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 1000 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A2)
            print(get_accuracy(predictions, Y))
    return W1, b1, W2, b2

In [25]:
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 0.7, 4001)

Iteration:  0
[22  2  1 ... 22 17 10] [[23  7 16 ...  1 23 12]]
0.03280405405405405
Iteration:  1000
[11  7 16 ...  1 23 12] [[23  7 16 ...  1 23 12]]
0.7551126126126126
Iteration:  2000
[11  7 16 ...  1 23  9] [[23  7 16 ...  1 23 12]]
0.7835022522522522
Iteration:  3000
[11  7 16 ...  1 23 12] [[23  7 16 ...  1 23 12]]
0.8056531531531531
Iteration:  4000
[11  7 16 ...  1 23 12] [[23  7 16 ...  1 23 12]]
0.8082432432432433


In [26]:
Z1 = W1.dot(X_test) + b1
A1 = ReLU(Z1) 
Z2 = W2.dot(A1) + b2
A2 = softmax(Z2)
print("Accuracy:" , get_accuracy(get_predictions(A2),Y_test))

[ 1  1  1 ... 19 19 19] [[ 1  1  1 ... 19 19 19]]
Accuracy: 0.7637162162162162
