In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Load the training CSV into a DataFrame; the bare name on the last line
# makes the notebook render a preview of the frame.
data = pd.read_csv(filepath_or_buffer='/kaggle/input/digit-recognizer/train.csv')
data

In [None]:
# Separate the target column ('label') from the remaining feature columns.
y = data['label']
X = data.drop(columns=['label'])

In [None]:
# Scale every feature value by 1/255 (presumably 8-bit pixel intensities,
# mapping them into [0, 1] -- confirm against the dataset).
# NOTE: this rebinds X to its own scaled version, so re-running the cell
# without re-running the previous one divides by 255 a second time.
X = X.div(255)

In [None]:
# Convert to NumPy and transpose so that each COLUMN is one example:
# X_to_array has shape (nx, m) = (features, examples).
X_to_array = np.transpose(np.array(X))
nx, m = X_to_array.shape

# Labels become a single row vector of shape (1, m) to line up with the
# column-per-example layout of X_to_array.
y_to_array = np.array(y).reshape(1, m)

In [None]:
# Sanity check: draw one random example (column) and display it together
# with its label. Each column is reshaped to a 28x28 image for plotting.
index = np.random.randint(X_to_array.shape[1])
print("Index: ", index)
print("Label: ", y_to_array[0, index])
plt.imshow(X_to_array[:, index].reshape(28, 28), cmap='gray')
plt.show()

In [None]:
# Hold-out split along the example axis (axis 1): the first 41000 columns
# are used for training, everything after that for testing.
# Labels keep their (1, n_examples) row-vector shape in both splits.
X_train, X_test = np.split(X_to_array, [41000], axis=1)
y_train, y_test = np.split(y_to_array, [41000], axis=1)

In [None]:
def one_hot(y, num_classes=None):
    """One-hot encode integer class labels, one column per example.

    Parameters
    ----------
    y : array-like of int
        Class labels. Any shape is accepted (e.g. ``(m,)`` or ``(1, m)``);
        the labels are flattened before encoding.
    num_classes : int, optional
        Number of rows (classes) in the output. When omitted it is inferred
        as ``max(y) + 1``, which matches the previous behaviour. Pass it
        explicitly when encoding several splits so that a split that happens
        to lack the highest class still gets a matrix of the same height.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_classes, m)`` where entry ``[k, i]`` is 1
        if example ``i`` has label ``k`` and 0 otherwise.

    Raises
    ------
    ValueError
        If ``y`` is empty and ``num_classes`` is not given (the maximum of an
        empty array is undefined), or if any label falls outside
        ``[0, num_classes)``.
    """
    labels = np.asarray(y).ravel()

    if num_classes is None:
        # Same inference rule as before: the largest label defines the height.
        num_classes = int(labels.max()) + 1

    # Reject out-of-range labels explicitly; negative labels would otherwise
    # wrap around silently through NumPy's negative indexing.
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            "labels must lie in [0, %d); got values in [%d, %d]"
            % (num_classes, labels.min(), labels.max())
        )

    one_hot_y = np.zeros((num_classes, labels.size))
    # Row = class index, column = example index.
    one_hot_y[labels, np.arange(labels.size)] = 1

    return one_hot_y

In [None]:
# One-hot encode the label row vectors of both splits.
# NOTE(review): one_hot is called per split, so the two encoded matrices only
# have the same number of rows if both splits contain the highest label.
y_train_encoded, y_test_encoded = one_hot(y_train), one_hot(y_test)

In [None]:
# Print the shapes of the four arrays that feed training and evaluation,
# in the order: train features, train targets, test features, test targets.
for split_array in (X_train, y_train_encoded, X_test, y_test_encoded):
    print(split_array.shape)

In [None]:
def initialize_parameters(n_x, n_h, n_y):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and scaled by
    0.001; biases start at zero. W1 is drawn before W2, so the result is
    reproducible for a given NumPy global random state.

    Parameters
    ----------
    n_x : int
        Number of input features.
    n_h : int
        Number of hidden units.
    n_y : int
        Number of output units.

    Returns
    -------
    dict
        ``{"W1": (n_h, n_x), "b1": (n_h, 1), "W2": (n_y, n_h), "b2": (n_y, 1)}``.
    """
    weight_scale = 0.001

    # Dict entries are evaluated in written order, so the W1 draw precedes W2.
    return {
        "W1": np.random.randn(n_h, n_x) * weight_scale,
        "b1": np.zeros((n_h, 1)),
        "W2": np.random.randn(n_y, n_h) * weight_scale,
        "b2": np.zeros((n_y, 1)),
    }

In [None]:
def activation_ReLU(Z):
    """Apply the rectified linear unit element-wise: ``max(0, z)``.

    Negative entries of ``Z`` become 0; non-negative entries pass through
    unchanged. The result has the same shape as ``Z``.
    """
    rectified = np.maximum(0, Z)
    return rectified

In [None]:
def activation_softmax(Z):
    """Column-wise softmax: turn each column of logits into probabilities.

    Parameters
    ----------
    Z : numpy.ndarray
        Logits with classes along axis 0 (one column per example).

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``Z`` whose entries along axis 0 are
        non-negative and sum to 1.

    Notes
    -----
    The per-column maximum is subtracted before exponentiating. Softmax is
    invariant to adding a constant to every logit of a column, so the result
    is mathematically unchanged, but ``np.exp`` can no longer overflow to
    ``inf`` (which previously produced ``inf / inf = nan`` for large logits).
    """
    # Largest exponent is now exp(0) = 1, so no overflow is possible.
    shifted = Z - np.max(Z, axis = 0, keepdims = True)
    expZ = np.exp(shifted)

    return expZ / np.sum(expZ, axis = 0, keepdims = True)

In [None]:
def deriv_ReLU(Z):
    """Derivative of ReLU as a boolean mask.

    Returns an array shaped like ``Z`` that is True where ``Z`` is strictly
    positive and False elsewhere (including at exactly 0). Multiplying by
    this mask zeroes out gradients of inactive units.
    """
    return np.greater(Z, 0)

In [None]:
def forward_propagation(X, parameters):
    """Run one forward pass through the two-layer network.

    The network computes ``ReLU(W1 X + b1)`` for the hidden layer and
    ``softmax(W2 A1 + b2)`` for the output layer.

    Parameters
    ----------
    X : numpy.ndarray
        Inputs with one example per column.
    parameters : dict
        Must contain ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``.

    Returns
    -------
    dict
        Intermediate values ``"Z1"``, ``"A1"``, ``"Z2"`` and ``"A2"``;
        ``"A2"`` holds the softmax output of the network.
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer: affine transform followed by ReLU.
    Z1 = W1.dot(X) + b1
    A1 = activation_ReLU(Z1)

    # Output layer: affine transform followed by softmax.
    Z2 = W2.dot(A1) + b2
    A2 = activation_softmax(Z2)

    return {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}

In [None]:
def backward_propagation(parameters, cache, X, y):
    """Compute the gradients of the cost with respect to all parameters.

    Parameters
    ----------
    parameters : dict
        Current parameters; only ``"W2"`` is needed to propagate the error
        back to the hidden layer.
    cache : dict
        Output of ``forward_propagation``; ``"A1"`` and ``"A2"`` are read.
    X : numpy.ndarray
        Inputs with one example per column; ``X.shape[1]`` is the batch size.
    y : numpy.ndarray
        Targets with the same shape as ``cache["A2"]``.

    Returns
    -------
    dict
        Gradients ``"dW1"``, ``"db1"``, ``"dW2"`` and ``"db2"``, each averaged
        over the examples.
    """
    n_examples = X.shape[1]
    W2 = parameters['W2']
    hidden_act, output_act = cache['A1'], cache['A2']

    # Output layer: the error signal is (prediction - target).
    delta_out = output_act - y
    grad_W2 = delta_out.dot(hidden_act.T) / n_examples
    grad_b2 = delta_out.sum(axis = 1, keepdims = True) / n_examples

    # Hidden layer: push the error back through W2 and gate it with the ReLU
    # mask. The mask is computed from A1, which is positive exactly where the
    # pre-activation Z1 is positive.
    delta_hidden = W2.T.dot(delta_out) * deriv_ReLU(hidden_act)
    grad_W1 = delta_hidden.dot(X.T) / n_examples
    grad_b1 = delta_hidden.sum(axis = 1, keepdims = True) / n_examples

    return {"dW1": grad_W1, "db1": grad_b1, "dW2": grad_W2, "db2": grad_b2}

In [None]:
import copy

def update_parameters(parameters, grads, alpha = 0.001):
    """Apply one gradient-descent step and return the updated parameters.

    Parameters
    ----------
    parameters : dict
        Current values under the keys ``"W1"``, ``"b1"``, ``"W2"``, ``"b2"``.
    grads : dict
        Gradients under the keys ``"dW1"``, ``"db1"``, ``"dW2"``, ``"db2"``.
    alpha : float, optional
        Learning rate (step size). Defaults to 0.001.

    Returns
    -------
    dict
        A new dict with the same four keys, each holding
        ``value - alpha * gradient``. The input dict and its arrays are left
        untouched.

    Notes
    -----
    ``value - alpha * gradient`` already allocates a fresh array, so no
    defensive copy of the weight matrices is needed before the update.
    """
    return {
        name: parameters[name] - alpha * grads["d" + name]
        for name in ("W1", "b1", "W2", "b2")
    }

In [None]:
def compute_cost(A2 ,y):
    """Average cross-entropy cost between predictions and one-hot targets.

    Parameters
    ----------
    A2 : numpy.ndarray
        Predicted probabilities, same shape as ``y``.
    y : numpy.ndarray
        Targets with one example per column; ``y.shape[1]`` is the number of
        examples the cost is averaged over.

    Returns
    -------
    float
        ``-(1/m) * sum(y * log(A2))``.

    Notes
    -----
    Probabilities are clipped to a tiny positive floor before the logarithm.
    Without it, an exact 0 in ``A2`` gives ``log(0) = -inf`` and, wherever the
    target is 0, ``0 * -inf = nan``, which turns the whole cost into NaN.
    Values above the floor are unaffected.
    """
    m = y.shape[1]
    eps = 1e-15

    safe_probs = np.clip(A2, eps, None)
    cost = -(1/m)*np.sum(y*np.log(safe_probs))

    return cost

In [None]:
def nn_model(X, y, n_h, num_iterations = 500, alpha = 0.001):
    """Train the two-layer network with full-batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Training inputs, one example per column; ``X.shape[0]`` sets the
        input layer size.
    y : numpy.ndarray
        Training targets, one example per column; ``y.shape[0]`` sets the
        output layer size.
    n_h : int
        Number of hidden units.
    num_iterations : int, optional
        Number of gradient-descent steps. Defaults to 500.
    alpha : float, optional
        Learning rate forwarded to ``update_parameters``. Defaults to 0.001,
        the value that was previously applied implicitly, so existing calls
        behave as before.

    Returns
    -------
    tuple
        ``(parameters, costs)``: the trained parameter dict and the list of
        costs, one entry per iteration (each measured before that iteration's
        update).
    """
    parameters = initialize_parameters(X.shape[0], n_h, y.shape[0])
    costs = []

    for i in range(num_iterations):
        # Forward pass and cost with the current parameters.
        cache = forward_propagation(X, parameters)
        cost = compute_cost(cache['A2'], y)
        costs.append(cost)

        # Backward pass followed by one gradient step.
        grads = backward_propagation(parameters, cache, X, y)
        parameters = update_parameters(parameters, grads, alpha)

        # Progress report every 100 iterations.
        if i % 100 == 0:
            print("Cost after iteration %i: %f" %(i, cost))

    return parameters, costs

In [None]:
# Train on the training split with a hidden layer of 1000 units
# (iteration count left at nn_model's default).
Parameters, Costs = nn_model(X_train, y_train_encoded, n_h=1000)

In [None]:
def accuracy(input, labels, parameters):
    """Return the classification accuracy of the network, in percent.

    Parameters
    ----------
    input : numpy.ndarray
        Inputs with one example per column.
    labels : numpy.ndarray
        One-hot targets with one example per column; the true class is the
        row index of the maximum in each column.
    parameters : dict
        Network parameters as accepted by ``forward_propagation``.

    Returns
    -------
    float
        Percentage (0-100) of examples whose predicted class (arg-max of the
        network output) equals the true class.
    """
    network_output = forward_propagation(input, parameters)['A2']

    predicted_classes = np.argmax(network_output, axis = 0)
    true_classes = np.argmax(labels, axis = 0)

    is_correct = predicted_classes == true_classes
    return np.mean(is_correct) * 100

In [None]:
# Report accuracy (in percent) on the training split, then on the held-out split.
train_accuracy = accuracy(X_train, y_train_encoded, Parameters)
print("Train Accuracy: ", train_accuracy)
test_accuracy = accuracy(X_test, y_test_encoded, Parameters)
print("Test Accuracy: ", test_accuracy)

In [None]:
# Pick one random held-out example, show it, and print the network's prediction.
index = np.random.randint(X_test.shape[1])
plt.imshow(X_test[:, index].reshape(28, 28), cmap='gray')
plt.show()

# forward_propagation expects one example per column, so the selected
# column is reshaped to (n_features, 1) before the forward pass.
cache = forward_propagation(X_test[:, index].reshape(X_test.shape[0], 1), Parameters)
predicted_y = np.argmax(cache['A2'], axis = 0)
print("Predicted:", predicted_y[0])