In [1]:
from sklearn.datasets import load_digits
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the 8x8 digits data set: X holds one flattened image per row,
# y the digit label of each image.
X, y = load_digits(return_X_y=True)
# Binary target: 1 for digits 5-9, 0 for digits 0-4.
# The parentheses matter: `y >= 5 + 0` parses as `y >= (5 + 0)`, which leaves
# a boolean array instead of the intended 0/1 integers.
y = (y >= 5).astype(int)

In [3]:
def CostFunction(y_hat, y, eps=1e-15):
    """Average binary cross-entropy between predictions and 0/1 targets.

    Parameters
    ----------
    y_hat : array_like
        Predicted probabilities. Its last axis must match the length of
        ``y`` (the training loop passes a ``(1, m)`` row of sigmoid outputs).
    y : array_like
        Ground-truth labels (0/1 or boolean); ``y.shape[0]`` is used as the
        number of samples ``m``.
    eps : float, optional
        Predictions are clipped to ``[eps, 1 - eps]`` before taking the log.

    Returns
    -------
    float
        ``-1/m * sum(y * log(y_hat) + (1 - y) * log(1 - y_hat))``.
    """
    y = np.asarray(y, dtype=float)
    m = y.shape[0]
    # Without clipping, a prediction of exactly 0 or 1 yields log(0) = -inf,
    # and -inf * 0 inside the dot product becomes NaN. nansum would then drop
    # the *entire* sum and report a cost of 0 for an arbitrarily bad model.
    p = np.clip(np.asarray(y_hat, dtype=float), eps, 1 - eps)
    # nansum is kept so that NaNs already present in the inputs are skipped
    # rather than propagated, as before.
    return -1 / m * np.nansum(
        np.matmul(np.log(p), y) + np.matmul(np.log(1 - p), 1 - y))

In [4]:
class BigSmallNumberNN():
    """Fully connected binary classifier with layers 64 -> 32 -> 10 -> 1.

    The two hidden layers use ReLU and the output layer uses a sigmoid.
    Samples are stored column-wise: ``forward`` expects ``X`` with shape
    ``(64, m)`` and returns probabilities with shape ``(1, m)``.
    ``backward`` performs one full-batch gradient-descent step for the
    binary cross-entropy loss, using the activations cached by ``forward``.
    """

    def __init__(self, seed=None):
        """Initialise weights with small Gaussian noise and zero biases.

        Parameters
        ----------
        seed : int or None, optional
            When given, weights are drawn from a private
            ``np.random.RandomState(seed)`` so the initialisation is
            reproducible. When ``None`` (default) the global ``np.random``
            generator is used, exactly as before.
        """
        rng = np.random if seed is None else np.random.RandomState(seed)

        self.W1 = rng.randn(32, 64) * 0.1
        self.b1 = np.zeros((32, 1))

        self.W2 = rng.randn(10, 32) * 0.1
        self.b2 = np.zeros((10, 1))

        self.W3 = rng.randn(1, 10) * 0.1
        self.b3 = np.zeros((1, 1))

    def forward(self, X):
        """Run a forward pass and cache every pre-/post-activation.

        Parameters
        ----------
        X : ndarray, shape (n_features, m)
            Input batch, one sample per column.

        Returns
        -------
        ndarray, shape (1, m)
            Sigmoid output of the last layer.
        """
        self.A0 = X

        self.Z1 = np.matmul(self.W1, self.A0) + self.b1
        self.A1 = self.ReLU(self.Z1)

        self.Z2 = np.matmul(self.W2, self.A1) + self.b2
        self.A2 = self.ReLU(self.Z2)

        self.Z3 = np.matmul(self.W3, self.A2) + self.b3
        self.A3 = self.Sigmoid(self.Z3)
        return self.A3

    def ReLU(self, z):
        """Element-wise ``max(z, 0)``."""
        return np.maximum(z, 0)

    def Sigmoid(self, z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``."""
        return 1 / (1 + np.exp(-z))

    def backward(self, y, lr=0.01):
        """Back-propagate the cross-entropy loss and take one descent step.

        Must be called after ``forward`` on the same batch. The gradients
        are stored on the instance (``dZ*``, ``dW*``, ``db*``, ``dA*``).

        Parameters
        ----------
        y : array_like
            Binary targets, one per sample of the last forward batch. Any
            shape is accepted; it is flattened into a ``(1, m)`` row.
        lr : float, optional
            Learning rate.

        Raises
        ------
        ValueError
            If the number of targets differs from the number of samples in
            the last forward pass.
        """
        # Use a (1, m) row so that A3 - y is an element-wise difference for
        # any label layout, instead of relying on implicit broadcasting.
        y = np.asarray(y, dtype=float).reshape(1, -1)
        m = y.shape[1]
        if m != self.A3.shape[1]:
            raise ValueError(
                "y has %d targets but the last forward pass had %d samples"
                % (m, self.A3.shape[1]))

        # Compute every gradient first, using the weights that produced the
        # cached activations. Updating a layer before back-propagating
        # through it would mix old activations with new weights.
        self.dZ3 = (self.A3 - y) / m
        self.dW3 = np.matmul(self.dZ3, self.A2.T)
        self.db3 = np.sum(self.dZ3, axis=1, keepdims=True)

        self.dA2 = np.matmul(self.W3.T, self.dZ3)
        self.dZ2 = self.dA2 * (self.Z2 > 0)  # ReLU derivative as a 0/1 mask
        self.dW2 = np.matmul(self.dZ2, self.A1.T)
        self.db2 = np.sum(self.dZ2, axis=1, keepdims=True)

        self.dA1 = np.matmul(self.W2.T, self.dZ2)
        self.dZ1 = self.dA1 * (self.Z1 > 0)
        self.dW1 = np.matmul(self.dZ1, self.A0.T)
        self.db1 = np.sum(self.dZ1, axis=1, keepdims=True)

        # Gradient *descent*: step against the gradient to reduce the loss.
        self.W3 -= lr * self.dW3
        self.b3 -= lr * self.db3

        self.W2 -= lr * self.dW2
        self.b2 -= lr * self.db2

        self.W1 -= lr * self.dW1
        self.b1 -= lr * self.db1

In [5]:
# Seed NumPy's global generator before building the network: the weights are
# drawn from np.random, so without a seed every Restart & Run All would train
# a different model.
np.random.seed(42)
nn = BigSmallNumberNN()

In [6]:
# Full-batch training: every epoch runs one forward pass over the whole data
# set (samples as columns, hence X.T), evaluates the cost and applies one
# parameter update.
N_EPOCHS = 200
cost_history = []  # cost per epoch, kept so convergence can be inspected/plotted
for i in range(N_EPOCHS):
    nn_out = nn.forward(X.T)
    cost = CostFunction(nn_out, y)
    cost_history.append(cost)
    nn.backward(y)