In [613]:
# Adapted from: https://www.kaggle.com/code/wwsalmon/simple-mnist-nn-from-scratch-numpy-no-tf-keras
import numpy as np

In [593]:
X = np.array([[0, 0, 0],
              [0, 0, 1],
              [0, 1, 0],
              [0, 1, 1],
              [1, 0, 0],
              [1, 0, 1]])

X = X.T

Y = np.array([[0],
              [0],
              [0],
              [1],
              [0],
              [1]])

M = X.shape[1]

In [594]:
X = (X - np.mean(X)) / np.std(X)

In [595]:
nZ1 = 10 # number of nodes in hidden layer Z1
nZ2 = 20 # number of nodes in Z2

In [596]:
def init_params():
    W1 = np.random.rand(nZ1, X.shape[0]) - 0.5
    W2 = np.random.rand(nZ2, nZ1) - 0.5
    W3 = np.random.rand(Y.shape[1], nZ2) - 0.5
    b1 = np.random.rand(nZ1, 1) - 0.5
    b2 = np.random.rand(nZ2, 1) - 0.5
    b3 = np.random.rand(Y.shape[1], 1) - 0.5
    
    return W1, b1, W2, b2, W3, b3

In [597]:
leakyReLUConstant = 0.01

def leakyReLU(X):
    return np.maximum(leakyReLUConstant * X, X)

def derivativeLeakyReLU(X):
    return np.where(X > 0, 1, leakyReLUConstant)

def sigmoid(X):
    return 1 / (1 + np.exp(-X))

def derivativeSigmoid(X):
    return X * (1 - X)

In [598]:
def forward(X, W1, W2, W3, b1, b2, b3):
    Z1 = W1.dot(X) + b1
    A1 = leakyReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = leakyReLU(Z2)
    Z3 = W3.dot(A2) + b3
    A3 = sigmoid(Z3)
    
    return Z1, A1, Z2, A2, Z3, A3


In [599]:
def backwardProp(X, Y, W2, W3, Z1, A1, Z2, A2, Z3, A3):
    dZ3 = (A3 - Y.T)
    dW3 = 1/M * dZ3.dot(A2.T)
    db3 = 1/M * np.sum(dZ3)
    dZ2 = derivativeLeakyReLU(Z2) * W3.T.dot(dZ3)
    dW2 = 1/M * dZ2.dot(A1.T)
    db2 = 1/M * np.sum(dZ2)
    dZ1 = derivativeLeakyReLU(Z1) * W2.T.dot(dZ2)
    dW1 = 1/M * dZ1.dot(X.T)
    db1 = 1/M * np.sum(dZ1)
    
    return dW3, db3, dW2, db2, dW1, db1

In [600]:
def update_params(alpha, W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    
    W3 = W3 - alpha * dW3
    b3 = b3 - alpha * db3
    
    return W1, b1, W2, b2, W3, b3

In [601]:
def get_predictions(A3):
    return np.where(A3 > 0.5, 1, 0)

def get_accuracy(predictions, Y):
    return np.sum(predictions == Y) / M

In [602]:
def learn(X, Y, alpha, iter):
    W1, b1, W2, b2, W3, b3 = init_params()
    for i in range(iter):
        Z1, A1, Z2, A2, Z3, A3 = forward(X, W1, W2, W3, b1, b2, b3)
        dW3, db3, dW2, db2, dW1, db1 = backwardProp(X, Y, W2, W3, Z1, A1, Z2, A2, Z3, A3)
        W1, b1, W2, b2, W3, b3 = update_params(alpha, W1, b1, W2, b2, W3, b3, dW1, db1, dW2, db2, dW3, db3)
        if i % 10 == 0:
            print("Iteration", i)
            predictions = get_predictions(A3)
            print(A3)
            print(Y.T)
            print(predictions)
            print(get_accuracy(predictions, Y.T))
    return W1, b1, W2, b2, W3, b3

In [603]:
def testPrediction(testX, W1, W2, W3, b1, b2, b3):
    Z1, A1, Z2, A2, Z3, A3 = forward(testX, W1, W2, W3, b1, b2, b3)
    return A3

In [618]:
W1, b1, W2, b2, W3, b3 = learn(X, Y, 0.01, 500)

Iteration 0
[[0.69069211 0.65473233 0.67547905 0.6135959  0.61541254 0.59345704]]
[[0 0 0 1 0 1]]
[[1 1 1 1 1 1]]
0.3333333333333333
Iteration 10
[[0.63634255 0.60438116 0.62793192 0.58940783 0.57621544 0.56640568]]
[[0 0 0 1 0 1]]
[[1 1 1 1 1 1]]
0.3333333333333333
Iteration 20
[[0.59693293 0.57012939 0.59824714 0.57379681 0.55325234 0.54926684]]
[[0 0 0 1 0 1]]
[[1 1 1 1 1 1]]
0.3333333333333333
Iteration 30
[[0.57333618 0.54376974 0.57996032 0.56414567 0.53882752 0.5390542 ]]
[[0 0 0 1 0 1]]
[[1 1 1 1 1 1]]
0.3333333333333333
Iteration 40
[[0.55491111 0.52058894 0.56390077 0.55801587 0.5271057  0.53162698]]
[[0 0 0 1 0 1]]
[[1 1 1 1 1 1]]
0.3333333333333333
Iteration 50
[[0.54120599 0.50247133 0.55009822 0.55315    0.51651151 0.52600357]]
[[0 0 0 1 0 1]]
[[1 1 1 1 1 1]]
0.3333333333333333
Iteration 60
[[0.53064957 0.48975394 0.53861811 0.54768772 0.50860096 0.52248761]]
[[0 0 0 1 0 1]]
[[1 0 1 1 1 1]]
0.5
Iteration 70
[[0.52151818 0.47816288 0.52817918 0.54261775 0.50167152 0.520463

In [619]:
testX = np.array([[0, 0, 0],
                  [1, 1, 1],
                  [0, 0, 1],
                  [1, 1, 0]])
testX = (testX - np.mean(testX)) / np.std(testX)

testX = testX.T
print(testPrediction(testX, W1, W2, W3, b1, b2, b3))


[[0.21752891 0.91074511 0.1293169  0.52712876]]
