In [1]:
import numpy as np

## MLP Implemenation
exception handling skipped

In [2]:
class NeuralNetwork():
    def __init__(self, sizes):
        self.layers = len(sizes) # number of layers, including input and output layers
        self.sizes = sizes       # number of neurons in each layers. Its a vector
        self.weights = [np.random.rand(self.sizes[i-1], self.sizes[i]) for i in range(1, self.layers)]
        self.bias =  [np.random.rand(s, 1) for s in self.sizes[1:]]

    # forward propagation function
    def forward(self, x):
        for w, b in list(zip(self.weights, self.bias)):
            x = self.sigmoid(w.T @ x + b)
        return x

    # backward propagation function
    def backprop(self, x, y):
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b =  [np.zeros(b.shape) for b in self.bias]
        deltas = [np.zeros(w.shape) for w in self.weights]

        # forward propagation
        activation = x
        activations = [x]
        zs = []

        for w, b in list(zip(self.weights, self.bias)):
            z = w.T @ activation + b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)

        # backprop
        deltas[-1] = 2 * (activations[-1] - y)

        for i in range(self.layers-2, -1, -1):
            diag_zs_prime = np.diag(self.sigmoid_prime(zs[i]).flatten())
            delta = diag_zs_prime.T @ deltas[i]
            nabla_w[i] = activations[i] @ delta.T
            nabla_b[i] = delta

            v = self.weights[i] @ diag_zs_prime
            deltas[i-1] = v @ deltas[i]

        # for nw, nb in list(zip(nabla_w, nabla_b)):
        #     print(nw.shape, nb.shape, end=" ")
        # print()
            
        return (nabla_w, nabla_b)


    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_prime(self, x):
        return np.exp(x) / ((1 + np.exp(x)) ** 2)

## Gradient Descent Implementation

In [3]:
def SGD(model, x, y, step_length, n_epochs=1000):
    
    for i in range(n_epochs):

        e = 0
        nabla_w = [np.zeros(w.shape) for w in model.weights]
        nabla_b = [np.zeros(b.shape) for b in model.bias]
        
        for xi, yi in list(zip(x, y)):
            xi = np.array(xi).reshape(-1, 1)
            yi = np.array(yi).reshape(-1, 1)
            
            # forward propagation
            y_hat = model.forward(xi)
            e = np.sum((y_hat - yi) ** 2)

            # backward propagation
            delta_nabla_w, delta_nabla_b = model.backprop(xi, yi)
            nabla_w = [nw + dnw for nw, dnw in list(zip(nabla_w, delta_nabla_w))]
            nabla_b = [nb + dnb for nb, dnb in list(zip(nabla_b, delta_nabla_b))]
            
        model.weights = [mw - step_length * nw for mw, nw in list(zip(model.weights, nabla_w))]
        model.bias = [mb - step_length * nb for mb, nb in list(zip(model.bias, nabla_b))]

    
        if i %100 == 0:
            print(f"Epoch {i}: Error : {e}")

## Verification

In [4]:
from sklearn.datasets import make_multilabel_classification

data = make_multilabel_classification(n_samples=1000, n_classes=4, n_features=4, random_state=42)
X, y = data
X = np.asarray(X)
y = np.asarray(y)
print(X.shape, type(X))
print(y.shape, type(y))

(1000, 4) <class 'numpy.ndarray'>
(1000, 4) <class 'numpy.ndarray'>


In [5]:
sizes = [X.shape[1], 8, 4]
model = NeuralNetwork(sizes)
print(model)

<__main__.NeuralNetwork object at 0x789915b15150>


In [6]:
n_epochs = 1000
step_length = 0.01
SGD(model, X, y, step_length, n_epochs)

Epoch 0: Error : 0.002716219737427043
Epoch 100: Error : 3.5764331730066363
Epoch 200: Error : 2.6031613715624258
Epoch 300: Error : 1.9269002213009139
Epoch 400: Error : 1.92676713083079
Epoch 500: Error : 1.4504929899131977
Epoch 600: Error : 1.6901573254337356
Epoch 700: Error : 1.0693091520153255
Epoch 800: Error : 1.1493601886416813
Epoch 900: Error : 1.8965593877047708


In [15]:
X_test = X[:100]
y_test = y[:100]


for i in range(100):
    xt = X_test[i].reshape(-1, 1)
    y_hat = model.forward(xt)
    print(y_hat, y_test[i])

[[1.43403349e-04]
 [6.18090333e-01]
 [1.62024325e-01]
 [7.40736682e-01]] [0 1 0 0]
[[0.00070835]
 [0.01241954]
 [0.25967948]
 [0.04992158]] [0 0 0 0]
[[0.00203356]
 [0.04917041]
 [0.9526139 ]
 [0.00321786]] [0 0 1 0]
[[4.12123004e-04]
 [8.69369376e-01]
 [9.17228532e-01]
 [1.49322594e-01]] [1 1 1 1]
[[1.43403358e-04]
 [6.18090339e-01]
 [1.62024322e-01]
 [7.40736692e-01]] [1 1 1 1]
[[1.43403374e-04]
 [6.18090352e-01]
 [1.62024317e-01]
 [7.40736712e-01]] [0 1 0 1]
[[1.43403350e-04]
 [6.18090333e-01]
 [1.62024324e-01]
 [7.40736682e-01]] [0 1 1 1]
[[0.00070835]
 [0.01241954]
 [0.25967947]
 [0.04992158]] [0 0 0 0]
[[4.12123004e-04]
 [8.69369376e-01]
 [9.17228532e-01]
 [1.49322594e-01]] [0 1 1 0]
[[0.00203356]
 [0.04917041]
 [0.9526139 ]
 [0.00321786]] [0 0 1 0]
[[0.00203356]
 [0.04917041]
 [0.9526139 ]
 [0.00321786]] [0 0 1 1]
[[0.00070561]
 [0.01256505]
 [0.25940153]
 [0.0503838 ]] [1 0 0 0]
[[2.22976480e-04]
 [2.97202389e-01]
 [1.85627709e-01]
 [4.86437586e-01]] [0 0 0 0]
[[0.00203356]
 [0

In [7]:
# sizes = [5, 6, 7, 2]
# layers = len(sizes)

# weight = [np.random.rand(sizes[i-1], sizes[i]) for i in range(1, layers)]
# bias =  [np.random.rand(s) for s in sizes[1:]]

# print(weight)
# print(bias)
# model = NeuralNetwork(np.array([4, 8, 8, 3]))
# x = np.random.rand(4)
# print("Input:")
# print(x)
# output = model.forward(x)
# print("\nOutput:")
# print(output)

# y = np.array([[1, 2, 3, 5]])
# print(y.shape)
# y = y.reshape(1, 4)
# print(y)

# y = np.array([1, 2, 3, 4])
# print(y.shape)
# # y = y.reshape(1, -1)
# # print(y.shape)

# a = np.zeros((4, 1))
# print(a.shape)
# # print(a.shape == y.shape)

# sub = y - a
# print(sub)

# a = np.array([1, 2, 3, 4])
# diag = np.eye(len(a))
# a = diag * a
# print(a)