## Imports

In [2]:
import numpy as np

from utils.loss import binary_cross_entropy_loss, d_binary_cross_entropy_loss
from utils.number import Number
from utils.activation import *

## Model definition

In [81]:
class Layer():
    def __init__(self, num_prev_neurons, num_neurons, activation="linear"):
        self.num_neurons = num_neurons
        self.num_prev_neurons = num_prev_neurons

        self.activation = activation

        self.weights = np.random.randn(num_prev_neurons, num_neurons)
        self.biases = np.array([Number(i-0.5) for i in np.random.rand(num_neurons)])
    
    def print_params(self):
        print("WEIGHTS:")
        print(self.weights)
        print("BIASES:")
        print(self.biases)

    def forward(self, input):
        lin = input @ self.weights + self.biases

        # storing original input passes for chain rule operations during backprop
        self.original_input = input
        self.lin_pass = lin

        if self.activation == "sigmoid":
            return sigmoid(lin)
        elif self.activation == "tanh":
            return tanh(lin)
        return lin

    def backward(self, prev_chain, lr):
        ''' chain rule:
        dz1_db = 1
        dz1_dw = original_input

        dout_dw = prev_chain*da1_dz1*dz1_dw -- prev_chain = dout_da2*da2_dz2*dz2_da1
        dout_db = prev_chain*da1_dz1*dz1_db -- prev_chain = dout_da2*da2_dz2*dz2_da1

        prev_chain is an array representing the chain rule computed values so far in the subsequent layer
        e.g.
        dout_dw1 = dout_dw3*dw3_dw1 + dout_dw4*dw4_dw1
        dout_db1 = dout_db3*db3_db1 + dout_db4*dw4_db1
        prev_chain would be [[dout_dw3, dout_dw4], [dout_db3, dout_db4]]
        '''
        da = lambda x: x
        if self.activation == "sigmoid":
            da = d_sigmoid
        elif self.activation == "tanh":
            da = d_tanh
        
        prev_chain_dw, prev_chain_db = prev_chain

        dout_dw =  (np.tile(self.original_input, (self.num_neurons, 1)).reshape((self.num_prev_neurons, -1)) @ (prev_chain_dw * da(self.lin_pass))).reshape((self.num_prev_neurons, self.num_neurons))
        dout_db = np.dot(np.tile(da(self.lin_pass), (prev_chain_db.shape[0], 1)).T, prev_chain_db)

        self.weights -= lr * dout_dw
        self.biases -= lr * dout_db

        ''' returning new prev_chain:
        output = a2(z2(a1(z1(input)))) -- e.g. two activation + linear operations

        dout_dw = dout_da2*da2_dz2*dz2_da1*da1_dz1*dz1_dw
        dout_dw = prev_chain*da1_dz1*dz1_dw -- prev_chain = dout_da2*da2_dz2*dz2_da1

        dout_db = dout_da2*da2_dz2*dz2_da1*da1_dz1*dz1_db
        dout_db = prev_chain*da1_dz1*dz1_db -- prev_chain = dout_da2*da2_dz2*dz2_da1
        '''
        return (dout_dw, dout_db)

## Train step function definition

In [58]:
# defining train step function for gradient descent

def forward_pass(model, input):
    logit = input
    for i in range(len(model)):
        logit = model[i].forward(logit)
    return logit

def train_step(model, input, label, lr):
    logit = forward_pass(model, input)

    loss = binary_cross_entropy_loss(logit, label)
    # duplicating to update weights + biases
    prev_chain = np.tile(d_binary_cross_entropy_loss(logit, label), (2, 1))

    for i in range(len(model)):
        prev_chain = model[len(model)-i-1].backward(prev_chain, lr)
    
    return loss

## One neuron perceptron test

### NOT gate task

In [82]:
inputs = [1, 0]
labels = [0, 1]

model = [Layer(1, 1, activation="sigmoid")]

real_input = np.array([inputs[0]])
real_label = np.array([labels[0]])

for i in range(1000):
    loss = train_step(model, real_input, real_label, 0.1)

    real_input = np.array([inputs[i%2]])
    real_label = np.array([labels[i%2]])

    if i % 100 == 0:
        print("Loss: " + str(loss))

print(forward_pass(model, np.array([inputs[0]])))
print(forward_pass(model, np.array([inputs[1]])))

Loss: 0.4375142632327579
Loss: 0.34176956427860294
Loss: 0.23621155401343397
Loss: 0.1770863782685856
Loss: 0.14052307819664078
Loss: 0.1159917996506304
Loss: 0.09850929560680108
Loss: 0.0854713783894216
Loss: 0.0754005490306617
Loss: 0.06740179536363063
[0.03895825]
[0.9409139]


## Multilayer perceptron network test
### NOT gate task

In [84]:
inputs = [1, 0]
labels = [0, 1]

model = [Layer(1, 10, activation="sigmoid"), Layer(10, 10, activation="sigmoid"), Layer(10, 1, activation="sigmoid")]

real_input = np.array([inputs[0]])
real_label = np.array([labels[0]])

for i in range(1000):
    loss = train_step(model, real_input, real_label, 0.1)

    real_input = np.array([inputs[i%2]])
    real_label = np.array([labels[i%2]])

    if i % 100 == 0:
        print("Loss: " + str(loss))

print(forward_pass(model, np.array([inputs[0]])))
print(forward_pass(model, np.array([inputs[1]])))

Loss: 0.13368054873893712
Loss: 0.5359913095532125
Loss: 0.4616217827486787
Loss: 0.257270966902606
Loss: 0.14387368644854448
Loss: 0.08903602230913465
Loss: 0.06084829464398274
Loss: 0.04480855692513374
Loss: 0.034825973867756044
Loss: 0.02815834348847625
[0.06173959]
[0.9768192]
