# HW 1 - Building a network from scratch

* x1, x2, x3 = (1, 2, -1)
* Weights have been initated with 1. value
* Biases have been initated with 0 value
* The reuqired network must include 2 hidden layers, with 2 neurons in each layer
* One dimension output layer
* true y value = 1


# Loading required packages 

In [None]:
import pandas as pd
import numpy as np
import numpy.typing as npt

# Utils functions

In [230]:
def ReLU(x):
    return np.clip(x, 0, None)

def d_ReLU(x):
    x[x<=0] = 0
    x[x>0] = 1
    return x

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def d_sigmoid(x):
    return (1 - sigmoid(x)) * sigmoid(x)

def loss(y_pred: npt.ArrayLike, y_true: npt.ArrayLike):
    return ((y_pred - y_true) ** 2).mean()

def d_loss(y_pred: npt.ArrayLike, y_true: npt.ArrayLike):
    n = y_true.shape[0]
    gradient = 2. * (y_pred - y_true) / n
    return gradient



# The *Layer* class - OOP

 * The learning rate is set to 0.01
 * The activation function is ReLU

In [231]:
class Layer:

    activationFunctions = {
        'ReLU': (ReLU, d_ReLU),
        'sigmoid': (sigmoid, d_sigmoid)
    }
    
    lr = 0.1
    
    def __init__(self,
                 neurons: int,
                 inputs: int,
                 activation: str = 'ReLU',
                 init_w_with_1: bool = False):
        """
        The layer instance should be initiated with the input count and number of neurons.
        """
        if init_w_with_1:
            self.W = np.ones((neurons, inputs))
        else:
            self.W = np.random.randn(neurons, inputs)
        self.b = np.zeros((neurons, 1))
        self.act, self.d_act = self.activationFunctions.get(activation)
        
    def forward(self, A_prev: npt.ArrayLike) -> npt.ArrayLike:
        """
        Each forward step will calculate the next layer neurin as a function of its prev input and current 
        weight dot product, on top of that, the activation function will be applied.
        """
        self.A_prev = A_prev
        self.Z = np.dot(self.W, self.A_prev) + self.b
        self.A = self.act(self.Z)
        return self.A
        
    def backprop(self, dA: npt.ArrayLike) -> npt.ArrayLike:
        dZ = np.multiply(self.d_act(self.Z), dA)
        dW = 1/dZ.shape[1] * np.dot(dZ, self.A_prev.T)
        db = 1/dZ.shape[1] * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = np.dot(self.W.T, dZ)
        
        self.W = self.W - self.lr * dW
        self.b = self.b - self.lr * db
        
        return dA_prev

In [229]:
def logit(index, text, value = None, add_sep: bool = False):
    print(f"{text} {index}: \n {value if value is not None else ''}")
    if add_sep:
        print(f"{'*'*50}")

# Given inputs

x_train = np.array([[1.0], [2.0], [-1.0]])
y_train = np.array([[0]]) 

m = 1
epochs = 1

layers = [Layer(2, 3, 'ReLU'), Layer(2, 2, 'ReLU'), Layer(1, 2, 'ReLU')]
costs = []

for i, epoch in enumerate(range(epochs)):
    logit(i, "Epoch", add_sep=True)
    # FORWARD
    A = x_train
    for j, layer in enumerate(layers):
        A = layer.forward(A)
        logit(j, "Layer", A)
    
    # COSTS
    cost = loss(y_train, A)
    logit(i, "Epoch Cost", cost)
    costs.append(cost)
    
    # BACKPROP
    dA = d_loss(y_train, A)
    for k, layer in enumerate(reversed(layers)):
        dA = layer.backprop(dA)

print(f"{'*'*50}\nFinal param values:\n{'*'*50}\n")
print(f"Weight: {layer.W}")
print(f"Biases: {layer.b}")

Epoch 0: 
 
**************************************************
Layer 0: 
 [[2.]
 [2.]]
Layer 1: 
 [[4.]
 [4.]]
Layer 2: 
 [[8.]]
Epoch Cost 0: 
 64.0
**************************************************
Final param values:
**************************************************

Weight: [[ 4.2  7.4 -2.2]
 [ 4.2  7.4 -2.2]]
Biases: [[3.2]
 [3.2]]


### Abstract Class - Layer

In [367]:
from abc import ABC, abstractclassmethod

class Layer(ABC):
    def __init__(self):
        self.input = None
        self.output = None
    
    @abstractclassmethod
    def forward_prop(self, input):
        pass
    
    @abstractclassmethod
    def backward_prop(self, input):
        pass

In [375]:
weights = np.ones((3, 2)) 
weights
# # np.random.rand(1, output_size) - 0.5
bias = np.ones((1, 1)) 
bias

array([[1.]])

### Recall - 

    * DE/DX = (DE/DY)*W.T
    * DE/DW = X.T*(DE/DY)
    * DE/DB = DE/DY

### Fully Connected && Activation Layer Classes

In [526]:
import numpy as np
import numpy.typing as npt

class ActivationLayer(Layer):
    def __init__(self, activation, d_activation):
        self.activation = activation
        self.d_activation = d_activation
    
    def forward_prop(self, input_data: npt.ArrayLike) -> npt.ArrayLike:
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output
    
    def backward_prop(self, output_err: npt.ArrayLike, lr) -> npt.ArrayLike:
        return self.d_activation(self.input) * output_err
    
    
class FullyConnectedLayer(Layer):
    def __init__(self, input_size, output_size, hw_1_init: bool = False):
        if hw_1_init:
            self.weights = np.ones((input_size, output_size)) 
            self.bias = np.zeros((1, 1))
        else:
            self.weights = np.random.rand(input_size, output_size) - 0.5
            self.bias = np.random.rand(1, output_size) - 0.5
        
        
    def forward_prop(self, input_data: npt.ArrayLike) -> npt.ArrayLike:
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output
        
    def backward_prop(self, output_err: npt.ArrayLike, lr: float) -> npt.ArrayLike:
        input_err = np.dot(output_err, self.weights.T)
        weights_err = np.dot(self.input.T, output_err)
        
        self.weights -= lr * weights_err
        self.bias -= lr * output_err
        return input_err

### Network

In [498]:
from __future__ import annotations

class Network:
    
    def __init__(self):
        self.layers = []
        self.loss = None
        self.d_loss = None
    
    def add(self, layer: Layer):
        self.layers.append(layer)
    
    def use(self, loss, d_loss):
        self.loss = loss
        self.d_loss = d_loss
    
    def predict(self, input_data: npt.ArrayLike) -> list:
        samples = len(input_data)
        result = []
    
        for i in range(samples):
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_prop(output)
            result.append(output)
            
        return result
    
    def fit(self, x_train: npt.ArrayLike, y_train: npt.ArrayLike, epochs: int, lr: float):
        samples = len(x_train)
        
        for i in range(epochs):
            err = 0
            for j in range(samples):
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_prop(output)
                
                err += self.loss(y_train[j], output)
                
                error = self.d_loss(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_prop(error, lr)
            
            err /= samples
            print(f"Epoch {i+1}/{epochs} >> error={err}")
        

### Util Functions

In [499]:
def ReLU(x):
    return np.clip(x, 0, None)

def d_ReLU(x):
    x[x<=0] = 0
    x[x>0] = 1
    return x

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def d_sigmoid(x):
    return (1 - sigmoid(x)) * sigmoid(x)

def mse(y_true, y_pred):
    return np.mean(np.power(y_true-y_pred, 2))

def d_mse(y_true, y_pred):
    return 2*(y_pred-y_true)/y_true.size

### My Inputs

In [500]:
ACT_FUNCTIONS = {
    'ReLU': (ReLU, d_ReLU),
    'sigmoid': (sigmoid, d_sigmoid)
}

LEARNING_RATE = 0.1
EPOCHS = 1

### Network Inititation - General Example

In [501]:
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])

net = Network()
net.add(FullyConnectedLayer(2, 3))
net.add(ActivationLayer(*ACT_FUNCTIONS['ReLU']))
net.add(FullyConnectedLayer(3, 1))
net.add(ActivationLayer(*ACT_FUNCTIONS['ReLU']))

net.use(mse, d_mse)
net.fit(x_train, y_train, epochs=EPOCHS, lr=LEARNING_RATE)

Epoch 1/1 >> error=0.5


### Network Inititation - Home Work I

In [534]:
x_train_hw = np.array([[1], [2], [-1]])
y_train_hw = np.array([[0]])


net_hw1 = Network()
net_hw1.add(FullyConnectedLayer(3, 2, hw_1_init=True))
net_hw1.add(ActivationLayer(*ACT_FUNCTIONS['ReLU']))
net_hw1.add(FullyConnectedLayer(2, 2, hw_1_init=True))
net_hw1.add(ActivationLayer(*ACT_FUNCTIONS['ReLU']))

net_hw1.use(mse, d_mse)
net_hw1.fit(x_train_hw, y_train_hw, epochs=EPOCHS, lr=LEARNING_RATE)

ValueError: shapes (1,) and (3,2) not aligned: 1 (dim 0) != 3 (dim 0)

In [527]:
np.random.rand(input_size, output_size) - 0.5

array([[ 0.1764067 ,  0.09390614],
       [ 0.00358406, -0.46632579],
       [ 0.33714902,  0.00653893]])

In [531]:
input_size = 3
output_size = 2
x_train_hw = np.array([[1], [2], [-1]]).T
np.dot(x_train_hw,  np.ones((input_size, output_size))) + np.ones((1, 1))


array([[3., 3.]])

In [519]:
x_train_hw.shape
np.ones((input_size, output_size)).shape

(3, 2)

3

In [525]:
np.dot(x_train_hw.T,  np.ones((input_size, output_size))) + np.zeros((1, 1))

array([[2., 2.]])