# HW 1 - Building a network from scratch

* x1, x2, x3 = (1, 2, -1)
* Weights are initialized to 1
* Biases are initialized to 0
* The required network must include 2 hidden layers, with 2 neurons in each layer
* One-dimensional output layer
* true y value = 1


# Loading required packages 

In [2]:
import pandas as pd
import numpy as np
import numpy.typing as npt

### Abstract Class - Layer

In [3]:
from abc import ABC, abstractclassmethod, abstractmethod

class Layer(ABC):
    """Abstract base class for a single network layer.

    A concrete layer must implement both propagation directions. The base
    initializer only creates the ``input`` / ``output`` slots, which start
    as ``None``.
    """

    def __init__(self):
        self.input = None
        self.output = None

    # ``abstractmethod`` (not the deprecated ``abstractclassmethod``) is used
    # because the overrides are ordinary instance methods taking ``self``.
    @abstractmethod
    def forward_prop(self, input):
        """Compute and return the layer's output for ``input``."""

    @abstractmethod
    def backward_prop(self, input):
        """Propagate an error signal backwards through the layer.

        NOTE: the concrete layers in this file override this with the
        signature ``backward_prop(output_err, lr)``.
        """

In [52]:
# Scratch check: np.zeros(1,) builds a 1-D array holding a single 0.0.
np.zeros(1,)

array([0.])

### Recall - 

    * DE/DX = (DE/DY)*W.T
    * DE/DW = X.T*(DE/DY)
    * DE/DB = DE/DY

### Fully Connected && Activation Layer Classes

In [175]:
class FullyConnectedLayer(Layer):
    """Dense layer computing ``input @ weights + bias``.

    Attributes:
        weights: array of shape ``(input_size, output_size)``.
        bias: array of shape ``(1, output_size)``.
    """

    def __init__(self, input_size, output_size, hw_1_init: bool = False):
        """Create the layer parameters.

        Args:
            input_size: number of input features.
            output_size: number of neurons in the layer.
            hw_1_init: when true, use the deterministic homework
                initialization (all weights 1, all biases 0); otherwise
                draw both uniformly from ``[-0.5, 0.5)``.
        """
        # Make sure ``input`` / ``output`` exist before the first forward pass.
        super().__init__()
        if hw_1_init:
            self.weights = np.ones((input_size, output_size))
            self.bias = np.zeros((1, output_size))
        else:
            self.weights = np.random.rand(input_size, output_size) - 0.5
            self.bias = np.random.rand(1, output_size) - 0.5

    def forward_prop(self, input_data: npt.ArrayLike) -> npt.ArrayLike:
        """Cache ``input_data`` and return ``input_data @ weights + bias``."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_prop(self, output_err: npt.ArrayLike, lr: float) -> npt.ArrayLike:
        """Apply one gradient-descent step and return the input error.

        Args:
            output_err: dLoss/dOutput for the most recent forward pass.
            lr: learning rate used for the parameter update.

        Returns:
            dLoss/dInput, computed with the weights as they were *before*
            this update.
        """
        input_err = np.dot(output_err, self.weights.T)
        weights_err = np.dot(self.input.T, output_err)
        # Sum over the row axis so the bias keeps its (1, output_size) shape
        # even when ``output_err`` holds several rows; for a single row this
        # is exactly ``output_err``.
        bias_err = np.sum(np.atleast_2d(output_err), axis=0, keepdims=True)
        print(f"{'*'*13}\nDLoss/DW:\n{weights_err}\n{'*'*13}\n")

        self.weights -= lr * weights_err
        self.bias -= lr * bias_err
        return input_err

class ActivationLayer(Layer):
    """Parameter-free layer that applies an activation function."""

    def __init__(self, activation, d_activation):
        """Store the activation callable and its derivative.

        Args:
            activation: callable applied to the layer input in the forward pass.
            d_activation: callable returning the derivative of ``activation``
                evaluated at the layer input.
        """
        # Make sure ``input`` / ``output`` exist before the first forward pass.
        super().__init__()
        self.activation = activation
        self.d_activation = d_activation

    def forward_prop(self, input_data: npt.ArrayLike) -> npt.ArrayLike:
        """Cache ``input_data`` and return ``activation(input_data)``."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_prop(self, output_err: npt.ArrayLike, lr) -> npt.ArrayLike:
        """Return ``d_activation(input) * output_err``.

        ``lr`` is accepted only so every layer shares the same call
        signature; this layer has no parameters to update.
        """
        return self.d_activation(self.input) * output_err

### Network

In [136]:
from __future__ import annotations

class Network:
    """Sequential container that chains layers and trains them sample by sample."""

    def __init__(self):
        self.layers = []
        self.loss = None
        self.d_loss = None

    # The annotation is quoted so defining this class does not require
    # ``Layer`` to be resolvable at definition time.
    def add(self, layer: "Layer"):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    def use(self, loss, d_loss):
        """Register the loss function and its derivative used by ``fit``."""
        self.loss = loss
        self.d_loss = d_loss

    def _forward(self, sample):
        """Run ``sample`` through every layer in order and return the output."""
        output = sample
        for layer in self.layers:
            output = layer.forward_prop(output)
        return output

    def predict(self, input_data: npt.ArrayLike) -> list:
        """Return the network output for each sample in ``input_data``."""
        return [self._forward(sample) for sample in input_data]

    def fit(self, x_train: npt.ArrayLike, y_train: npt.ArrayLike, epochs: int, lr: float):
        """Train with per-sample gradient descent, printing the mean loss per epoch.

        Args:
            x_train: indexable collection of input samples.
            y_train: targets, indexed in parallel with ``x_train``.
            epochs: number of passes over the training data.
            lr: learning rate forwarded to every layer's ``backward_prop``.

        Raises:
            RuntimeError: if ``use`` has not been called to set the loss.
            ValueError: if ``x_train`` is empty.
        """
        if self.loss is None or self.d_loss is None:
            raise RuntimeError("call use(loss, d_loss) before fit()")

        samples = len(x_train)
        if samples == 0:
            # The per-epoch mean below would otherwise divide by zero.
            raise ValueError("x_train must contain at least one sample")

        for epoch in range(epochs):
            err = 0
            for j, sample in enumerate(x_train):
                target = y_train[j]
                output = self._forward(sample)

                err += self.loss(target, output)

                # Walk the layers back to front; each one updates itself and
                # hands the error for its own input to the layer before it.
                error = self.d_loss(target, output)
                for layer in reversed(self.layers):
                    error = layer.backward_prop(error, lr)

            err /= samples
            print(f"Epoch {epoch + 1}/{epochs} >> error={err}")
        

### Util Functions

In [137]:
def ReLU(x):
    """Rectified linear unit: replace every negative entry of ``x`` with 0."""
    floor = 0
    return np.maximum(x, floor)

def d_ReLU(x):
    """Derivative of ReLU: 1 where ``x > 0`` and 0 elsewhere.

    Returns a new array with the dtype of ``x``. The argument is left
    untouched: the caller passes the layer's cached input, and overwriting it
    in place would corrupt that cached activation.
    """
    values = np.asarray(x)
    return (values > 0).astype(values.dtype)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def d_sigmoid(x):
    """Derivative of the logistic function: ``sigmoid(x) * (1 - sigmoid(x))``."""
    activated = sigmoid(x)
    complement = 1 - activated
    return complement * activated

def mse(y_true, y_pred):
    """Mean squared error between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    squared = np.power(residual, 2)
    return np.mean(squared)

def d_mse(y_true, y_pred):
    """Gradient of the mean squared error with respect to ``y_pred``.

    Computes ``2 * (y_pred - y_true) / n`` where ``n`` is the number of
    elements in ``y_true``. ``np.size`` is used for ``n`` so plain Python
    scalars work too.
    """
    n_elements = np.size(y_true)
    return 2 * (y_pred - y_true) / n_elements

### My Inputs

In [138]:
# Registry mapping an activation name to its (function, derivative) pair.
ACT_FUNCTIONS = dict(
    ReLU=(ReLU, d_ReLU),
    sigmoid=(sigmoid, d_sigmoid),
)

# Training hyper-parameters passed to Network.fit by train_hw1_net.
EPOCHS = 1
LEARNING_RATE = 0.1

### Network Initialization - Homework 1

In [182]:
def train_hw1_net(activation_function: str = 'ReLU') -> None:
    """
    Train the required net from HW1 Q2. During the training pipeline all required
    outputs will be printed.

    The network is 3 inputs -> 2 neurons -> 2 neurons -> 1 output, with every
    fully connected layer using the deterministic homework initialization.
    After training, the weights and biases of each fully connected layer are
    printed.

    Args:
        activation_function: key into ``ACT_FUNCTIONS`` selecting the
            activation used after both hidden layers.

    Raises:
        ValueError: if ``activation_function`` is not a key of
            ``ACT_FUNCTIONS``.
    """
    if activation_function not in ACT_FUNCTIONS:
        raise ValueError(
            f"Unknown activation function {activation_function!r}; "
            f"available: {sorted(ACT_FUNCTIONS)}"
        )

    # One sample of shape (1, 3), so each layer sees a row vector.
    x_train = np.array([[[1, 2, -1]]])
    # NOTE(review): the notebook header states "true y value = 1" but the
    # target used here is 0 - confirm which one the assignment intends.
    y_train = np.array([[[0]]])
    activation, d_activation = ACT_FUNCTIONS[activation_function]

    net_hw1 = Network()
    net_hw1.add(FullyConnectedLayer(3, 2, hw_1_init=True))
    net_hw1.add(ActivationLayer(activation, d_activation))
    net_hw1.add(FullyConnectedLayer(2, 2, hw_1_init=True))
    net_hw1.add(ActivationLayer(activation, d_activation))
    net_hw1.add(FullyConnectedLayer(2, 1, hw_1_init=True))

    net_hw1.use(mse, d_mse)
    net_hw1.fit(x_train, y_train, epochs=EPOCHS, lr=LEARNING_RATE)

    # Only the fully connected layers carry parameters; pick them by type
    # rather than by hard-coded position in the layer list.
    dense_layers = [
        layer for layer in net_hw1.layers
        if isinstance(layer, FullyConnectedLayer)
    ]
    for number, layer in enumerate(dense_layers, start=1):
        print(f"{'*'*13}")
        print(f'{" "*3}Layer {number}')
        print(f"{'*'*13}\n")
        print(f'Weights:\n{layer.weights}')
        print(f"{'-'*13}")
        print(f"Biases:\n{layer.bias}\n")

### ReLU

In [185]:
# Train the HW1 network with ReLU activations; the printed output follows.
train_hw1_net(activation_function='ReLU')

*************
DLoss/DW:
[[64.]
 [64.]]
*************

*************
DLoss/DW:
[[32. 32.]
 [32. 32.]]
*************

*************
DLoss/DW:
[[ 32.  32.]
 [ 64.  64.]
 [-32. -32.]]
*************

Epoch 1/1 >> error=64.0
*************
   Layer 1
*************

Weights:
[[-2.2 -2.2]
 [-5.4 -5.4]
 [ 4.2  4.2]]
-------------
Biases:
[[-3.2 -3.2]]

*************
   Layer 2
*************

Weights:
[[-2.2 -2.2]
 [-2.2 -2.2]]
-------------
Biases:
[[-1.6 -1.6]]

*************
   Layer 3
*************

Weights:
[[-5.4]
 [-5.4]]
-------------
Biases:
[[-1.6]]



### Sigmoid

In [186]:
# Train the HW1 network with sigmoid activations; the printed output follows.
train_hw1_net(activation_function='sigmoid')

*************
DLoss/DW:
[[2.91322908]
 [2.91322908]]
*************

*************
DLoss/DW:
[[0.37614665 0.37614665]
 [0.37614665 0.37614665]]
*************

*************
DLoss/DW:
[[ 0.08967556  0.08967556]
 [ 0.17935112  0.17935112]
 [-0.08967556 -0.08967556]]
*************

Epoch 1/1 >> error=2.9132290817853628
*************
   Layer 1
*************

Weights:
[[0.99103244 0.99103244]
 [0.98206489 0.98206489]
 [1.00896756 1.00896756]]
-------------
Biases:
[[-0.00896756 -0.00896756]]

*************
   Layer 2
*************

Weights:
[[0.96238533 0.96238533]
 [0.96238533 0.96238533]]
-------------
Biases:
[[-0.04270526 -0.04270526]]

*************
   Layer 3
*************

Weights:
[[0.70867709]
 [0.70867709]]
-------------
Biases:
[[-0.34136368]]

