In [2]:
import numpy as np
import pandas as pd




In [46]:
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1-np.tanh(x)**2

def relu(x):
    return np.maximum(0,x)

def relu_prime(x):
    return np.where(x>0,1,0)

def mse(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

def mse_prime(y_true, y_pred):
    # print(np.mean(2*(y_pred - y_true)))
    return np.mean(2*(y_pred - y_true))

# Sigmoid function and its derivative   

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

class Layer:
    def __init__(self) -> None:
        self.X = None
        self.y = None
        
    def forward(self, input):
        pass
    
    def backward(self, output_grad, lam):
        pass
         
         
class Dense(Layer):
    def __init__(self, input_size, output_size) -> None:
        self.weights = np.random.randn(output_size,input_size)*np.sqrt(1/input_size)
        self.bias = np.random.randn(output_size,1)
        
    def forward(self, input):
        self.input = input
        return np.dot(self.weights, input) + self.bias
    
    def backward(self, output_grad, lam):
        # print("out, self.input", output_grad.shape, self.input.shape)
        weights_grad = np.dot(output_grad, self.input.T)
        tbr = np.dot(self.weights.T, output_grad)
        self.weights -= lam * weights_grad
        self.bias -= lam * output_grad.mean(axis=1, keepdims=True)
        return tbr
        
        
class Activation(Layer):
    def __init__(self, activation, activation_prime ) -> None:
        self.activation = activation
        self.activation_prime = activation_prime 
    
    def forward(self, input):
        self.input = input
        return self.activation(input)
    
    def backward(self, output_grad, lam):
        return output_grad * self.activation_prime(self.input)
    
class Output(Layer):
    def __init__(self, y_true, y_pred, loss, lossprime) -> None:
        self.y_true = y_true
        self.y_pred = y_pred
        
    def forward(self, input):
        return mse(self.y_true, self.y_pred)
    
    def backward(self, output_grad, lam):
        return mse_prime(self.y_true, self.y_pred)
    
        
    

In [50]:
# XOR problem

X = np.array([[0,0],[0,1],[1,0],[1,1]]).reshape((4,2,1))

y = np.array([[0,1,1,0]]).reshape((4,1,1))

# Model
network = [
    Dense(2, 3),
    Activation(tanh, tanh_prime),
    Dense(3, 1),
    Activation(tanh, tanh_prime),
  
]

epochs = 1000
lr = 0.1

for i in range(epochs):
    error = 0 
    for x, yy in zip(X,y):
        output = x
        for layer in network:
            output = layer.forward(output)
        error += mse(yy, output)
        error_grad = mse_prime(yy, output)
        for layer in reversed(network):
            error_grad = layer.backward(error_grad, lr)
    error /= len(X)
    print(f"Epoch: {i}, Error: {error}")

Epoch: 0, Error: 0.4699630001655388
Epoch: 1, Error: 0.3418922923905229
Epoch: 2, Error: 0.32718229334695353
Epoch: 3, Error: 0.32201917929682394
Epoch: 4, Error: 0.31904806791789075
Epoch: 5, Error: 0.31693743627237314
Epoch: 6, Error: 0.3152890062090073
Epoch: 7, Error: 0.3139395340212417
Epoch: 8, Error: 0.3128048639856282
Epoch: 9, Error: 0.31183376282197267
Epoch: 10, Error: 0.31099141913180617
Epoch: 11, Error: 0.3102524256443601
Epoch: 12, Error: 0.3095972444821695
Epoch: 13, Error: 0.30901017111019996
Epoch: 14, Error: 0.3084780624231942
Epoch: 15, Error: 0.3079895149271263
Epoch: 16, Error: 0.3075343350143524
Epoch: 17, Error: 0.30710320890455967
Epoch: 18, Error: 0.30668751133524813
Epoch: 19, Error: 0.3062792093029746
Epoch: 20, Error: 0.3058708278284995
Epoch: 21, Error: 0.30545545226100324
Epoch: 22, Error: 0.305026747649479
Epoch: 23, Error: 0.3045789809007513
Epoch: 24, Error: 0.30410703604269423
Epoch: 25, Error: 0.3036064169081631
Epoch: 26, Error: 0.30307323485658066
