In [96]:
import numpy as np
import pandas as pd

In [97]:
X = np.array([[0, 0],
              [0, 1], 
              [1, 0],
              [1, 1]])

y = np.array([[0],
              [1],
              [1], 
              [0]])

In [98]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [99]:
def MSE(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

In [100]:
def dC_da(a, y_true):
    return 2*(a- y_true)

In [101]:
def da_dz(z):
    return sigmoid(z) * (1-sigmoid(z))

In [102]:
def xavier_normal(fan_in, fan_out):
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return np.random.normal(0, std, (fan_out, fan_in)).T

In [103]:
class Layer:
    def __init__(self, input_size: int, neuron_count: int):
        self.W = xavier_normal(input_size, neuron_count)
        self.b = np.zeros((1, neuron_count))
        
        self.z = None
        self.inputs = None
        self.activations = None
        
    def forward(self, X: np.ndarray):   
        self.inputs = X
        self.z = X @ self.W + self.b
        a = sigmoid(self.z)
        self.activations = a
        return a
    
    def backward(self, delta, learning_rate):
        dz = np.array([])
        

        dz = delta * da_dz(self.z)
        
        dW = self.inputs.T @ dz
        db = np.sum(dz, axis=0, keepdims=True)
        
        delta_prev = dz @ self.W.T
        
        self.W -= learning_rate * dW
        self.b -= learning_rate * db
        
        return delta_prev
    
            
        
        
        

In [104]:
layer = Layer(input_size=2, neuron_count=3)
layer.forward(X)

array([[0.5       , 0.5       , 0.5       ],
       [0.52263993, 0.52716085, 0.62559217],
       [0.29069861, 0.47483811, 0.4402997 ],
       [0.30973215, 0.50200444, 0.56792916]])

In [None]:
from typing import List


class Network:
    def __init__(self, layers: List[Layer]):
        self.layers = layers
    
    def forward(self, X: np.ndarray):
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backwards(self, X, y_true, learning_rate):
        
        y_pred = self.forward(X)
        
        output_delta = (y_pred - y_true) * da_dz(self.layers[-1].z)
        
        current_delta = output_delta
        
        for layer in reversed(self.layers):
            current_delta = layer.backward(current_delta, learning_rate)
                    
    def train(self, X, y, epochs, learning_rate):
        loss = 0
        for epoch in range(epochs):
            y_pred = self.forward(X)
            
            loss = MSE(y, y_pred)
            
            self.backwards(X, y, learning_rate)
            
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.6f}")
                
        return loss
                   
            
layer1 = Layer(input_size=2, neuron_count=2)
layer2 = Layer(input_size=2, neuron_count=2) 
layer3 = Layer(input_size=2, neuron_count=1)

layers = [layer1, layer2]
network = Network(layers)

network.train(X, y, 20000, 0.1)

Epoch 0, Loss: 0.262553
Epoch 100, Loss: 0.250342
Epoch 200, Loss: 0.248232
Epoch 300, Loss: 0.247665
Epoch 400, Loss: 0.247226
Epoch 500, Loss: 0.246756
Epoch 600, Loss: 0.246232
Epoch 700, Loss: 0.245648
Epoch 800, Loss: 0.244995
Epoch 900, Loss: 0.244271
Epoch 1000, Loss: 0.243469
Epoch 1100, Loss: 0.242587
Epoch 1200, Loss: 0.241622
Epoch 1300, Loss: 0.240572
Epoch 1400, Loss: 0.239436
Epoch 1500, Loss: 0.238210
Epoch 1600, Loss: 0.236894
Epoch 1700, Loss: 0.235483
Epoch 1800, Loss: 0.233976
Epoch 1900, Loss: 0.232369
Epoch 2000, Loss: 0.230660
Epoch 2100, Loss: 0.228847
Epoch 2200, Loss: 0.226928
Epoch 2300, Loss: 0.224905
Epoch 2400, Loss: 0.222780
Epoch 2500, Loss: 0.220557
Epoch 2600, Loss: 0.218243
Epoch 2700, Loss: 0.215846
Epoch 2800, Loss: 0.213378
Epoch 2900, Loss: 0.210853
Epoch 3000, Loss: 0.208285
Epoch 3100, Loss: 0.205690
Epoch 3200, Loss: 0.203083
Epoch 3300, Loss: 0.200482
Epoch 3400, Loss: 0.197901
Epoch 3500, Loss: 0.195356
Epoch 3600, Loss: 0.192857
Epoch 3700, L

np.float64(0.1390640570850421)