In [32]:
import numpy as np
import pandas as pd

In [33]:
X = np.array([[0, 0],
              [0, 1], 
              [1, 0],
              [1, 1]])

y = np.array([[0],
              [1],
              [1], 
              [0]])

In [34]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [35]:
def MSE(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

In [36]:
def dC_da(a, y_true):
    return 2*(a- y_true)

In [37]:
def da_dz(z):
    return sigmoid(z) * (1-sigmoid(z))

In [None]:
class Layer:
    def __init__(self, input_size: int, neuron_count: int):
        self.W = np.random.randn(input_size, neuron_count)
        self.b = np.zeros((1, neuron_count))
        
        self.z = None
        self.inputs = None
        self.activations = None
        
    def forward(self, X: np.ndarray):   
        self.inputs = X
        self.z = X @ self.W + self.b
        a = sigmoid(self.z)
        self.activations = a
        return a
    
    def backward(self, delta, learning_rate):
        dz = np.array([])
        

        dz = delta * da_dz(self.z)
        
        dW = self.inputs.T @ dz
        db = np.sum(dz, axis=0, keepdims=True)
        
        delta_prev = dz @ self.W.T
        
        self.W -= learning_rate * dW
        self.b -= learning_rate * db
        
        return delta_prev
    
            
        
        
        

In [39]:
layer = Layer(input_size=2, neuron_count=3)
layer.forward(X)

array([[0.5       , 0.5       , 0.5       ],
       [0.47854632, 0.4531206 , 0.47137274],
       [0.67499085, 0.52453484, 0.27968064],
       [0.65587796, 0.47755216, 0.2571794 ]])

In [40]:
from typing import List


class Network:
    def __init__(self, layers: List[Layer]):
        self.layers = layers
    
    def forward(self, X: np.ndarray):
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backwards(self, X, y_true, learning_rate):
        
        y_pred = self.forward(X)
        
        output_delta = (y_pred - y_true) * da_dz(self.layers[-1].z)
        
        current_delta = output_delta
        
        for layer in reversed(self.layers):
            current_delta = layer.backward(current_delta, learning_rate)
                    
    def train(self, X, y, epochs, learning_rate):
        loss = 0
        for epoch in range(epochs):
            y_pred = self.forward(X)
            
            loss = np.mean((y_pred - y) ** 2)
            
            self.backwards(X, y, learning_rate)
            
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.6f}")
                
        return loss
                   
            
layer1 = Layer(input_size=2, neuron_count=2) 
layer2 = Layer(input_size=2, neuron_count=1)   

layers = [layer1, layer2]
network = Network(layers)

network.train(X, y, 10000, 5)

Epoch 0, Loss: 0.254471
Epoch 100, Loss: 0.242544
Epoch 200, Loss: 0.194102
Epoch 300, Loss: 0.089168
Epoch 400, Loss: 0.027305
Epoch 500, Loss: 0.015566
Epoch 600, Loss: 0.011082
Epoch 700, Loss: 0.008718
Epoch 800, Loss: 0.007250
Epoch 900, Loss: 0.006246
Epoch 1000, Loss: 0.005512
Epoch 1100, Loss: 0.004950
Epoch 1200, Loss: 0.004505
Epoch 1300, Loss: 0.004143
Epoch 1400, Loss: 0.003843
Epoch 1500, Loss: 0.003588
Epoch 1600, Loss: 0.003370
Epoch 1700, Loss: 0.003181
Epoch 1800, Loss: 0.003015
Epoch 1900, Loss: 0.002868
Epoch 2000, Loss: 0.002737
Epoch 2100, Loss: 0.002619
Epoch 2200, Loss: 0.002513
Epoch 2300, Loss: 0.002416
Epoch 2400, Loss: 0.002328
Epoch 2500, Loss: 0.002246
Epoch 2600, Loss: 0.002172
Epoch 2700, Loss: 0.002103
Epoch 2800, Loss: 0.002039
Epoch 2900, Loss: 0.001979
Epoch 3000, Loss: 0.001923
Epoch 3100, Loss: 0.001871
Epoch 3200, Loss: 0.001823
Epoch 3300, Loss: 0.001777
Epoch 3400, Loss: 0.001734
Epoch 3500, Loss: 0.001693
Epoch 3600, Loss: 0.001654
Epoch 3700, L

np.float64(0.0007496277611177607)