In [55]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [56]:
X = np.array([[0, 0],
              [0, 1], 
              [1, 0],
              [1, 1]])

y = np.array([[0],
              [1],
              [1], 
              [0]])

In [57]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [58]:
def MSE(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

In [59]:
def dC_da(a, y_true):
    return 2*(a- y_true)

In [60]:
def da_dz(z):
    return sigmoid(z) * (1-sigmoid(z))

In [61]:
def xavier_normal(fan_in, fan_out):
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return np.random.normal(0, std, (fan_out, fan_in)).T

In [62]:
class Layer:
    def __init__(self, input_size: int, neuron_count: int):
        self.W = xavier_normal(input_size, neuron_count)
        self.b = np.zeros((1, neuron_count))
        
        self.z = None
        self.inputs = None
        self.activations = None
        
    def forward(self, X: np.ndarray):   
        self.inputs = X
        self.z = X @ self.W + self.b
        a = sigmoid(self.z)
        self.activations = a
        return a
    
    def backward(self, delta, learning_rate):
        dz = np.array([])

        dz = delta * da_dz(self.z)
        
        dW = self.inputs.T @ dz
        db = np.sum(dz, axis=0, keepdims=True)
        
        delta_prev = dz @ self.W.T
        
        self.W -= learning_rate * dW
        self.b -= learning_rate * db
        
        return delta_prev
    
            
        
        
        

In [63]:
layer = Layer(input_size=2, neuron_count=3)
layer.forward(X)

array([[0.5       , 0.5       , 0.5       ],
       [0.25054281, 0.48878704, 0.55828875],
       [0.63807628, 0.40874831, 0.5253737 ],
       [0.3708213 , 0.39795301, 0.58317042]])

In [64]:
from typing import List


class Network:
    def __init__(self, layers: List[Layer]):
        self.layers = layers
    
    def forward(self, X: np.ndarray):
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backwards(self, y_true, learning_rate):
        
        y_pred = self.layers[-1].activations
                
        output_delta = (y_pred - y_true) * da_dz(self.layers[-1].z)
        
        current_delta = output_delta
        
        for layer in reversed(self.layers):
            current_delta = layer.backward(current_delta, learning_rate)
                    
    def train(self, X, y, epochs, learning_rate):
        loss = 0
        for epoch in range(epochs):
            y_pred = self.forward(X)
            
            loss = MSE(y, y_pred)
            
            self.backwards(y, learning_rate)
            
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.6f}")
                
        return loss
                   
            
layer1 = Layer(input_size=2, neuron_count=2)
layer2 = Layer(input_size=2, neuron_count=1)

layers = [layer1, layer2]
network = Network(layers)

network.train(X, y, 50000, 1)

Epoch 0, Loss: 0.251125
Epoch 100, Loss: 0.250247
Epoch 200, Loss: 0.249619
Epoch 300, Loss: 0.248781
Epoch 400, Loss: 0.247268
Epoch 500, Loss: 0.244300
Epoch 600, Loss: 0.238834
Epoch 700, Loss: 0.230000
Epoch 800, Loss: 0.217808
Epoch 900, Loss: 0.203842
Epoch 1000, Loss: 0.189779
Epoch 1100, Loss: 0.175226
Epoch 1200, Loss: 0.157953
Epoch 1300, Loss: 0.135422
Epoch 1400, Loss: 0.108281
Epoch 1500, Loss: 0.082197
Epoch 1600, Loss: 0.062044
Epoch 1700, Loss: 0.048097
Epoch 1800, Loss: 0.038621
Epoch 1900, Loss: 0.032031
Epoch 2000, Loss: 0.027285
Epoch 2100, Loss: 0.023746
Epoch 2200, Loss: 0.021024
Epoch 2300, Loss: 0.018873
Epoch 2400, Loss: 0.017136
Epoch 2500, Loss: 0.015704
Epoch 2600, Loss: 0.014506
Epoch 2700, Loss: 0.013488
Epoch 2800, Loss: 0.012613
Epoch 2900, Loss: 0.011853
Epoch 3000, Loss: 0.011187
Epoch 3100, Loss: 0.010598
Epoch 3200, Loss: 0.010073
Epoch 3300, Loss: 0.009603
Epoch 3400, Loss: 0.009179
Epoch 3500, Loss: 0.008794
Epoch 3600, Loss: 0.008444
Epoch 3700, L

np.float64(0.0007494720056401664)

In [65]:
from sklearn.datasets import load_iris

iris = load_iris()

In [66]:
iris_X = iris.data # Features (4 columns: sepal length/width, petal length/width)
iris_y = iris.target # Labels (0=setosa, 1=versicolor, 2=virginica)

In [67]:
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)
iris_y = encoder.fit_transform(iris_y.reshape(-1, 1))

In [69]:
iris_layer1 = Layer(input_size=4, neuron_count=8) 
iris_layer2 = Layer(input_size=8, neuron_count=3) 


iris_layers = [iris_layer1, iris_layer2]
iris_network = Network(iris_layers)

iris_network.train(iris_X, iris_y, 100000, 0.1)

Epoch 0, Loss: 0.244241
Epoch 100, Loss: 0.096171
Epoch 200, Loss: 0.043347
Epoch 300, Loss: 0.050096
Epoch 400, Loss: 0.033815
Epoch 500, Loss: 0.025765
Epoch 600, Loss: 0.021983
Epoch 700, Loss: 0.019923
Epoch 800, Loss: 0.018577
Epoch 900, Loss: 0.017597
Epoch 1000, Loss: 0.016834
Epoch 1100, Loss: 0.016211
Epoch 1200, Loss: 0.015684
Epoch 1300, Loss: 0.015227
Epoch 1400, Loss: 0.014820
Epoch 1500, Loss: 0.014454
Epoch 1600, Loss: 0.014119
Epoch 1700, Loss: 0.013810
Epoch 1800, Loss: 0.013523
Epoch 1900, Loss: 0.013257
Epoch 2000, Loss: 0.013010
Epoch 2100, Loss: 0.012781
Epoch 2200, Loss: 0.012570
Epoch 2300, Loss: 0.012376
Epoch 2400, Loss: 0.012196
Epoch 2500, Loss: 0.012028
Epoch 2600, Loss: 0.011870
Epoch 2700, Loss: 0.011723
Epoch 2800, Loss: 0.011584
Epoch 2900, Loss: 0.011452
Epoch 3000, Loss: 0.011328
Epoch 3100, Loss: 0.010113
Epoch 3200, Loss: 0.009933
Epoch 3300, Loss: 0.009782
Epoch 3400, Loss: 0.009650
Epoch 3500, Loss: 0.009533
Epoch 3600, Loss: 0.011097
Epoch 3700, L

np.float64(0.004531507877585569)