In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
X = np.array([[0, 0],
              [0, 1], 
              [1, 0],
              [1, 1]])

y = np.array([[0],
              [1],
              [1], 
              [0]])

In [3]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [4]:
def MSE(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

In [5]:
def dC_da(a, y_true):
    return 2*(a- y_true)

In [6]:
def da_dz(z):
    return sigmoid(z) * (1-sigmoid(z))

In [7]:
def xavier_normal(fan_in, fan_out):
    std = np.sqrt(2.0 / (fan_in + fan_out))
    return np.random.normal(0, std, (fan_out, fan_in)).T

In [8]:
class Layer:
    def __init__(self, input_size: int, neuron_count: int):
        self.W = xavier_normal(input_size, neuron_count)
        self.b = np.zeros((1, neuron_count))
        
        self.z = None
        self.inputs = None
        self.activations = None
        
    def forward(self, X: np.ndarray):   
        self.inputs = X
        self.z = X @ self.W + self.b
        a = sigmoid(self.z)
        self.activations = a
        return a
    
    def backward(self, delta, learning_rate):
        dz = np.array([])

        dz = delta * da_dz(self.z)
        
        dW = self.inputs.T @ dz
        db = np.sum(dz, axis=0, keepdims=True)
        
        delta_prev = dz @ self.W.T
        
        self.W -= learning_rate * dW
        self.b -= learning_rate * db
        
        return delta_prev
    
            
        
        
        

In [9]:
layer = Layer(input_size=2, neuron_count=3)
layer.forward(X)

array([[0.5       , 0.5       , 0.5       ],
       [0.30406162, 0.33202003, 0.54634973],
       [0.48018138, 0.40963327, 0.6682946 ],
       [0.28754308, 0.25644194, 0.70814971]])

In [10]:
from typing import List


class Network:
    def __init__(self, layers: List[Layer]):
        self.layers = layers
    
    def forward(self, X: np.ndarray):
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backwards(self, y_true, learning_rate):
        
        y_pred = self.layers[-1].activations
                
        output_delta = (y_pred - y_true) * da_dz(self.layers[-1].z)
        
        current_delta = output_delta
        
        for layer in reversed(self.layers):
            current_delta = layer.backward(current_delta, learning_rate)
                    
    def train(self, X, y, epochs, learning_rate):
        loss = 0
        for _ in range(epochs):

            self.forward(X)
            self.backwards(y, learning_rate)

        y_pred = self.forward(X)
            
        loss = MSE(y, y_pred)

        return loss
            
layer1 = Layer(input_size=2, neuron_count=2)
layer2 = Layer(input_size=2, neuron_count=1)

layers = [layer1, layer2]
network = Network(layers)

network.train(X, y, 50000, 1)

np.float64(0.0011101072857388934)

In [11]:
from sklearn.datasets import load_iris

iris = load_iris()

In [12]:
iris_X = iris.data # Features (4 columns: sepal length/width, petal length/width)
iris_y = iris.target # Labels (0=setosa, 1=versicolor, 2=virginica)

In [13]:
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(sparse_output=False)
iris_y = encoder.fit_transform(iris_y.reshape(-1, 1))

In [14]:
iris_layer1 = Layer(input_size=4, neuron_count=8) 
iris_layer2 = Layer(input_size=8, neuron_count=3) 


iris_layers = [iris_layer1, iris_layer2]
iris_network = Network(iris_layers)

iris_network.train(iris_X, iris_y, 100000, 0.1)

np.float64(0.004572954842015674)