In [1]:
import numpy as np

In [1]:
def relu(x):
    """Rectified linear unit, applied element-wise.

    Args:
        x: Scalar or array-like of numbers.

    Returns:
        ``max(0, x)`` for every element, as produced by ``np.maximum``.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU, evaluated element-wise.

    Args:
        x: Scalar or array-like of pre-activation values.

    Returns:
        Float value(s): ``1.0`` where ``x > 0`` and ``0.0`` elsewhere
        (the derivative at exactly 0 is taken to be 0).
    """
    # np.greater works for Python scalars and lists as well as ndarrays,
    # whereas a bare ``x > 0`` on a Python scalar yields a bool that has
    # no ``astype`` method.
    is_positive = np.greater(x, 0)
    return is_positive.astype(float)


In [None]:
class Layer:
    """Fully connected layer followed by a ReLU activation.

    Attributes set by ``__init__``:
        weights: array of shape ``(input_size, output_size)``.
        bias: array of shape ``(1, output_size)``.
    """

    def __init__(self, input_size, output_size):
        """Create the layer parameters.

        Args:
            input_size: Number of input features.
            output_size: Number of units in this layer.
        """
        # Small random weights (standard normal scaled by 0.01), zero bias.
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.bias = np.zeros((1, output_size))

    def forward(self, X):
        """Compute ``relu(X @ weights + bias)``.

        The input and the pre-activation are cached on the instance
        (``self.X``, ``self.z``) because ``backward`` needs them.

        Args:
            X: Input batch; its last dimension must equal ``input_size``.

        Returns:
            The activations ``self.a``.
        """
        self.X = X

        self.z = np.dot(X, self.weights) + self.bias
        self.a = relu(self.z)

        return self.a

    def backward(self, grad, learning_rate):
        """Backpropagate through the layer and apply one gradient step.

        Must be called after ``forward``, which caches ``self.X`` and
        ``self.z``.

        Args:
            grad: Gradient of the loss with respect to this layer's output.
            learning_rate: Step size used for the parameter update.

        Returns:
            Gradient of the loss with respect to this layer's input.
        """
        delta = grad * relu_derivative(self.z)
        dW = np.dot(self.X.T, delta)
        db = np.sum(delta, axis=0, keepdims=True)

        # The gradient handed to the previous layer must use the weights
        # that produced the forward pass, so compute it BEFORE the update.
        grad_prev = np.dot(delta, self.weights.T)

        self.weights -= learning_rate * dW
        self.bias -= learning_rate * db

        return grad_prev

In [None]:
class OutputLayer(Layer):
    """Final layer with a linear (identity) output.

    Reuses the parameter initialisation of ``Layer`` but applies no
    activation function in ``forward``.
    """

    def forward(self, X):
        """Compute the affine output ``X @ weights + bias``.

        The input and the output are cached on the instance (``self.X``,
        ``self.z``); ``backward`` reads ``self.X``.

        Args:
            X: Input batch; its last dimension must match the weight rows.

        Returns:
            The raw output ``self.z`` (no activation applied).
        """
        self.X = X
        self.z = np.dot(X, self.weights) + self.bias
        return self.z

    def backward(self, grad, learning_rate):
        """Backpropagate through the layer and apply one gradient step.

        Must be called after ``forward``, which caches ``self.X``.

        Args:
            grad: Gradient of the loss with respect to this layer's output.
                Because the layer is linear, it is used directly as the
                gradient with respect to ``z``.
            learning_rate: Step size used for the parameter update.

        Returns:
            Gradient of the loss with respect to this layer's input.
        """
        dW = np.dot(self.X.T, grad)
        db = np.sum(grad, axis=0, keepdims=True)

        # Use the forward-pass weights for the propagated gradient, so it
        # has to be computed before the in-place parameter update.
        grad_prev = np.dot(grad, self.weights.T)

        self.weights -= learning_rate * dW
        self.bias -= learning_rate * db

        return grad_prev

In [None]:
class NeuralNetwork:
    """Feed-forward network: a stack of ``Layer`` objects and one ``OutputLayer``.

    Attributes:
        layers: list of layer objects in forward order; the last entry is
            the ``OutputLayer``.
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        """Build the layer stack.

        Args:
            input_size: Number of input features.
            hidden_sizes: Sequence (list, tuple, ...) with the width of each
                hidden layer. May be empty, in which case the network is
                just the output layer applied to the input.
            output_size: Number of output units.
        """
        # Unpacking accepts any iterable, not only lists.
        sizes = [input_size, *hidden_sizes]

        self.layers = [
            Layer(n_in, n_out) for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]
        # sizes[-1] is the last hidden width, or input_size when there are
        # no hidden layers (indexing hidden_sizes[-1] would fail there).
        self.layers.append(OutputLayer(sizes[-1], output_size))

    def forward(self, X):
        """Run ``X`` through every layer in order and return the final output."""
        a = X
        for layer in self.layers:
            a = layer.forward(a)
        return a

    def backward(self, X, y, learning_rate):
        """Run one forward/backward pass and update every layer in place.

        The output gradient is ``(y_pred - y) / m`` with ``m = X.shape[0]``,
        i.e. the gradient of half the squared error averaged over the batch.

        Args:
            X: Input batch; ``X.shape[0]`` is used as the batch size.
            y: Targets. If the shape differs from the prediction shape but
                the element count matches (e.g. ``(m,)`` vs ``(m, 1)``),
                the targets are reshaped to the prediction shape.
            learning_rate: Step size passed to each layer's ``backward``.
        """
        y_pred = self.forward(X)

        # Guard against silent broadcasting: (m, 1) - (m,) would otherwise
        # produce an (m, m) array instead of a per-sample error.
        y = np.asarray(y)
        if y.shape != y_pred.shape and y.size == y_pred.size:
            y = y.reshape(y_pred.shape)

        m = X.shape[0]
        grad = (y_pred - y) / m

        for layer in reversed(self.layers):
            grad = layer.backward(grad, learning_rate)