In [1]:
import torch

In [5]:
class ActivationSigmoid:
    """Element-wise logistic sigmoid activation with a manual backward pass.

    Results are stored on the instance (``output`` after ``forward``,
    ``dinputs`` after ``backward``); neither method returns a value.
    """

    def forward(self, inputs):
        """Compute ``1 / (1 + exp(-inputs))`` and store it in ``self.output``.

        Args:
            inputs: Tensor of pre-activation values; also kept on
                ``self.inputs``.
        """
        self.inputs = inputs
        exp_neg = torch.exp(-inputs)
        self.output = 1 / (1 + exp_neg)

    def backward(self, dvalues):
        """Scale the upstream gradient by the sigmoid's local derivative.

        Must be called after ``forward`` because it reads ``self.output``.

        Args:
            dvalues: Gradient flowing in from the next stage.
        """
        activated = self.output
        # d(sigmoid)/dx expressed through the cached output: s * (1 - s).
        local_grad = activated * (1 - activated)
        self.dinputs = dvalues * local_grad

In [6]:
class ActivationReLU:
    """Rectified linear unit activation with a manual backward pass.

    Results are stored on the instance (``output`` after ``forward``,
    ``dinputs`` after ``backward``); neither method returns a value.
    """

    def forward(self, inputs):
        """Store the element-wise maximum of ``inputs`` and zero.

        Args:
            inputs: Tensor of pre-activation values; also kept on
                ``self.inputs`` for use in ``backward``.
        """
        self.inputs = inputs
        zero = torch.tensor(0)
        self.output = torch.maximum(zero, inputs)

    def backward(self, dvalues):
        """Pass the upstream gradient through where the input was positive.

        Must be called after ``forward`` because it reads ``self.inputs``.

        Args:
            dvalues: Gradient flowing in from the next stage. It is copied,
                not modified in place.
        """
        grad = dvalues.clone()
        # Positions whose forward input was <= 0 contribute no gradient.
        blocked = self.inputs <= 0
        grad[blocked] = 0
        self.dinputs = grad

In [7]:
class DenseLayer:
    """Fully connected layer computing ``inputs @ weights + biases``.

    Gradients are computed manually in ``backward`` and stored on the
    instance as ``dweights``, ``dbiases`` and ``dinputs``.
    """

    def __init__(self, n_inputs, n_neurons):
        """Create the layer parameters.

        Weights are drawn from ``torch.rand`` (uniform on [0, 1)) and scaled
        by 0.01, so every initial weight is non-negative; biases start at 0.

        Args:
            n_inputs: Number of input features (rows of ``weights``).
            n_neurons: Number of output units (columns of ``weights``).
        """
        init_scale = 0.01
        self.weights = init_scale * torch.rand(n_inputs, n_neurons)
        self.biases = torch.zeros(1, n_neurons)

    def forward(self, inputs):
        """Store the affine transform of ``inputs`` in ``self.output``.

        Args:
            inputs: Tensor whose last dimension matches ``n_inputs``;
                presumably shaped (samples, n_inputs) -- confirm with caller.
        """
        self.inputs = inputs
        self.output = inputs @ self.weights + self.biases

    def backward(self, dvalues):
        """Compute gradients w.r.t. weights, biases and inputs.

        Must be called after ``forward`` because it reads ``self.inputs``.

        Args:
            dvalues: Gradient of the loss with respect to this layer's
                output; assumed 2-D with one row per sample.
        """
        upstream = dvalues
        self.dweights = self.inputs.T @ upstream
        # Bias is shared across rows, so its gradient sums over dimension 0.
        self.dbiases = upstream.sum(dim=0, keepdim=True)
        self.dinputs = upstream @ self.weights.T

In [8]:
class LossMSE:
    """Mean squared error loss with a manual backward pass."""

    def forward(self, y_pred, y_true):
        """Return the mean of the squared differences over all elements.

        Args:
            y_pred: Predicted values.
            y_true: Target values, broadcast-compatible with ``y_pred``.

        Returns:
            A scalar tensor holding the mean squared error.
        """
        residual = y_pred - y_true
        return (residual ** 2).mean()

    def backward(self, dvalues, y_true):
        """Store the gradient of the loss w.r.t. the predictions in ``self.dinputs``.

        Args:
            dvalues: The predictions the loss was evaluated on (despite the
                name, this is not an upstream gradient). Must be indexable
                along at least two dimensions.
            y_true: Target values matching ``dvalues``.
        """
        n_samples = len(dvalues)
        n_outputs = len(dvalues[0])
        # Normalise first by outputs per sample, then by number of samples.
        per_sample_grad = 2 * (dvalues - y_true) / n_outputs
        self.dinputs = per_sample_grad / n_samples

In [None]:
"""
Input: 2, Hidden layer: 2, activation: ReLU, output: 2, loss function: MSE, Learning rate: 0.1, Number of Epochs: 1000
"""
# Trains a 2-2-2 network with hand-written forward/backward passes and plain
# gradient descent. The output layer is followed by a sigmoid activation
# before the MSE loss is evaluated.
learning_rate = 0.1
epochs = 1000

# The class names must match the definitions in this file
# (ActivationReLU / ActivationSigmoid / LossMSE); the previous underscore
# spellings did not exist and raised NameError.
layer1 = DenseLayer(2, 2)
activation1 = ActivationReLU()

layer2 = DenseLayer(2, 2)
activation2 = ActivationSigmoid()

loss_function = LossMSE()

# Two training samples with two features each, and their two-value targets.
X = torch.tensor([[1.0, 2.0], [0.5, -1.0]])
Y = torch.tensor([[1.0, 0.0], [0.0, 1.0]])


# One forward pass before training to report the starting loss.
layer1.forward(X)
activation1.forward(layer1.output)

layer2.forward(activation1.output)
activation2.forward(layer2.output)

initial_loss = loss_function.forward(activation2.output, Y)
print(f'Initial Loss: {initial_loss}')

for epoch in range(epochs):
    # Forward pass through both layers.
    layer1.forward(X)
    activation1.forward(layer1.output)

    layer2.forward(activation1.output)
    activation2.forward(layer2.output)

    loss = loss_function.forward(activation2.output, Y)

    # Backward pass: each stage consumes the `dinputs` of the stage after it.
    loss_function.backward(activation2.output, Y)
    activation2.backward(loss_function.dinputs)

    layer2.backward(activation2.dinputs)
    activation1.backward(layer2.dinputs)

    layer1.backward(activation1.dinputs)

    # Gradient-descent parameter update, applied in place.
    layer1.weights -= learning_rate * layer1.dweights
    layer1.biases -= learning_rate * layer1.dbiases

    layer2.weights -= learning_rate * layer2.dweights
    layer2.biases -= learning_rate * layer2.dbiases

    # `loss` is the value computed before this epoch's update.
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')