In [2]:
import torch

In [3]:
class ActivationSigmoid:
    def forward(self, inputs):
        self.output = 1/(1+torch.exp(-inputs))

In [4]:
class ActivationLinear:
    def forward(self, inputs):
        self.output = inputs

In [12]:
class DenseLayer:
    def __init__(self, n_features, n_neurons, activation_fun):
        self.weights = 0.01 * torch.rand((n_features, n_neurons))
        self.biases = torch.zeros((1, n_neurons))
        self.output = None
        self.activation_fun = activation_fun
        
    def forward(self, inputs):
        weighted_sum = torch.matmul(inputs, self.weights) + self.biases
        self.activation_fun.forward(weighted_sum)
        self.output = self.activation_fun.output
        
    def get_output(self):
        return self.output

In [13]:
class LossMSE:
    def forward(self, y_pred, y_true):
        self.output = torch.mean((y_pred - y_true)**2)
        return self.output

In [16]:
"""
Architecture:
Input: 2, Hidden layer: 4, activation: sigmoid, output: 2, Hyperparameters: loss function: MSE, Learning rate: 0.001, Number of Epochs: 1000
"""
def backward_propagation(X, y, hidden_layer, output_layer, activation_hidden, activation_output, loss_function, learning_rate=0.01):
    loss = loss_function.forward(output_layer.get_output(), y)
    
    back_output = 2*(output_layer.get_output()-y)
    output_layer.weights -= learning_rate * torch.matmul(hidden_layer.get_output().T, back_output)
    output_layer.biases -= learning_rate * torch.sum(back_output, axis = 0, keepdim=True)
    
    back_hidden = torch.matmul(back_output, output_layer.weights.T)
    back_hidden *= (hidden_layer.get_output() * (1 - hidden_layer.get_output()))
    hidden_layer.weights -= learning_rate * torch.matmul(X.T, back_hidden)
    hidden_layer.biases -= learning_rate * torch.sum(back_hidden, axis=0, keepdim=True)

    return loss


In [17]:
x = torch.rand((1, 2))
y = torch.rand((1, 2))

activation_hidden = ActivationSigmoid()
activation_output = ActivationLinear()
hidden_layer = DenseLayer(n_features=2, n_neurons=4, activation_fun=activation_hidden)
output_layer = DenseLayer(n_features=4, n_neurons=2, activation_fun=activation_output)

loss_function = LossMSE()

learning_rate = 0.001
num_epochs = 1000

hidden_layer.forward(x)
output_layer.forward(hidden_layer.output)
y_pred = output_layer.output
initial_loss = loss_function.forward(y_pred, y)

print("Initial loss:", initial_loss.item())
print("Initial prediction:", y_pred)

for epoch in range(num_epochs):
    loss = backward_propagation(x, y, hidden_layer, output_layer, activation_hidden, activation_output, loss_function, learning_rate=learning_rate)

    hidden_layer.forward(x)
    output_layer.forward(hidden_layer.get_output())
    y_pred = output_layer.output
    # Calculate error for convergence check
    err = loss_function.forward(y_pred, y)

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {err.item()}")

    # Convergence check
    if err <= 0.0001:
        print("Converged at epoch", epoch)
        break

print("Final loss:", err.item())
print("Final prediction:", y_pred)
print("Target value:", y)

Initial loss: 0.5130482316017151
Initial prediction: tensor([[0.0063, 0.0103]])
Epoch 0, Loss: 0.5089472532272339
Epoch 100, Loss: 0.22792719304561615
Epoch 200, Loss: 0.10179740190505981
Epoch 300, Loss: 0.04532473534345627
Epoch 400, Loss: 0.020127516239881516
Epoch 500, Loss: 0.008920165710151196
Epoch 600, Loss: 0.0039475117810070515
Epoch 700, Loss: 0.0017451572930440307
Epoch 800, Loss: 0.0007709802594035864
Epoch 900, Loss: 0.00034044316271319985
Final loss: 0.00015151695697568357
Final prediction: tensor([[0.6415, 0.7768]])
Target value: tensor([[0.6526, 0.7902]])
