In [7]:
import math

def sigmoid(x):
    return 1/(1+math.exp(-x))

def sigmoid_derivative(x):
    sx = sigmoid(x)
    return sx * (1-sx)

In [9]:
#inputs
x1 = 0.05
x2 = 0.10

#target ouput
target = 0.01

#weights i/p -> hidden
w1 = 0.15 # x1 -> h1
w2 = 0.20 # x2 -> h2
w3 = 0.25 # x1 -> h2
w4 = 0.30 # x2 -> h2

#weights hidden -> o/p
w5 = 0.40 # h1 -> o/p
w6 = 0.45 # h2 -> o/p

#biases
b1 = 0.35 # for both hidden neurons
b2 = 0.60 # o/p neuron

In [12]:
#forward pass hidden layer
#hidden layer i/p before activation
z_h1 = x1*w1 + x2*w2 + b1
z_h2 = x1*w3 + x2*w4 + b1

#hidden layer o/p after activation
h1 = sigmoid(z_h1)
h2 = sigmoid(z_h2)

print(f"hidden neuron h1: input = {z_h1:.5f}, output = {h1:.5f}")
print(f"hidden neuron h2: input = {z_h2:.5f}, output = {h2:.5f}")

hidden neuron h1: input = 0.37750, output = 0.59327
hidden neuron h2: input = 0.39250, output = 0.59688


In [13]:
#forward pass o/p layer
#weighted sum i/p to o/p neuron
z_out = h1*w5 + h2*w6 + b2

#activated o/p prediction
y_hat = sigmoid(z_out)

print(f"output neuron: input = {z_out:.5f}, output = {y_hat:.5f}")

output neuron: input = 1.10591, output = 0.75137


In [14]:
#mean square error
loss = 0.5 * (target - y_hat) ** 2
print(f"loss: {loss:.5f}")

loss: 0.27481


In [16]:
#calculating backpropagation gradients

#calculate gradient at o/p neuron

#dE/dy_hat
dE_dy_hat = y_hat - target

#dy_hat_dz_out (derivative od sigmoid)
dy_hat_dz_out = sigmoid_derivative(z_out)

#chain rule: dE/dz_out
dE_dz_out = dE_dy_hat* dy_hat_dz_out

print(f"dE/dz_out: {dE_dz_out:.5f}")

dE/dz_out: 0.13850


In [21]:
#gradients of loss w.r.t weights from hidden -> o/p layer

dE_dw5 = dE_dz_out * h1
dE_dw6 = dE_dz_out * h2

print(f"dE/dw5: {dE_dw5:.5f}")
print(f"dE/dw6: {dE_dw6:.5f}")

dE/dw5: 0.08217
dE/dw6: 0.08267


In [22]:
learning_rate = 0.5
w5 = w5 - learning_rate * dE_dw5
w6 = w6 - learning_rate * dE_dw6

print(f"updated w5: {w5:.5f}")
print(f"updated w6: {w6:.5f}")

updated w5: 0.35892
updated w6: 0.40867


In [23]:
#delta for hidden neurons
delta_h1 = dE_dz_out * w5 * sigmoid_derivative(z_h1)
delta_h2 = dE_dz_out * w6 * sigmoid_derivative(z_h2)

print(f"delta_h1: {delta_h1:.5f}")
print(f"delta_h2: {delta_h2:.5f}")

delta_h1: 0.01199
delta_h2: 0.01362


In [25]:
dE_dw1 = delta_h1 * x1
dE_dw2 = delta_h1 * x2
dE_dw3 = delta_h2 * x1
dE_dw4 = delta_h2 * x2

print(f"dE/dw1: {dE_dw1:.5f}")
print(f"dE/dw2: {dE_dw2:.5f}")
print(f"dE/dw3: {dE_dw3:.5f}")
print(f"dE/dw4: {dE_dw4:.5f}")

dE/dw1: 0.00060
dE/dw2: 0.00120
dE/dw3: 0.00068
dE/dw4: 0.00136


In [26]:
w1 = w1 - learning_rate * dE_dw1
w2 = w2 - learning_rate * dE_dw2
w3 = w3 - learning_rate * dE_dw3
w4 = w4 - learning_rate * dE_dw4

print(f"Updated w1: {w1:.5f}")
print(f"Updated w2: {w2:.5f}")
print(f"Updated w3: {w3:.5f}")
print(f"Updated w4: {w4:.5f}")


Updated w1: 0.14970
Updated w2: 0.19940
Updated w3: 0.24966
Updated w4: 0.29932


In [27]:
for epoch in range(1, 101):  # 100 iterations
    # forward pass
    z_h1 = x1 * w1 + x2 * w2 + b1
    z_h2 = x1 * w3 + x2 * w4 + b1
    h1 = sigmoid(z_h1)
    h2 = sigmoid(z_h2)

    z_out = h1 * w5 + h2 * w6 + b2
    y_hat = sigmoid(z_out)

    loss = 0.5 * (target - y_hat) ** 2

    dE_dy_hat = y_hat - target
    dy_hat_dz_out = sigmoid_derivative(z_out)
    dE_dz_out = dE_dy_hat * dy_hat_dz_out

    dE_dw5 = dE_dz_out * h1
    dE_dw6 = dE_dz_out * h2

    # gradients for output bias
    dE_db2 = dE_dz_out

    # delta for hidden neurons
    delta_h1 = dE_dz_out * w5 * sigmoid_derivative(z_h1)
    delta_h2 = dE_dz_out * w6 * sigmoid_derivative(z_h2)

    # Gradients for input weights
    dE_dw1 = delta_h1 * x1
    dE_dw2 = delta_h1 * x2
    dE_dw3 = delta_h2 * x1
    dE_dw4 = delta_h2 * x2

    # gradients for hidden bias
    dE_db1 = delta_h1 + delta_h2

    # Update weights and biases
    w1 -= learning_rate * dE_dw1
    w2 -= learning_rate * dE_dw2
    w3 -= learning_rate * dE_dw3
    w4 -= learning_rate * dE_dw4

    w5 -= learning_rate * dE_dw5
    w6 -= learning_rate * dE_dw6

    b1 -= learning_rate * dE_db1
    b2 -= learning_rate * dE_db2

    if epoch % 10 == 0:
        print(f"Epoch {epoch}: Loss = {loss:.5f}, Output = {y_hat:.5f}")


Epoch 10: Loss = 0.11822, Output = 0.49624
Epoch 20: Loss = 0.04518, Output = 0.31061
Epoch 30: Loss = 0.02329, Output = 0.22584
Epoch 40: Loss = 0.01463, Output = 0.18103
Epoch 50: Loss = 0.01029, Output = 0.15349
Epoch 60: Loss = 0.00778, Output = 0.13474
Epoch 70: Loss = 0.00617, Output = 0.12108
Epoch 80: Loss = 0.00506, Output = 0.11062
Epoch 90: Loss = 0.00426, Output = 0.10232
Epoch 100: Loss = 0.00366, Output = 0.09555


In [29]:
import torch
import torch.nn as nn
import torch.optim as optim

x = torch.tensor([0.05, 0.10], dtype=torch.float32)
target = torch.tensor([0.6], dtype=torch.float32)

In [30]:
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.hidden = nn.Linear(2, 2)   # input layer to hidden layer (2 inputs → 2 neurons)
        self.output = nn.Linear(2, 1)   # hidden layer to output layer (2 neurons → 1 output)
        self.sigmoid = nn.Sigmoid()     # sigmoid activation

    def forward(self, x):
        x = self.sigmoid(self.hidden(x))  # hidden layer + activation
        x = self.sigmoid(self.output(x))  # output layer + activation
        return x

# Instantiate the network
net = SimpleNet()


In [31]:
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.5)
# zero gradients from previous step
optimizer.zero_grad()

# forward pass
output = net(x)

loss = criterion(output, target)

# backward pass
loss.backward()

# update weights
optimizer.step()

print(f"Output: {output.item():.5f}, Loss: {loss.item():.5f}")



Output: 0.39019, Loss: 0.04402


In [32]:
for epoch in range(1, 101):
    optimizer.zero_grad()
    output = net(x)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch {epoch}: Output = {output.item():.5f}, Loss = {loss.item():.5f}")

Epoch 10: Output = 0.53559, Loss = 0.00415
Epoch 20: Output = 0.57998, Loss = 0.00040
Epoch 30: Output = 0.59354, Loss = 0.00004
Epoch 40: Output = 0.59789, Loss = 0.00000
Epoch 50: Output = 0.59930, Loss = 0.00000
Epoch 60: Output = 0.59977, Loss = 0.00000
Epoch 70: Output = 0.59992, Loss = 0.00000
Epoch 80: Output = 0.59998, Loss = 0.00000
Epoch 90: Output = 0.59999, Loss = 0.00000
Epoch 100: Output = 0.60000, Loss = 0.00000
