# Backpropagation

## We have to consider the following steps

## How to start now?

## Building your neural network

In [4]:
# Your code

# Consider the following steps:
# 1) Loop through your training data
#   1. 1) Choose number of epochs (How often do you want to loop through your complete dataset?)
# 2) Forward the data through your network
# 3) Calculate the loss
# 4) Perform backpropagation with SGD and update the weights
#   4. 1) Choose a learning rate to update your weights
# Repeat 1, 2, 3, 4 until the training converges or maximum epochs are reached

In [1]:

'''
Input (X) = [x1, x2]
Target (y) = [1]

--- Forward Pass ---

1. Input Layer to Hidden Layer:
   w1 = weight from x1 to h1
   w2 = weight from x2 to h1
   w3 = weight from x1 to h2
   w4 = weight from x2 to h2
   b1 = bias for h1
   b2 = bias for h2

   h1 = sigmoid(w1*x1 + w2*x2 + b1)
   h2 = sigmoid(w3*x1 + w4*x2 + b2)

2. Hidden Layer to Output Layer:
   w5 = weight from h1 to output
   w6 = weight from h2 to output
   b3 = bias for output

   output = sigmoid(w5*h1 + w6*h2 + b3)

3. Calculate MSE Loss:
   loss = 0.5 * (output - y)^2


   

--- Backward Pass ---

1. Compute Gradients:
   - d_loss/d_output = output - y
   - d_output/d_w5 = h1 * output * (1 - output)
   - d_output/d_w6 = h2 * output * (1 - output)
   - d_output/d_h1 = w5 * output * (1 - output)
   - d_output/d_h2 = w6 * output * (1 - output)
   - d_h1/d_w1 = x1 * h1 * (1 - h1)
   - d_h1/d_w2 = x2 * h1 * (1 - h1)
   - d_h2/d_w3 = x1 * h2 * (1 - h2)
   - d_h2/d_w4 = x2 * h2 * (1 - h2)

2. Backpropagate Gradients:
   - Update weights using the learning rate:
     w1 = w1 - learning_rate * d_loss/d_output * d_output/d_h1 * d_h1/d_w1
   - Similarly, updating other weights and biases.

3. Repeating the process for multiple epochs until convergence.
'''


'Input (X) = [x1, x2]\nTarget (y) = [1]\n\n--- Forward Pass ---\n\n1. Input Layer to Hidden Layer:\n   w1 = weight from x1 to h1\n   w2 = weight from x2 to h1\n   w3 = weight from x1 to h2\n   w4 = weight from x2 to h2\n   b1 = bias for h1\n   b2 = bias for h2\n\n   h1 = sigmoid(w1*x1 + w2*x2 + b1)\n   h2 = sigmoid(w3*x1 + w4*x2 + b2)\n\n2. Hidden Layer to Output Layer:\n   w5 = weight from h1 to output\n   w6 = weight from h2 to output\n   b3 = bias for output\n\n   output = sigmoid(w5*h1 + w6*h2 + b3)\n\n3. Calculate MSE Loss:\n   loss = 0.5 * (output - y)^2\n\n--- Backward Pass ---\n\n1. Compute Gradients:\n   - d_loss/d_output = output - y\n   - d_output/d_w5 = h1 * output * (1 - output)\n   - d_output/d_w6 = h2 * output * (1 - output)\n   - d_output/d_h1 = w5 * output * (1 - output)\n   - d_output/d_h2 = w6 * output * (1 - output)\n   - d_h1/d_w1 = x1 * h1 * (1 - h1)\n   - d_h1/d_w2 = x2 * h1 * (1 - h1)\n   - d_h2/d_w3 = x1 * h2 * (1 - h2)\n   - d_h2/d_w4 = x2 * h2 * (1 - h2)\n\n2

In [5]:
import numpy as np

def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoid_derivative(x):
    return x*(1-x)

input_size = 2
hidden_layer_size = 2
output_size = 1

np.random.seed(42)
weights_input_hidden = np.random.rand(input_size, hidden_layer_size)  # shape 2x2
weights_hidden_output = np.random.rand(hidden_layer_size, output_size) #shape 2x1

bias_hidden = np.zeros((1, hidden_layer_size))
bias_output = np.zeros((1,output_size))

#Training
X = np.array([[0.5,0.6]])
y = np.array([[0.8]])


#Parameters
learning_rate = 0.1
epochs = 100



for epoch in range(epochs):
    # Forward Pass
    # Input to Hidden Layer

    hidden_input = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)

    # Hidden Layer to Output Layer
    output_input = np.dot(hidden_output, weights_hidden_output) + bias_output
    predicted_output = sigmoid(output_input)

    # Calculating Loss
    loss = 0.5 * np.sum((predicted_output - y) ** 2)

    # Backward Pass
    # Computing Gradients
    d_loss_d_output = predicted_output - y
    d_output_d_hidden = sigmoid_derivative(predicted_output)
    d_hidden_d_input = sigmoid_derivative(hidden_output)

    # Updating weights and biases using the chain rule
    weights_hidden_output -= learning_rate * np.dot(hidden_output.T, d_loss_d_output * d_output_d_hidden)
    weights_input_hidden -= learning_rate * np.dot(X.T, np.dot(d_loss_d_output * d_output_d_hidden, weights_hidden_output.T) * d_hidden_d_input)

    bias_output -= learning_rate * np.sum(d_loss_d_output * d_output_d_hidden, axis=0, keepdims=True)
    bias_hidden -= learning_rate * np.sum(np.dot(d_loss_d_output * d_output_d_hidden, weights_hidden_output.T) * d_hidden_d_input, axis=0, keepdims=True)

    # Printing the loss for every 10 epochs
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss}')



Epoch [10/100], Loss: 0.024850926922809005
Epoch [20/100], Loss: 0.019815846923920258
Epoch [30/100], Loss: 0.015923917754564688
Epoch [40/100], Loss: 0.012899201199448299
Epoch [50/100], Loss: 0.010531406234111765
Epoch [60/100], Loss: 0.008662813808854386
Epoch [70/100], Loss: 0.007175820212663337
Epoch [80/100], Loss: 0.00598270229969578
Epoch [90/100], Loss: 0.005017757799181896
Epoch [100/100], Loss: 0.0042314681042219015
