In [None]:
import torch
import numpy as np 
import matplotlib.pyplot as plt
import warnings

import torch.nn as nn
import torch.nn.functional as F



# XOR example

 X1 | X2| Y 
----|----|----
0|0|0
0|1|1
1|0|1
1|1|0


<p align="center">
  <img src="images/Ann_1.jpg" alt="Computational Graph">
</p>

In [None]:
# XOR truth table: one row per input combination, two binary features per row.
X = torch.tensor(
    [[0, 0], [0, 1], [1, 0], [1, 1]],
    dtype=torch.float32,
)

# Target column vector: 1 when exactly one of the two inputs is 1, else 0.
y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

In [None]:
# Sanity-check the tensor shapes:
#   X holds 4 cases with two features each, y holds 4 cases with one output each.
print(f"Input: {X.shape}", f"Output: {y.shape}", sep="\n")

In [None]:
# Set seeds for reproducibility. The weights in this notebook are drawn with
# torch.rand, which uses PyTorch's own random generator, so seeding NumPy
# alone does not make the run repeatable; seed both libraries.
np.random.seed(1)
torch.manual_seed(1)

# Layer sizes of the network
input_layer_neurons = X.shape[1]  # number of input features (columns of X)
hidden_layer_neurons = 3  # total of 3 hidden units
output_neuron = 1  # output size

In [None]:
# Input -> hidden layer: weights and biases drawn uniformly from [-1, 1).
# nn.Parameter tracks gradients by default, so requires_grad need not be spelled out.
W1 = nn.Parameter(torch.rand(input_layer_neurons, hidden_layer_neurons) * 2 - 1)
b1 = nn.Parameter(torch.rand(1, hidden_layer_neurons) * 2 - 1)

# Report the shapes of the first layer's parameters
print(f"Weights shape from input to hidden layer w_1: {W1.shape}")
print(f"Bias shape from input to hidden layer b_1: {b1.shape}")

# Hidden -> output layer: same initialisation scheme
W2 = nn.Parameter(torch.rand(hidden_layer_neurons, output_neuron) * 2 - 1)
b2 = nn.Parameter(torch.rand(1, output_neuron) * 2 - 1)

# Report the shapes of the second layer's parameters
print(f"Weights shape from hidden layer to output layer w_2: {W2.shape}")
print(f"Bias shape from hidden layer to output layer b_2: {b2.shape}")

## Forward Propagation with Bias

- Input to Hidden layer
    - Compute the pre-activation $z^{(1)}$
    $$z^{(1)} = X_{4\times2}W^{(1)}_{2\times3} + b^{(1)}_{1\times3} \hspace{10mm}(b^{(1)} \text{ is broadcast across all rows})$$
    - Apply the activation function to obtain $a^{(1)}$
    $$a^{(1)}=\sigma(z^{(1)})$$



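Example: adding a $1\times3$ row vector to a $4\times3$ matrix broadcasts the row vector across all four rows.
$$
\begin{aligned}
A_{4\times3} + B_{1\times3}
&= \left(\begin{array}{ccc}
1 & 2 & 3 \\[10pt]
4 & 5 & 6 \\[10pt]
7 & 8 & 9 \\[10pt]
5 & 2 & 3
\end{array}\right)_{4\times3} +
\left(\begin{array}{ccc}
10 & 20 & 30
\end{array}\right)_{1\times3}
\\[10pt]
&= \left(\begin{array}{ccc}
1 & 2 & 3 \\[10pt]
4 & 5 & 6 \\[10pt]
7 & 8 & 9 \\[10pt]
5 & 2 & 3
\end{array}\right)_{4\times3} +
\left(\begin{array}{ccc}
10 & 20 & 30 \\[10pt]
10 & 20 & 30 \\[10pt]
10 & 20 & 30 \\[10pt]
10 & 20 & 30
\end{array}\right)_{4\times3}
\\[10pt]
&= \left(\begin{array}{ccc}
11 & 22 & 33 \\[10pt]
14 & 25 & 36 \\[10pt]
17 & 28 & 39 \\[10pt]
15 & 22 & 33
\end{array}\right)_{4\times3}
\end{aligned}
$$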

In [None]:
# Hidden-layer pre-activation; b1 is broadcast over the rows of the product.
z1 = torch.matmul(X, W1) + b1
# Element-wise sigmoid turns the pre-activations into hidden activations.
a1 = torch.sigmoid(z1)

print(f"Pre activation function shape z_1: {z1.shape}")
print(f"Activation function shape a_1: {a1.shape}")

- Hidden to Output layer
    - Compute the pre-activation $z^{(2)}$
    $$z^{(2)} = a^{(1)}_{4\times3}W^{(2)}_{3\times1} + b^{(2)}_{1\times1} \hspace{10mm}(b^{(2)} \text{ is broadcast across all rows})$$
    - Apply the activation function to obtain $a^{(2)}$
    $$a^{(2)}=\sigma(z^{(2)})$$


In [None]:
# Output-layer pre-activation; b2 is broadcast over the rows of the product.
z2 = torch.matmul(a1, W2) + b2
# Sigmoid of the pre-activation is the network output.
a2 = torch.sigmoid(z2)

print(f"Pre activation function shape z_2: {z2.shape}")
print(f"Activation function shape a_2: {a2.shape}")

# The prediction is the activation of the output layer.
y_hat = a2

- Calculate the error (half of the mean squared error between prediction and target)
    $$\hat{y} = a^{(2)}$$
    $$\mathcal{L}= \frac{1}{2n} \sum_{i=1}^{n}(\hat{y}_i- y_i)^2$$

In [None]:
# Half of the mean squared error between targets and predictions
squared_error = (y - y_hat).pow(2)
loss = squared_error.mean() / 2

print(f"MSE: {loss}")

In [None]:
def initialize(input_layer_neurons, hidden_layer_neurons, output_neuron):
    """Create randomly initialised parameters for a one-hidden-layer network.

    Every entry is computed as ``2 * torch.rand(...) - 1``. The shape of each
    parameter is printed as a side effect.

    Args:
        input_layer_neurons: number of input features (rows of ``W1``).
        hidden_layer_neurons: number of hidden units (columns of ``W1``,
            rows of ``W2``).
        output_neuron: number of output units (columns of ``W2``).

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of ``torch.nn.Parameter`` objects with shapes
        ``(input, hidden)``, ``(1, hidden)``, ``(hidden, output)`` and
        ``(1, output)``.
    """
    # Shapes listed in the order the random draws are made: W1, b1, W2, b2.
    shapes = (
        (input_layer_neurons, hidden_layer_neurons),
        (1, hidden_layer_neurons),
        (hidden_layer_neurons, output_neuron),
        (1, output_neuron),
    )
    W1, b1, W2, b2 = (
        torch.nn.Parameter(torch.rand(*shape) * 2 - 1) for shape in shapes
    )

    # Report the shapes, first layer then second layer
    print(f"Weights shape from input to hidden layer w_1: {W1.shape}")
    print(f"Bias shape from input to hidden layer b_1: {b1.shape}")
    print(f"Weights shape from hidden layer to output layer w_2: {W2.shape}")
    print(f"Bias shape from hidden layer to output layer b_2: {b2.shape}")

    return W1, b1, W2, b2

In [None]:
# Forward pass
def forward_prop(X, W1, b1, W2, b2):
    """Run one forward pass through the two-layer sigmoid network.

    Args:
        X: input batch, matrix-multiplied with ``W1``.
        W1, b1: weights and bias from the input to the hidden layer.
        W2, b2: weights and bias from the hidden to the output layer.

    Returns:
        Tuple ``(z1, a1, z2, a2)``: the pre-activation and sigmoid activation
        of the hidden layer, followed by those of the output layer.
    """
    # Input -> hidden layer
    z1 = X @ W1 + b1
    a1 = z1.sigmoid()

    # Hidden -> output layer
    z2 = a1 @ W2 + b2
    a2 = z2.sigmoid()

    return z1, a1, z2, a2

In [None]:
# Training hyper-parameters
n_epochs = 100000  # number of full-batch gradient-descent steps
eta = 9e-2  # learning rate

# Start from freshly initialised parameters
W1, b1, W2, b2 = initialize(input_layer_neurons, hidden_layer_neurons, output_neuron)

losses = []  # loss value recorded at every epoch, used for plotting later

# The total is kept in `n_epochs` so the loop variable does not overwrite it.
for epoch in range(n_epochs):
    z1, a1, z2, a2 = forward_prop(X, W1, b1, W2, b2)

    # Half mean squared error between targets and network output
    loss = torch.mean((y - a2) ** 2) / 2
    losses.append(loss.item())

    # The graph is rebuilt by every forward pass, so it does not need to be
    # retained after the backward pass.
    loss.backward()

    # Plain gradient-descent step; no_grad keeps the in-place updates out of
    # the autograd graph.
    with torch.no_grad():
        W1 -= eta * W1.grad
        b1 -= eta * b1.grad
        W2 -= eta * W2.grad
        b2 -= eta * b2.grad

        # Manually zero the gradients after updating the weights, because
        # backward() accumulates into .grad.
        W1.grad.zero_()
        b1.grad.zero_()
        W2.grad.zero_()
        b2.grad.zero_()

    if epoch % 5000 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")






In [None]:
# Create a mesh over the input square (with a margin) for the decision regions
xx, yy = np.meshgrid(np.linspace(-0.5, 1.5, 100), np.linspace(-0.5, 1.5, 100))
grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)

# Forward pass on the grid. This is inference only, so no autograd graph
# needs to be recorded for the 10,000 grid points.
with torch.no_grad():
    _, _, _, a2_grid = forward_prop(grid, W1, b1, W2, b2)
a2_grid = a2_grid.numpy().reshape(xx.shape)

# Draw the decision regions first so the data points are painted on top of
# the translucent fill instead of underneath it.
regions = plt.contourf(xx, yy, a2_grid, levels=[0, 0.5, 1], alpha=0.2, colors=['blue', 'yellow'])
plt.colorbar(regions)

# Plot the data points, coloured by their XOR label
plt.scatter(X[:, 0].numpy(), X[:, 1].numpy(), c=y.numpy().ravel(), cmap='viridis', marker='o', s=100, edgecolor='k')
plt.title('XOR Problem')
plt.xlabel('X1')
plt.ylabel('X2')
plt.show()

# Plot the loss over iterations
plt.plot(losses)
plt.title('Loss over iterations')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# Final output: threshold the network output at 0.5 to get class labels
with torch.no_grad():
    _, _, _, final_output = forward_prop(X, W1, b1, W2, b2)
predictions = np.where(final_output.numpy() > 0.5, 1, 0)

print("Final predictions:\n", predictions)

# Exercise

Experiment with other activation functions (for example `torch.tanh` or `torch.relu`) in the hidden layer and compare the results with the sigmoid network above.