# Building a Feedforward Neural Network (FFNN) from Scratch

### 1. Building a Feedforward Neural Network (FFNN)

In [37]:
import numpy as np

# Task 1: Build a Feedforward Neural Network (FFNN)
# Layer sizes: 2 input features -> 2 hidden units -> 1 output unit.
input_size = 2
hidden_size = 2
output_size = 1

# Fix the random seed so that "Restart Kernel -> Run All" always starts from the
# same initial parameters; without it every run trains a different network.
# NOTE(review): the outputs saved in this notebook predate the seed - re-run all
# cells to refresh them.
# NOTE(review): convergence on XOR with only two hidden units presumably depends
# on the starting weights - confirm this seed trains to a low loss, else change it.
SEED = 42
np.random.seed(SEED)

# Initialize random (standard-normal) weights and biases for the hidden and
# output layers. Biases are kept 2-D (1, n) so they broadcast over batch rows.
hidden_weights = np.random.randn(input_size, hidden_size)
hidden_biases = np.random.randn(1, hidden_size)
output_weights = np.random.randn(hidden_size, output_size)
output_biases = np.random.randn(1, output_size)

### 2. Implementing Forward Propagation

In [38]:
# Task 2: Implement Forward Propagation
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The value is computed as exp(-log(1 + exp(-x))) through np.logaddexp,
    which avoids the overflow (and RuntimeWarning) that the naive
    ``1 / (1 + np.exp(-x))`` produces for large negative inputs.

    Args:
        x: Real scalar or array-like.

    Returns:
        NumPy float scalar or array shaped like ``x`` with values in [0, 1].
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), evaluated stably.
    return np.exp(-np.logaddexp(0, np.negative(x)))

def forward_propagation(input_data):
    """Run a forward pass through the hidden and output layers.

    Uses the module-level parameters ``hidden_weights``, ``hidden_biases``,
    ``output_weights`` and ``output_biases`` as they are at call time.

    Args:
        input_data: Input accepted by ``np.dot`` against ``hidden_weights``.

    Returns:
        The sigmoid activation of the output layer.
    """
    # Hidden layer: affine transform followed by the sigmoid non-linearity.
    hidden_activation = sigmoid(np.dot(input_data, hidden_weights) + hidden_biases)
    # Output layer: same pattern applied to the hidden activations.
    return sigmoid(np.dot(hidden_activation, output_weights) + output_biases)

### 3. Defining a Loss Function (Mean Squared Error)

In [39]:
# Task 3: Define a Loss Function (Mean Squared Error)
def mean_squared_error(predicted, actual):
    """Return the mean of the squared element-wise differences.

    Both inputs are converted to float arrays first, so plain Python
    lists are accepted and narrow integer dtypes cannot wrap around when the
    difference is squared.

    Args:
        predicted: Array-like of predictions.
        actual: Array-like of targets, broadcast-compatible with ``predicted``.

    Returns:
        The mean squared error as a NumPy float.
    """
    predicted = np.asarray(predicted, dtype=float)
    actual = np.asarray(actual, dtype=float)
    return np.mean((predicted - actual) ** 2)

### 4. Initializing Hyperparameters

In [40]:
# Task 4: Initialize Hyperparameters
learning_rate = 1e-1  # step size applied to every parameter update
num_epochs = 10_000   # number of passes over the dataset
batch_size = 4        # XOR problem dataset size

# XOR problem dataset: every combination of two binary inputs, one per row ...
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
# ... and the matching targets as a column vector (1 when exactly one input is 1).
y = X[:, :1] ^ X[:, 1:]

#### Learning Rate:

Importance: The learning rate controls the step size of weight updates during training. It is arguably one of the most critical hyperparameters in training neural networks.
Impact: A learning rate that is too high makes each update overshoot the optimal weights, so the loss can oscillate or even diverge instead of settling. A learning rate that is too low leads to slow convergence and can leave training stuck in a suboptimal solution.
Tuning: The learning rate needs to be tuned carefully. Common strategies include trying a range of values (e.g., 0.1, 0.01, 0.001) and using learning rate schedules (e.g., reducing the learning rate over time) to balance convergence speed and stability.

#### Number of Training Epochs:

Importance: The number of training epochs determines how many times the entire dataset is processed during training. It controls the duration of training.
Impact: Too few epochs may result in underfitting, where the model doesn't capture the underlying patterns in the data. Too many epochs can lead to overfitting, where the model memorizes the training data but doesn't generalize well to new data.
Tuning: The number of epochs depends on the complexity of the problem and the dataset. Techniques like early stopping can help determine when to stop training based on validation performance.

#### Batch Size:

Importance: The batch size defines how many samples are used in each iteration of training. It affects both memory usage and convergence behavior.
Impact: Smaller batch sizes introduce more noise into the gradient estimates, which makes individual steps less reliable but can help convergence and generalization. Larger batch sizes provide more stable gradient estimates but need more memory per step and perform fewer weight updates per epoch, which can slow convergence.
Tuning: The choice of batch size depends on factors like the dataset size, available memory, and model architecture. Values like 32, 64, or 128 are commonly used starting points.

These hyperparameters are crucial because they directly influence how the neural network learns and generalizes from the data.

### 5 & 6. Implementing the Backpropagation Algorithm & Monitoring Loss During Training

In [41]:
# Task 5: Implement Backpropagation Algorithm
# Every epoch runs one forward pass, one backward pass and one in-place
# parameter update over the whole dataset.
for epoch in range(num_epochs):
    # Mini-batch training (in this case, the whole dataset is one batch)
    input_data, target_output = X, y

    # Forward pass; the intermediate activations are kept for the backward pass.
    hidden_layer_input = input_data.dot(hidden_weights) + hidden_biases
    hidden_layer_output = sigmoid(hidden_layer_input)
    output_layer_input = hidden_layer_output.dot(output_weights) + output_biases
    output_layer_output = sigmoid(output_layer_input)

    # The loss is only reported below; it does not feed into the updates.
    loss = mean_squared_error(output_layer_output, target_output)

    # Backward pass. Each delta is the layer error scaled by the sigmoid
    # derivative s * (1 - s), expressed through the layer's own output.
    output_error = target_output - output_layer_output
    output_delta = np.multiply(
        output_error * output_layer_output, 1 - output_layer_output
    )

    # Must use the output weights from before this epoch's update.
    hidden_layer_error = np.dot(output_delta, output_weights.T)
    hidden_layer_delta = np.multiply(
        hidden_layer_error * hidden_layer_output, 1 - hidden_layer_output
    )

    # In-place updates. The error is (target - output), so adding the scaled
    # term moves the predictions toward the targets.
    output_weights += learning_rate * np.dot(hidden_layer_output.T, output_delta)
    output_biases += learning_rate * output_delta.sum(axis=0, keepdims=True)
    hidden_weights += learning_rate * np.dot(input_data.T, hidden_layer_delta)
    hidden_biases += learning_rate * hidden_layer_delta.sum(axis=0, keepdims=True)

    # Task 6: Monitor loss during training (report every 1000th epoch)
    if not epoch % 1000:
        print(f"Epoch {epoch}, Loss: {loss}")

Epoch 0, Loss: 0.4074865958576914
Epoch 1000, Loss: 0.24994441838112022
Epoch 2000, Loss: 0.24739232166248906
Epoch 3000, Loss: 0.22970976328254944
Epoch 4000, Loss: 0.18396367752620552
Epoch 5000, Loss: 0.151759826701205
Epoch 6000, Loss: 0.06278094996135467
Epoch 7000, Loss: 0.015358949083753171
Epoch 8000, Loss: 0.00791742059911669
Epoch 9000, Loss: 0.005210586222572294


### 7. Printing and Visualizing Weights (printing weights after training)

In [42]:
# Task 7: Print and Visualize Weights (printing weights after training)
# Each weight matrix is printed under its own heading line.
for title, weights in (
    ("Hidden Layer Weights:", hidden_weights),
    ("Output Layer Weights:", output_weights),
):
    print(title)
    print(weights)

Hidden Layer Weights:
[[-4.408388   -5.4296859 ]
 [ 4.87632863  6.05797701]]
Output Layer Weights:
[[-6.75596773]
 [ 7.14608832]]


### 8. Testing the Trained Network

In [43]:
# Task 8: Test the Trained Network
# The four XOR input pairs and their expected outputs (as a column vector).
test_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
expected_output = np.array([0, 1, 1, 0]).reshape(-1, 1)

# Forward pass on the test data with the current (trained) parameters
predicted_output = forward_propagation(test_data)

# Calculate accuracy for XOR problem: round each prediction to the nearest
# integer and report the share that matches the expected label, in percent.
correct_predictions = predicted_output.round()
accuracy = 100 * (correct_predictions == expected_output).mean()

print("Test Accuracy:", accuracy, "%")

Test Accuracy: 100.0 %
