<a href="https://colab.research.google.com/github/S-VATS31/Deep-Learning-Models-Mini-Models/blob/main/Backpropagation_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import Numpy**


In [5]:
import numpy as np


# **Neural Network and Forward/Backward Propagation from Scratch**

In [6]:
class NeuralNetwork:
    def __init__(self, layer_sizes):
        # Set up Number of Layers and Layer Sizes
        self.layers = len(layer_sizes)
        self.layer_sizes = layer_sizes
        self.params = self.initialize_params()

    # Initialize Parameters
    def initialize_params(self):
        params = {}
        for layer in range(1, self.layers):
            # Xavier Initialization
            params[f'W{layer}'] = np.random.randn(self.layer_sizes[layer], self.layer_sizes[layer-1]) * np.sqrt(2 / self.layer_sizes[layer-1])
            params[f'b{layer}'] = np.zeros((self.layer_sizes[layer], 1))
        return params

    def weighted_sum(self, A_prev, W, b):
        # Ensure A_prev is properly shaped (features × examples)
        if len(A_prev.shape) == 2 and A_prev.shape[1] != W.shape[1]:
            A_prev = A_prev.T
        Z = np.dot(W, A_prev) + b # Weighted Sum Calculation
        return Z

    def sigmoid_activation(self, Z): # Sigmoid Function
        A = 1.0 / (1.0 + np.exp(-Z))
        return A

    def derivative_sigmoid(self, A): # Derivative of Sigmoid Function
        dA = A * (1 - A)
        return dA

    def forward_propagation(self, X):
        # X should be features x examples
        if X.shape[0] != self.layer_sizes[0]:
            X = X.T

        A = X # Set Initial Activation Values
        cache = {'A0': A} # Set up Cache for Backpropagation

        for layer in range(1, self.layers):
            A_prev = A  # Activation of Layer A - 1
            W = self.params[f'W{layer}']
            b = self.params[f'b{layer}']

            Z = self.weighted_sum(A_prev, W, b) # Call Weighted sum Function
            A = self.sigmoid_activation(Z) # Call Sigmoid Activation Function

            cache[f'Z{layer}'] = Z # Store Z values in cache (Z)
            cache[f'A{layer}'] = A # Store Activations in Cache (A)

        return A, cache

    def backward_propagation(self, X, y, alpha):
        # Transpose if needed
        if X.shape[0] != self.layer_sizes[0]:
            X = X.T
        if y.shape[0] != self.layer_sizes[-1]:
            y = y.T

        m = X.shape[1]  # Number of examples
        A_final, cache = self.forward_propagation(X)  # Call forward propagation function

        # Initialize dA for output layer
        dA = (2/m) * (A_final - y)  # Derivative of Loss W.R.T Output

        for layer in range(self.layers - 1, 0, -1):
            # Get the activation from previous layer
            A_prev = cache[f'A{layer-1}'] if layer > 1 else X

            # Calculate gradients
            dZ = dA * self.derivative_sigmoid(cache[f'A{layer}'])
            dW = (1/m) * np.dot(dZ, A_prev.T)
            db = (1/m) * np.sum(dZ, axis=1, keepdims=True)

            # Update parameters
            self.params[f'W{layer}'] -= alpha * dW
            self.params[f'b{layer}'] -= alpha * db

            # Prepare dA for previous layer
            if layer > 1:
                dA = np.dot(self.params[f'W{layer}'].T, dZ)

layer_sizes = [3, 5, 1]
nn = NeuralNetwork(layer_sizes)

# Random Data
X = np.random.randn(10, 3)  # 10 examples, 3 features
y = np.random.randn(10, 1)  # 10 examples, 1 output

# Training Loop
for i in range(1000):
    A_final, _ = nn.forward_propagation(X)
    nn.backward_propagation(X, y, alpha=1e-4)

ValueError: shapes (5,3) and (10,3) not aligned: 3 (dim 1) != 10 (dim 0)

**X** = Feature Matrix

**y** = True Output

