#Neural_Network_Extended

1)Architecture:

Supports customizable layers (input, hidden, output) and activation functions (e.g., ReLU, Sigmoid, Tanh).
Xavier initialization for weights ensures efficient training.

2)Training:

Implements forward and backward propagation with gradient clipping to stabilize updates.
Dropout for regularization, learning rate decay for gradual optimization.
Batch training with optional batch size configuration.

3)Evaluation:

Calculates loss (mean squared error) and accuracy.
Makes predictions with threshold-based binary classification.

4)Utilities:

Save and load model weights/biases using JSON.
Plots training loss for visualization.

5)Example:

Solves the XOR problem with a 2-input, 1-hidden layer (4 neurons), and 1-output architecture.


Key Outputs:

Training Loss over epochs (visualized via plot_training_history).
Final Results: Loss, accuracy, and detailed predictions for the XOR dataset.
Strengths:
Flexible and modular design.
Includes dropout, learning rate decay, and gradient clipping for robustness.
Example Use Case:
The code is an educational implementation for solving small-scale problems like XOR classification and can be expanded for other feedforward neural network tasks.

In [6]:
import numpy as np
from typing import List, Tuple, Optional
import json
import logging
import matplotlib.pyplot as plt

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

class NeuralNetwork:
    """
    A flexible implementation of a feedforward neural network using NumPy.
    Supports multiple hidden layers, activation functions, learning rate decay, dropout, and more.
    """

    def __init__(self, input_size: int, hidden_layers_sizes: List[int], output_size: int,
                 activations: List[str], dropout_rates: Optional[List[float]] = None):
        """
        Initialize the neural network with the specified architecture.

        Args:
            input_size: Number of input features
            hidden_layers_sizes: List containing the size of each hidden layer
            output_size: Number of output neurons
            activations: List of activation functions for each layer
            dropout_rates: List of dropout rates for each layer (optional)
        """
        self.input_size = input_size
        self.hidden_layers_sizes = hidden_layers_sizes
        self.output_size = output_size
        self.activations = activations
        self.dropout_rates = dropout_rates if dropout_rates else [0.0] * len(activations)

        # Initialize weights and biases
        self.weights = []
        self.biases = []

        layer_sizes = [input_size] + hidden_layers_sizes + [output_size]
        for i in range(len(layer_sizes) - 1):
            limit = np.sqrt(6 / (layer_sizes[i] + layer_sizes[i + 1]))  # Xavier Initialization
            self.weights.append(np.random.uniform(-limit, limit, (layer_sizes[i], layer_sizes[i + 1])))
            self.biases.append(np.zeros((1, layer_sizes[i + 1])))

        self.activations_cache = []
        self.dropouts_cache = []

    @staticmethod
    def activation_function(name: str, x: np.ndarray) -> np.ndarray:
        """Apply the specified activation function."""
        if name == "sigmoid":
            return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
        elif name == "relu":
            return np.maximum(0, x)
        elif name == "tanh":
            return np.tanh(x)
        raise ValueError(f"Unsupported activation function: {name}")

    @staticmethod
    def activation_derivative(name: str, x: np.ndarray) -> np.ndarray:
        """Compute the derivative of the specified activation function."""
        if name == "sigmoid":
            s = 1 / (1 + np.exp(-np.clip(x, -500, 500)))
            return s * (1 - s)
        elif name == "relu":
            return np.where(x > 0, 1, 0)
        elif name == "tanh":
            return 1 - np.tanh(x) ** 2
        raise ValueError(f"Unsupported activation function: {name}")

    def forward(self, X: np.ndarray) -> np.ndarray:
        """Perform forward propagation through the network."""
        self.activations_cache = [X]
        self.dropouts_cache = []

        for i in range(len(self.weights)):
            z = np.dot(self.activations_cache[-1], self.weights[i]) + self.biases[i]
            activation = self.activation_function(self.activations[i], z)

            # Apply dropout if specified
            if self.dropout_rates[i] > 0:
                dropout_mask = np.random.rand(*activation.shape) > self.dropout_rates[i]
                activation = activation * dropout_mask / (1 - self.dropout_rates[i])
                self.dropouts_cache.append(dropout_mask)
            else:
                self.dropouts_cache.append(None)

            self.activations_cache.append(activation)

        return self.activations_cache[-1]

    def backward(self, X: np.ndarray, y: np.ndarray, learning_rate: float) -> None:
        """Perform backpropagation to update weights and biases."""
        m = len(y)
        y = y.reshape(-1, 1)

        error = self.activations_cache[-1] - y
        delta = error * self.activation_derivative(self.activations[-1], self.activations_cache[-1])

        for i in reversed(range(len(self.weights))):
            weight_grad = np.dot(self.activations_cache[i].T, delta) / m
            bias_grad = np.sum(delta, axis=0, keepdims=True) / m

            # Apply gradient clipping
            weight_grad = np.clip(weight_grad, -1, 1)
            bias_grad = np.clip(bias_grad, -1, 1)

            self.weights[i] -= learning_rate * weight_grad
            self.biases[i] -= learning_rate * bias_grad

            if i > 0:
                delta = np.dot(delta, self.weights[i].T) * self.activation_derivative(
                    self.activations[i - 1], self.activations_cache[i]
                )
                # Apply dropout mask
                if self.dropouts_cache[i - 1] is not None:
                    delta *= self.dropouts_cache[i - 1]

    def train(self, X: np.ndarray, y: np.ndarray, epochs: int, learning_rate: float,
              decay_rate: float = 0.0, batch_size: Optional[int] = None, verbose: bool = True):
        """Train the network."""
        batch_size = batch_size or len(X)
        losses = []

        for epoch in range(epochs):
            indices = np.random.permutation(len(X))
            X_shuffled, y_shuffled = X[indices], y[indices]

            for i in range(0, len(X), batch_size):
                batch_X = X_shuffled[i:i + batch_size]
                batch_y = y_shuffled[i:i + batch_size]

                self.forward(batch_X)
                self.backward(batch_X, batch_y, learning_rate)

            # Apply learning rate decay
            learning_rate = learning_rate / (1 + decay_rate * epoch)

            # Compute loss and log
            predictions = self.forward(X)
            loss = np.mean((y - predictions) ** 2)
            losses.append(loss)

            if verbose and epoch % 100 == 0:
                logging.info(f"Epoch {epoch}/{epochs} - Loss: {loss:.6f}")

        return losses

    def evaluate(self, X: np.ndarray, y: np.ndarray) -> Tuple[float, float]:
        """Evaluate the network."""
        predictions = self.forward(X)
        loss = np.mean((y - predictions) ** 2)
        accuracy = np.mean((predictions > 0.5) == y)
        return loss, accuracy

    def save_model(self, filepath: str) -> None:
        """Save model weights and biases."""
        model_data = {
            "weights": [w.tolist() for w in self.weights],
            "biases": [b.tolist() for b in self.biases]
        }
        with open(filepath, "w") as f:
            json.dump(model_data, f)

    def load_model(self, filepath: str) -> None:
        """Load model weights and biases."""
        with open(filepath, "r") as f:
            model_data = json.load(f)
        self.weights = [np.array(w) for w in model_data["weights"]]
        self.biases = [np.array(b) for b in model_data["biases"]]

    def plot_training_history(self, losses: List[float]) -> None:
        """Plot training loss."""
        plt.plot(losses, label="Loss")
        plt.title("Training Loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.show()

# Example Usage
def main():
    """Example usage with the XOR problem."""
    # XOR problem dataset
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([0, 1, 1, 0])

    try:
        # Create network
        nn = NeuralNetwork(input_size=2, hidden_layers_sizes=[4], output_size=1, activations=["relu", "sigmoid"])

        # Train network
        losses = nn.train(X, y, epochs=5000, learning_rate=0.1, decay_rate=0.01, batch_size=4)

        # Evaluate and print results
        loss, accuracy = nn.evaluate(X, y)
        print("\nFinal Results:")
        print(f"Loss: {loss:.6f}")
        print(f"Accuracy: {accuracy:.2%}\n")

        # Show predictions
        print("Predictions:")
        predictions = nn.forward(X)
        for i in range(len(X)):
            predicted_value = predictions[i][0]
            expected_value = y[i]
            print(f"Input: {X[i]} -> Predicted: {predicted_value:.4f} (Expected: {expected_value})")

    except Exception as e:
        logging.error(f"Error in main: {str(e)}")
        raise

if __name__ == "__main__":
    main()




Final Results:
Loss: 0.250019
Accuracy: 50.00%

Predictions:
Input: [0 0] -> Predicted: 0.5003 (Expected: 0)
Input: [0 1] -> Predicted: 0.4914 (Expected: 1)
Input: [1 0] -> Predicted: 0.5003 (Expected: 1)
Input: [1 1] -> Predicted: 0.5003 (Expected: 0)
