In [1]:
import numpy as np
from re import A

In [4]:
class NeuralNetwork:
    """Fully connected binary classifier trained with batch gradient descent.

    Every layer except the last uses a ReLU activation; the last layer uses
    a sigmoid. The cost is the mean binary cross-entropy. Data is laid out
    column-wise: ``X`` has one column per example, and ``Y`` must be
    reshapeable to the shape of the network output.
    """

    # Bound used to keep sigmoid outputs strictly inside (0, 1) before they
    # are passed to log() or used as a divisor. Outputs that are not exactly
    # saturated are unaffected.
    _EPSILON = 1e-15

    def __init__(self, neurons_each_layer, num_iterations=1000, learning_rate=0.01):
        """
        Initializes the neural network.

        Args:
            neurons_each_layer (list): Number of neurons in each layer,
                starting with the input size and ending with the output size.
            num_iterations (int): Number of training iterations.
            learning_rate (float): Learning rate for gradient descent.

        Raises:
            ValueError: If fewer than two layer sizes are given (an input
                and an output layer are the minimum).
        """
        if len(neurons_each_layer) < 2:
            raise ValueError(
                "neurons_each_layer must contain at least an input and an output size"
            )
        self.neurons_each_layer = neurons_each_layer
        self.num_iterations = num_iterations
        self.learning_rate = learning_rate
        self.parameters = self.initialize_parameters()

    def initialize_parameters(self):
        """Initializes weights and biases for each layer.

        Returns:
            dict: ``W{i}`` of shape ``(n_i, n_{i-1})`` drawn from a standard
            normal scaled by 0.01, and ``b{i}`` of shape ``(n_i, 1)`` filled
            with zeros, for every layer ``i >= 1``.
        """
        parameters = {}
        sizes = self.neurons_each_layer
        for layer in range(1, len(sizes)):
            parameters[f"W{layer}"] = np.random.randn(sizes[layer], sizes[layer - 1]) * 0.01
            parameters[f"b{layer}"] = np.zeros((sizes[layer], 1))
        return parameters

    def forward(self, activation, A_pre, W, b):
        """Performs a single forward step for one layer.

        Args:
            activation (str): Either ``"sigmoid"`` or ``"relu"``.
            A_pre (ndarray): Activations of the previous layer.
            W (ndarray): Weight matrix of this layer.
            b (ndarray): Bias column vector of this layer.

        Returns:
            tuple: ``(A, cache)`` where ``A`` is the layer output and
            ``cache`` is ``((W, A_pre, b), Z)`` for use in the backward pass.

        Raises:
            ValueError: If ``activation`` is not supported.
        """
        Z = np.dot(W, A_pre) + b
        linear_cache = (W, A_pre, b)

        if activation == "sigmoid":
            A = 1 / (1 + np.exp(-Z))
        elif activation == "relu":
            A = np.maximum(0, Z)
        else:
            raise ValueError(f"Unsupported activation: {activation!r}")

        # The pre-activation Z is all the backward pass needs for both
        # activation types.
        cache = (linear_cache, Z)
        return A, cache

    def forward_model(self, X):
        """Performs forward propagation through the entire network.

        Args:
            X (ndarray): Input data, one column per example.

        Returns:
            tuple: ``(AL, caches)`` with the output-layer activations and the
            list of per-layer caches in layer order.
        """
        caches = []
        A = X
        # parameters holds one W and one b per layer.
        L = len(self.parameters) // 2

        for layer in range(1, L):
            A, cache = self.forward(
                "relu", A, self.parameters[f"W{layer}"], self.parameters[f"b{layer}"]
            )
            caches.append(cache)

        AL, cache = self.forward("sigmoid", A, self.parameters[f"W{L}"], self.parameters[f"b{L}"])
        caches.append(cache)
        return AL, caches

    def compute_cost(self, Y, AL):
        """Computes the mean binary cross-entropy cost.

        Args:
            Y (ndarray): True labels with one column per example.
            AL (ndarray): Predicted probabilities from the output layer.

        Returns:
            ndarray: Zero-dimensional array holding the cost.
        """
        m = Y.shape[1]
        # A saturated sigmoid yields exactly 0.0 or 1.0; log(0) would then
        # turn the whole cost into NaN/inf.
        AL = np.clip(AL, self._EPSILON, 1 - self._EPSILON)
        loss = Y * np.log(AL) + (1 - Y) * np.log(1 - AL)
        cost = -np.sum(loss) / m
        return np.squeeze(cost)

    def backward(self, dA, activation, cache):
        """Performs backward propagation for a single layer.

        Args:
            dA (ndarray): Gradient of the cost w.r.t. this layer's output.
            activation (str): Either ``"relu"`` or ``"sigmoid"``.
            cache (tuple): ``((W, A_prev, b), Z)`` as produced by ``forward``.

        Returns:
            tuple: ``(dA_prev, dW, db)`` gradients w.r.t. the previous
            layer's activations, this layer's weights and its biases.

        Raises:
            ValueError: If ``activation`` is not supported.
        """
        linear_cache, Z = cache
        W, A_prev, _ = linear_cache
        m = A_prev.shape[1]

        if activation == "relu":
            # Copy so the caller's dA is not modified by the masking below.
            dZ = np.array(dA, copy=True)
            dZ[Z <= 0] = 0
        elif activation == "sigmoid":
            s = 1 / (1 + np.exp(-Z))
            dZ = dA * s * (1 - s)
        else:
            raise ValueError(f"Unsupported activation: {activation!r}")

        dW = 1 / m * np.dot(dZ, A_prev.T)
        db = 1 / m * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = np.dot(W.T, dZ)

        return dA_prev, dW, db

    def backward_model(self, AL, Y, caches):
        """Performs backward propagation through the entire network.

        Args:
            AL (ndarray): Output-layer activations from ``forward_model``.
            Y (ndarray): True labels; reshaped to ``AL.shape``.
            caches (list): Per-layer caches from ``forward_model``.

        Returns:
            dict: ``dA{i}``, ``dW{i+1}`` and ``db{i+1}`` for every layer
            index ``i`` in ``0 .. L-1``.
        """
        gradients = {}
        L = len(caches)
        Y = Y.reshape(AL.shape)
        # Keep the divisors away from zero when the sigmoid has saturated.
        AL = np.clip(AL, self._EPSILON, 1 - self._EPSILON)
        dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

        # Output layer (sigmoid).
        dA_prev, dW, db = self.backward(dAL, "sigmoid", caches[L - 1])
        gradients[f"dA{L-1}"] = dA_prev
        gradients[f"dW{L}"] = dW
        gradients[f"db{L}"] = db

        # Hidden layers (ReLU), walking from the last hidden layer to the first.
        for layer in reversed(range(L - 1)):
            dA_prev, dW, db = self.backward(gradients[f"dA{layer+1}"], "relu", caches[layer])
            gradients[f"dA{layer}"] = dA_prev
            gradients[f"dW{layer+1}"] = dW
            gradients[f"db{layer+1}"] = db

        return gradients

    def update_parameters(self, gradients):
        """Updates the parameters in place using gradient descent.

        Args:
            gradients (dict): Must contain ``dW{i}`` and ``db{i}`` for every
                layer ``i``, as returned by ``backward_model``.
        """
        L = len(self.parameters) // 2

        for layer in range(1, L + 1):
            self.parameters[f"W{layer}"] -= self.learning_rate * gradients[f"dW{layer}"]
            self.parameters[f"b{layer}"] -= self.learning_rate * gradients[f"db{layer}"]

    def train(self, X, Y):
        """Trains the neural network using forward and backward propagation.

        Runs ``num_iterations`` full-batch updates and prints the cost every
        100 iterations. The printed cost is the one computed before that
        iteration's parameter update.

        Args:
            X (ndarray): Input data, one column per example.
            Y (ndarray): True labels, one column per example.
        """
        for i in range(self.num_iterations):
            AL, caches = self.forward_model(X)
            cost = self.compute_cost(Y, AL)
            gradients = self.backward_model(AL, Y, caches)
            self.update_parameters(gradients)

            if i % 100 == 0:
                print(f"Iteration {i}, Cost: {cost}")

    def predict(self, X):
        """Predicts the output for a given input.

        Args:
            X (ndarray): Input data, one column per example.

        Returns:
            ndarray: Integer array of 0/1 labels; 1 where the predicted
            probability is strictly greater than 0.5.
        """
        AL, _ = self.forward_model(X)
        return (AL > 0.5).astype(int)





In [5]:
# Example usage: train a small network on random data and print its
# predictions for the training inputs.
n_x = 4  # number of input features
m = 3  # number of examples
# Seed the global NumPy RNG before any draws; X, Y and the network's initial
# weights all come from this stream, in this order.
np.random.seed(1)
X = np.random.randn(n_x, m)  # inputs, one column per example
Y = np.random.randint(0, 2, size=(1, m))  # random 0/1 labels, one per example
# Layer sizes: n_x inputs -> 2 -> 3 -> 1 output.
neural_net = NeuralNetwork([n_x, 2, 3, 1], num_iterations=1000, learning_rate=0.01)
neural_net.train(X, Y)
print("Predictions:", neural_net.predict(X))

Iteration 0, Cost: 0.6931470547312185
Iteration 100, Cost: 0.6712612760527906
Iteration 200, Cost: 0.6579585894183689
Iteration 300, Cost: 0.6498276567493818
Iteration 400, Cost: 0.6448247915679844
Iteration 500, Cost: 0.6417264705669424
Iteration 600, Cost: 0.6397963081568999
Iteration 700, Cost: 0.6385877214441823
Iteration 800, Cost: 0.6378276885691334
Iteration 900, Cost: 0.6373480208978698
Predictions: [[0 0 0]]
