In [9]:
import numpy as np

# Part 1: Function Implementations

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*, element-wise.

    The value is built from ``exp(-|z|)``, which never exceeds 1, so large
    negative inputs do not overflow ``np.exp`` (and trigger a RuntimeWarning)
    the way the direct formula does.

    Args:
        z: A real scalar or array-like.

    Returns:
        The sigmoid of each element, in the closed interval [0, 1]: a NumPy
        scalar for scalar input, otherwise an array with the shape of *z*.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always within [0, 1], so it cannot overflow
    # z >= 0: 1 / (1 + e^-z).  z < 0: the algebraically equal e^z / (1 + e^z).
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar; it is a no-op for
    # arrays with one or more dimensions.
    return result[()]

def sigmoid_derivative(z):
    """Return the derivative of the logistic sigmoid evaluated at *z*.

    Relies on the identity sigma'(z) = sigma(z) * (1 - sigma(z)), so the
    sigmoid itself is evaluated only once.
    """
    activated = sigmoid(z)
    complement = 1 - activated
    return activated * complement

def relu(z):
    """Rectified linear unit: element-wise maximum of zero and *z*."""
    floor = 0
    # Keep zero as the first operand; operand order decides which zero
    # (+0.0 or -0.0) is returned when z is a signed zero.
    return np.maximum(floor, z)

def relu_derivative(z):
    """Return the ReLU derivative at *z*: 1.0 where ``z > 0``, else 0.0.

    The comparison goes through ``np.greater`` so that plain Python scalars
    and lists are accepted as well as arrays; a bare ``z > 0`` on a Python
    number yields a ``bool``, which has no ``astype`` method.

    Args:
        z: A real scalar or array-like of pre-activation values.

    Returns:
        A float NumPy scalar or array shaped like *z*. The derivative at
        exactly zero is taken to be 0.0.
    """
    return np.greater(z, 0).astype(float)

def linear(z):
    """Identity activation: hand *z* back unchanged (same object, no copy)."""
    passthrough = z
    return passthrough

def linear_derivative(z):
    """Return the derivative of the identity activation at *z*.

    The slope is 1 everywhere, so the result is an array of ones with the
    same shape and dtype as *z*.
    """
    unit_slope = np.ones_like(z)
    return unit_slope

def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between targets and predictions.

    The difference follows NumPy broadcasting, and the mean is taken over
    every element of the resulting array.
    """
    residuals = y_true - y_pred
    squared = residuals ** 2
    return np.mean(squared)

# Part 2: NeuralNetwork Class Implementation
class NeuralNetwork:
    """Fully connected feed-forward network trained by full-batch gradient descent.

    Every hidden layer uses the same configurable activation (sigmoid, relu
    or linear). The output layer is always linear, which suits regression
    targets.

    ``weights[i]`` has shape ``(layers[i], layers[i + 1])`` and ``biases[i]``
    has shape ``(layers[i + 1],)``; both are drawn from a standard normal
    distribution via ``np.random.randn``.
    """

    def __init__(self, layers, activation='sigmoid'):
        """Create a network with randomly initialised weights and biases.

        Args:
            layers: Sequence of layer sizes, input size first and output size
                last. Must contain at least two entries.
            activation: Hidden-layer activation name: 'sigmoid', 'relu' or
                'linear'.

        Raises:
            ValueError: If *activation* is not a supported name, or *layers*
                has fewer than two entries.
        """
        # Validate first so bad arguments fail before any random draws.
        supported = {
            'sigmoid': (sigmoid, sigmoid_derivative),
            'relu': (relu, relu_derivative),
            'linear': (linear, linear_derivative),
        }
        if not isinstance(activation, str) or activation not in supported:
            raise ValueError("Activation Function must be sigmoid , relu , linear")
        if len(layers) < 2:
            raise ValueError("layers must list at least an input and an output size")

        self.layers = layers
        self.activation_name = activation
        self.activation, self.activation_derivative = supported[activation]

        # All weight matrices are drawn before any bias vector, so a given
        # np.random seed yields the same parameters as it always has.
        shapes = list(zip(layers[:-1], layers[1:]))
        self.weights = [np.random.randn(n_in, n_out) for n_in, n_out in shapes]
        self.biases = [np.random.randn(n_out) for _, n_out in shapes]

    @staticmethod
    def _align_targets(y, output):
        """Return *y* as an array, reshaped to a column when that is unambiguous.

        A 1-D target of length n paired with an (n, 1) output would otherwise
        broadcast to an (n, n) error matrix and silently corrupt both the
        gradients and the reported loss. Any other shape is returned as-is so
        ordinary broadcasting keeps working.
        """
        y = np.asarray(y)
        out_shape = np.shape(output)
        if (
            y.ndim == 1
            and len(out_shape) == 2
            and out_shape[1] == 1
            and y.shape[0] == out_shape[0]
        ):
            return y.reshape(-1, 1)
        return y

    def forward(self, x):
        """Propagate *x* through every layer.

        Args:
            x: Input batch of shape (n_samples, layers[0]).

        Returns:
            A pair ``(activations, z_values)``. ``activations[0]`` is *x* and
            ``activations[i + 1]`` is the output of layer transition ``i``;
            ``z_values[i]`` is that transition's pre-activation value.
        """
        activations = [x]
        z_values = []
        last = len(self.weights) - 1

        for idx, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = np.dot(activations[-1], w) + b
            # The output layer stays linear (regression); hidden layers use
            # the configured activation.
            act = linear if idx == last else self.activation
            z_values.append(z)
            activations.append(act(z))

        return activations, z_values

    def backward(self, x, y, activations, z_values, learning_rate):
        """Run one gradient-descent step, updating weights and biases in place.

        Args:
            x: Input batch; only its first dimension (the sample count) is
                used, to average the weight gradients.
            y: Targets. A 1-D array of length n_samples is treated as a
                column when the network has a single output unit.
            activations: Layer outputs as returned by ``forward``.
            z_values: Pre-activations as returned by ``forward``.
            learning_rate: Step size applied to the averaged gradients.
        """
        m = x.shape[0]
        output = activations[-1]
        y = self._align_targets(y, output)

        deltas = [None] * len(self.weights)

        # Output layer error (linear output, so the derivative is all ones).
        deltas[-1] = (output - y) * linear_derivative(z_values[-1])

        # Hidden layer errors, walking backwards. Every delta is computed
        # from the pre-update weights because updates happen afterwards.
        for idx in range(len(deltas) - 2, -1, -1):
            propagated = np.dot(deltas[idx + 1], self.weights[idx + 1].T)
            deltas[idx] = propagated * self.activation_derivative(z_values[idx])

        # zip stops at the shorter list, so the final activation (the network
        # output) is never paired with a delta.
        for idx, (a_prev, delta) in enumerate(zip(activations, deltas)):
            dw = np.dot(a_prev.T, delta) / m
            db = np.mean(delta, axis=0)
            self.weights[idx] -= learning_rate * dw
            self.biases[idx] -= learning_rate * db

    def train(self, x_train, y_train, epochs, learning_rate, log_every=1000):
        """Fit the network with full-batch gradient descent.

        Args:
            x_train: Training inputs, shape (n_samples, layers[0]).
            y_train: Training targets.
            epochs: Number of passes over the full training set.
            learning_rate: Step size for each update.
            log_every: Print the MSE every this many epochs (starting with
                epoch 0). Pass 0 or None to disable printing.
        """
        for epoch in range(epochs):
            activations, z_values = self.forward(x_train)
            self.backward(x_train, y_train, activations, z_values, learning_rate)
            if log_every and epoch % log_every == 0:
                # The reported loss comes from the forward pass made before
                # this epoch's weight update.
                targets = self._align_targets(y_train, activations[-1])
                mse = mean_squared_error(targets, activations[-1])
                print(f"Epoch {epoch}, MSE: {mse:.4f}")

    def predict(self, x_test):
        """Return the network output for *x_test* (the final-layer activation)."""
        activations, _ = self.forward(x_test)
        return activations[-1]

# Example usage: fit the network to the XOR truth table.
# Inputs enumerate every (a, b) bit pair; each target is a XOR b.
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
y = np.array([[a ^ b] for a, b in X])

# Two inputs, one hidden layer of four sigmoid units, one output unit.
nn = NeuralNetwork(layers=[2, 4, 1], activation='sigmoid')
nn.train(x_train=X, y_train=y, epochs=10000, learning_rate=0.1)

# Evaluate on the same four points that were used for training.
predictions = nn.predict(x_test=X)
print("Predictions:\n", predictions)


Epoch 0, MSE: 1.2798
Epoch 1000, MSE: 0.2335
Epoch 2000, MSE: 0.1655
Epoch 3000, MSE: 0.0258
Epoch 4000, MSE: 0.0005
Epoch 5000, MSE: 0.0000
Epoch 6000, MSE: 0.0000
Epoch 7000, MSE: 0.0000
Epoch 8000, MSE: 0.0000
Epoch 9000, MSE: 0.0000
Predictions:
 [[9.09859221e-09]
 [9.99999987e-01]
 [9.99999985e-01]
 [2.19095401e-08]]
