# In [1]:
# Import libraries
import numpy as np

class Network(object):
    """Feedforward neural network with ReLU hidden layers and a sigmoid output.

    Every layer except the last applies ``relu``; the last layer applies
    ``sigmoid``.  Parameters are drawn from a standard normal distribution
    at construction time.
    """

    def __init__(self, sizes):
        """Initialize the network with given layer sizes.

        Args:
            sizes: Sequence of layer widths, input layer first.  For each
                consecutive pair ``(x, y)`` a weight matrix of shape
                ``(y, x)`` and a bias column of shape ``(y, 1)`` is created.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # The input layer has no bias, hence sizes[1:].
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the output of the network for input `a`.

        Args:
            a: Input activations; expected to be a column vector of shape
                ``(sizes[0], 1)`` so that ``np.dot(w, a) + b`` lines up with
                the ``(y, 1)`` biases.

        Returns:
            The sigmoid activations of the last layer.
        """
        # Hidden layers use ReLU.
        for b, w in zip(self.biases[:-1], self.weights[:-1]):
            a = relu(np.dot(w, a) + b)
        # Output layer uses sigmoid.
        return sigmoid(np.dot(self.weights[-1], a) + self.biases[-1])

    def backprop(self, x, y):
        """Compute the gradient for the cost function C_x.

        Args:
            x: Input sample, same layout as for ``feedforward``.
            y: Target output, same shape as the network output.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of layer-by-layer lists of arrays
            with the same shapes as ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass, recording every pre-activation (z) and activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases[:-1], self.weights[:-1]):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = relu(z)
            activations.append(activation)

        # Output layer.  Its z must be stored in `zs` as well: the backward
        # loop indexes `zs[-l]` counting from the output layer, so leaving
        # it out shifts every hidden-layer lookup by one (IndexError or
        # wrong gradients as soon as a hidden layer exists).
        z_output = np.dot(self.weights[-1], activation) + self.biases[-1]
        zs.append(z_output)
        activations.append(sigmoid(z_output))

        # Backward pass: output layer error.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(z_output)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Propagate the error through the hidden layers; l = 2 is the last
        # hidden layer, l = 3 the one before it, and so on.
        for l in range(2, self.num_layers):
            sp = relu_prime(zs[-l])
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return nabla_b, nabla_w

    def evaluate(self, test_data):
        """Return the number of test inputs for which the network outputs the correct result.

        Args:
            test_data: Iterable of ``(x, y)`` pairs.  The prediction for
                ``x`` is the index of the largest output activation, and it
                is compared to ``y`` with ``==``.
        """
        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives ∂C_x / ∂a for the output activations."""
        return (output_activations - y)

# Activation functions
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def sigmoid_prime(z):
    """Derivative of the sigmoid function, s(z) * (1 - s(z)).

    The sigmoid is evaluated once and reused instead of being computed
    twice for the same input.
    """
    s = sigmoid(z)
    return s * (1 - s)

def relu(z):
    """Rectified linear unit: element-wise maximum of 0 and `z`."""
    floor = 0
    rectified = np.maximum(floor, z)
    return rectified

def relu_prime(z):
    """Gradient of ReLU: 1 where `z` is strictly positive, otherwise 0."""
    is_positive = z > 0
    return np.where(is_positive, 1, 0)
# Smallest possible network: two inputs wired straight to one sigmoid output
# (no hidden layer).
net = Network([2, 1])

# One sample, laid out as a (2, 1) column vector.
input_data = np.array([0.5, 0.8]).reshape(2, 1)

# Run the sample through the network.
output = net.feedforward(input_data)
print("Forward pass output:", output)

# Desired output for that sample, as a (1, 1) array.
target_output = np.array([0.7]).reshape(1, 1)

# Gradients of the cost with respect to every bias and weight.
nabla_b, nabla_w = net.backprop(input_data, target_output)
print("Gradient for biases:", nabla_b)
print("Gradient for weights:", nabla_w)

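# Example output (values differ between runs because the weights and biases
# are randomly initialised):
# Forward pass output: [[0.4129139]]
# Gradient for biases: [array([[-0.06959427]])]
# Gradient for weights: [array([[-0.03479713, -0.05567541]])]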
