# In [None]:
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The logistic of ``x``, with the same shape as ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x``.

    Thin wrapper around ``np.tanh`` so it can be passed around like the
    other activation functions in this file.
    """
    squashed = np.tanh(x)
    return squashed

def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        ``x`` with every negative entry replaced by zero.
    """
    rectified = np.maximum(0, x)
    return rectified

def sigmoid_derivative(x):
    """Return the sigmoid derivative expressed via the sigmoid *output*.

    Args:
        x: The value(s) already produced by ``sigmoid`` (not the raw
            pre-activation), as a scalar or NumPy array.

    Returns:
        ``x * (1 - x)``, element-wise.
    """
    complement = 1 - x
    return x * complement

def tanh_derivative(x):
    """Return the tanh derivative expressed via the tanh *output*.

    Follows the same convention as ``sigmoid_derivative``: the argument is
    the value already produced by the activation, which is what
    ``backward_propagation`` passes in. Applying ``np.tanh`` again here (as
    the previous version did) would compute ``1 - tanh(tanh(z))**2``, which
    is not the derivative.

    Args:
        x: The value(s) already produced by ``tanh``, as a scalar or NumPy
            array.

    Returns:
        ``1 - x**2``, element-wise.
    """
    return 1 - x ** 2

def relu_derivative(x):
    """Return the ReLU derivative: 1 where ``x`` is positive, otherwise 0.

    Args:
        x: A scalar or NumPy array.

    Returns:
        An integer array of ones and zeros with the shape of ``x``.
    """
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def initialize_weights(input_size, hidden_size, output_size, seed=42):
    """Draw reproducible standard-normal weights for a two-layer network.

    A private ``RandomState`` is used instead of ``np.random.seed`` so that
    calling this function does not reset the process-wide NumPy random
    state. With the default seed the returned values are the same as those
    produced by seeding the global generator with 42.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        seed: Seed for the private random generator (default 42).

    Returns:
        A tuple ``(weights_input_hidden, weights_hidden_output)`` with shapes
        ``(input_size, hidden_size)`` and ``(hidden_size, output_size)``.
    """
    rng = np.random.RandomState(seed)
    # Draw order matters for reproducibility: input->hidden first.
    weights_input_hidden = rng.randn(input_size, hidden_size)
    weights_hidden_output = rng.randn(hidden_size, output_size)
    return weights_input_hidden, weights_hidden_output

def forward_propagation(inputs, weights_input_hidden, weights_hidden_output, activation_fn):
    """Run one forward pass through the two-layer network.

    The hidden layer uses ``activation_fn``; the output layer always uses
    ``sigmoid``. No bias terms are involved.

    Args:
        inputs: Input matrix, multiplied on the right by
            ``weights_input_hidden``.
        weights_input_hidden: Input-to-hidden weight matrix.
        weights_hidden_output: Hidden-to-output weight matrix.
        activation_fn: Callable applied to the hidden pre-activations.

    Returns:
        A tuple ``(hidden_layer_output, output_layer_output)``.
    """
    hidden_pre_activation = np.dot(inputs, weights_input_hidden)
    hidden_activations = activation_fn(hidden_pre_activation)

    output_pre_activation = np.dot(hidden_activations, weights_hidden_output)
    predictions = sigmoid(output_pre_activation)

    return hidden_activations, predictions

def backward_propagation(inputs, targets, hidden_layer_output, output_layer_output,
                         weights_input_hidden, weights_hidden_output, learning_rate, activation_derivative):
    """Back-propagate the error and update both weight matrices in place.

    The output layer of this network is always a sigmoid, so its delta uses
    the sigmoid derivative ``out * (1 - out)`` regardless of which hidden
    activation is in use. ``activation_derivative`` is applied only to the
    hidden layer. (Previously it was applied to the output layer as well,
    which produced wrong gradients for any non-sigmoid hidden activation.)

    Args:
        inputs: Input matrix used in the forward pass.
        targets: Desired outputs, same shape as ``output_layer_output``.
        hidden_layer_output: Hidden activations from the forward pass.
        output_layer_output: Sigmoid outputs from the forward pass.
        weights_input_hidden: Input-to-hidden weights; modified in place.
        weights_hidden_output: Hidden-to-output weights; modified in place.
        learning_rate: Step size applied to both updates.
        activation_derivative: Callable that receives the hidden layer's
            activation output and returns the hidden activation's derivative.

    Returns:
        None. The two weight arrays are updated via ``+=``.
    """
    output_error = targets - output_layer_output
    # Derivative of the (fixed) sigmoid output layer, in terms of its output.
    output_delta = output_error * (output_layer_output * (1 - output_layer_output))

    # Uses the hidden->output weights *before* they are updated below.
    hidden_layer_error = output_delta.dot(weights_hidden_output.T)
    hidden_layer_delta = hidden_layer_error * activation_derivative(hidden_layer_output)

    # error = targets - outputs, so adding the scaled term reduces the error.
    weights_hidden_output += hidden_layer_output.T.dot(output_delta) * learning_rate
    weights_input_hidden += inputs.T.dot(hidden_layer_delta) * learning_rate

def train_neural_network(inputs, targets, hidden_size, output_size, learning_rate, epochs, activation_fn, activation_derivative):
    """Train the two-layer network with full-batch updates.

    Each epoch runs a forward pass, updates the weights through
    ``backward_propagation`` (which modifies them in place), and records the
    mean squared error of the outputs computed *before* that epoch's update.
    Progress is printed every 1000 epochs.

    Args:
        inputs: Training inputs; ``inputs.shape[1]`` sets the input size.
        targets: Training targets.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        learning_rate: Step size forwarded to ``backward_propagation``.
        epochs: Number of training iterations.
        activation_fn: Hidden-layer activation.
        activation_derivative: Derivative callable forwarded to
            ``backward_propagation``.

    Returns:
        A tuple ``(weights_input_hidden, weights_hidden_output, loss_history)``
        where ``loss_history`` holds one loss value per epoch.
    """
    n_features = inputs.shape[1]
    weights_input_hidden, weights_hidden_output = initialize_weights(
        n_features, hidden_size, output_size
    )
    loss_history = []

    for epoch in range(epochs):
        hidden_activations, predictions = forward_propagation(
            inputs, weights_input_hidden, weights_hidden_output, activation_fn
        )
        backward_propagation(
            inputs, targets, hidden_activations, predictions,
            weights_input_hidden, weights_hidden_output,
            learning_rate, activation_derivative,
        )

        # Mean squared error of this epoch's (pre-update) predictions.
        residuals = targets - predictions
        loss = np.mean(residuals ** 2)
        loss_history.append(loss)

        if epoch % 1000 == 0:
            print(f"Epoch {epoch}, Loss: {loss}")

    return weights_input_hidden, weights_hidden_output, loss_history

def plot_loss_curves(*loss_histories, labels, title='Training Loss for Different Activation Functions'):
    """Plot several loss histories on one labelled figure and show it.

    Args:
        *loss_histories: One sequence of loss values per curve.
        labels: Legend labels, paired with ``loss_histories`` in order
            (keyword-only). Pairing stops at the shorter of the two.
        title: Figure title.
    """
    curves = zip(loss_histories, labels)
    for series, legend_name in curves:
        plt.plot(series, label=legend_name)

    # Decorate the current axes, then render.
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Example usage: train the same network on XOR with three different hidden
# activations and compare their loss curves.
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
targets = np.array([[0], [1], [1], [0]])

# Hyperparameters shared by all three runs.
hidden_size, output_size = 4, 1
learning_rate, epochs = 0.01, 10000

_common_args = (inputs, targets, hidden_size, output_size, learning_rate, epochs)

# Each result is (weights_input_hidden, weights_hidden_output, loss_history).
sigmoid_params = train_neural_network(*_common_args, sigmoid, sigmoid_derivative)
tanh_params = train_neural_network(*_common_args, tanh, tanh_derivative)
relu_params = train_neural_network(*_common_args, relu, relu_derivative)

plot_loss_curves(
    *(params[2] for params in (sigmoid_params, tanh_params, relu_params)),
    labels=['Sigmoid', 'Tanh', 'ReLU'],
)