In [1]:
import numpy as np

In [2]:
def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The direct formula overflows inside ``np.exp(-z)`` when ``z`` is a large
    negative number (NumPy emits a RuntimeWarning). This version only ever
    exponentiates a non-positive value, so the intermediate stays in (0, 1].

    Args:
        z: Real scalar or array-like.

    Returns:
        The element-wise sigmoid of ``z``: a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``z``.
    """
    z = np.asarray(z)
    # exp(-|z|) may underflow to 0 but can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z). Both are the same function.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar; arrays pass through.
    return result[()]

In [3]:
def initialize_parameters(input_size, hidden_size, output_size, rng=None):
    """Draw initial weights and biases uniformly from [-1, 1).

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        rng: Optional random source exposing ``uniform(low, high, size)``,
            e.g. ``np.random.default_rng(seed)``. When omitted, the global
            ``np.random`` state is used, so ``np.random.seed`` still controls
            the result.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(hidden_size, input_size)``,
        ``(hidden_size, 1)``, ``(output_size, hidden_size)`` and
        ``(output_size, 1)``.
    """
    sampler = np.random if rng is None else rng

    def draw(rows, cols):
        # Single place that fixes the sampling range for every parameter.
        return sampler.uniform(-1, 1, (rows, cols))

    # Draw order (W1, b1, W2, b2) is kept stable so a given seed always
    # produces the same parameters.
    W1 = draw(hidden_size, input_size)
    b1 = draw(hidden_size, 1)
    W2 = draw(output_size, hidden_size)
    b2 = draw(output_size, 1)
    return W1, b1, W2, b2

In [4]:
def forward_propagation(X, W1, b1, W2, b2):
    """Run the two-layer network forward and return both activations.

    Each layer applies an affine map (``np.dot(W, input) + b``) followed by
    ``sigmoid``.

    Args:
        X: Input matrix fed to the first layer (the example in this notebook
            stores one sample per column).
        W1, b1: Hidden-layer weights and bias.
        W2, b2: Output-layer weights and bias.

    Returns:
        Tuple ``(A1, A2)``: hidden-layer activation and output activation.
    """
    hidden_activation = sigmoid(np.dot(W1, X) + b1)
    output_activation = sigmoid(np.dot(W2, hidden_activation) + b2)
    return hidden_activation, output_activation

In [5]:
def compute_loss(Y, A2, eps=1e-12):
    """Binary cross-entropy between targets ``Y`` and predictions ``A2``.

    The cross-entropy terms are summed over every entry and divided by the
    number of columns of ``Y`` (``Y.shape[1]``).

    Args:
        Y: Target array with at least two dimensions; entries are expected
            to be 0 or 1.
        A2: Predicted probabilities, broadcast-compatible with ``Y``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before taking
            logarithms, so a saturated output of exactly 0 or 1 yields a
            large finite loss instead of ``inf``/``nan``.

    Returns:
        The loss as a NumPy float.
    """
    m = Y.shape[1]
    # Work in float64 so that ``1 - eps`` is strictly below 1 after clipping.
    probs = np.clip(np.asarray(A2, dtype=np.float64), eps, 1 - eps)
    loss = -1/m * np.sum(Y * np.log(probs) + (1 - Y) * np.log(1 - probs))
    return loss

In [6]:
def backward_propagation(X, Y, A1, A2, W1, W2, b1, b2):
    """Compute gradients of the loss for both layers.

    The output error is ``A2 - Y``; it is propagated to the hidden layer
    through ``W2.T`` and scaled by the sigmoid derivative ``A1 * (1 - A1)``.
    Every gradient is scaled by ``1 / Y.shape[1]``.

    Args:
        X: Network input used in the forward pass.
        Y: Targets.
        A1: Hidden-layer activation from the forward pass.
        A2: Output activation from the forward pass.
        W1: Accepted for interface symmetry; not used here.
        W2: Output-layer weights.
        b1, b2: Accepted for interface symmetry; not used here.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)``.
    """
    m = Y.shape[1]
    output_err = A2 - Y
    inv_m = 1 / m

    def layer_grads(err, layer_input):
        # Weight gradient pairs each error with the input that produced it;
        # bias gradient sums the error across columns.
        weight_grad = inv_m * np.dot(err, layer_input.T)
        bias_grad = inv_m * np.sum(err, axis=1, keepdims=True)
        return weight_grad, bias_grad

    dW2, db2 = layer_grads(output_err, A1)
    hidden_err = np.dot(W2.T, output_err) * A1 * (1 - A1)
    dW1, db1 = layer_grads(hidden_err, X)
    return dW1, db1, dW2, db2

In [7]:
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    """Apply one gradient-descent step and return the updated parameters.

    New arrays are returned and the inputs are left untouched, so re-running
    a cell that calls this function does not silently modify arrays that the
    caller still holds. Building new arrays also lets integer-typed
    parameters be updated, which an in-place ``-=`` would reject.

    Args:
        W1, b1, W2, b2: Current parameters.
        dW1, db1, dW2, db2: Gradients matching each parameter.
        learning_rate: Step size multiplying every gradient.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with ``param - learning_rate * grad``
        applied to each parameter.
    """
    params = (W1, b1, W2, b2)
    grads = (dW1, db1, dW2, db2)
    return tuple(p - learning_rate * g for p, g in zip(params, grads))

In [8]:
def train(X, Y, hidden_size, output_size, learning_rate, num_epochs, print_every=100):
    """Train the two-layer network with full-batch gradient descent.

    Args:
        X: Input matrix; ``X.shape[0]`` is taken as the number of features.
        Y: Targets passed to the loss and the backward pass.
        hidden_size: Number of hidden units.
        output_size: Number of output units.
        learning_rate: Step size for each parameter update.
        num_epochs: Number of gradient-descent iterations.
        print_every: Report the loss every this many epochs (default 100,
            starting at epoch 0). Pass 0 or ``None`` to train silently.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` of trained parameters.
    """
    input_size = X.shape[0]
    W1, b1, W2, b2 = initialize_parameters(input_size, hidden_size, output_size)

    for epoch in range(num_epochs):
        A1, A2 = forward_propagation(X, W1, b1, W2, b2)

        # The loss is only used for reporting, so it is evaluated on the
        # epochs that print rather than on every iteration.
        if print_every and epoch % print_every == 0:
            loss = compute_loss(Y, A2)
            print(f"Epoch {epoch}, Loss: {loss}")

        dW1, db1, dW2, db2 = backward_propagation(X, Y, A1, A2, W1, W2, b1, b2)
        W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)

    return W1, b1, W2, b2

In [9]:
def predict(X, W1, b1, W2, b2):
    """Return hard predictions for ``X`` from the trained parameters.

    Runs the forward pass, discards the hidden activation, and rounds the
    output activation with ``np.round``.

    Args:
        X: Input matrix in the layout expected by ``forward_propagation``.
        W1, b1, W2, b2: Network parameters.

    Returns:
        Array of rounded output activations (same shape as the network output).
    """
    _hidden, output_activation = forward_propagation(X, W1, b1, W2, b2)
    return np.round(output_activation)

In [10]:
# Example usage: fit the network to the XOR truth table.

# Seed NumPy's global generator so the random initialisation, and therefore
# the printed losses and predictions, are identical on every fresh run.
np.random.seed(42)

# Samples are stored column-wise: after the transpose X is (2, 4); Y is (1, 4).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
Y = np.array([[0, 1, 1, 0]])

hidden_size = 2
output_size = 1
# NOTE(review): the losses recorded with this notebook stay close to 0.69
# (about ln 2) for the epochs shown, which suggests this step size is too
# small for the epoch budget -- consider a larger value; TODO confirm.
learning_rate = 0.01
num_epochs = 10000

W1, b1, W2, b2 = train(X, Y, hidden_size, output_size, learning_rate, num_epochs)

# Make predictions
predictions = predict(X, W1, b1, W2, b2)
print("Predictions:", predictions)

Epoch 0, Loss: 0.749097661743568
Epoch 100, Loss: 0.7142572283900298
Epoch 200, Loss: 0.7002204606275971
Epoch 300, Loss: 0.6947210592039549
Epoch 400, Loss: 0.6925676503169234
Epoch 500, Loss: 0.6916996597050943
Epoch 600, Loss: 0.6913212329273984
Epoch 700, Loss: 0.691128525483351
Epoch 800, Loss: 0.6910060888644972
Epoch 900, Loss: 0.6909101284379104
Epoch 1000, Loss: 0.690824025951917
Epoch 1100, Loss: 0.6907414603783852
Epoch 1200, Loss: 0.6906600122195115
Epoch 1300, Loss: 0.6905787410699606
Epoch 1400, Loss: 0.6904972678629103
Epoch 1500, Loss: 0.6904154269280811
Epoch 1600, Loss: 0.6903331339261642
Epoch 1700, Loss: 0.6902503356335131
Epoch 1800, Loss: 0.690166990796441
Epoch 1900, Loss: 0.6900830627985083
Epoch 2000, Loss: 0.6899985168297458
Epoch 2100, Loss: 0.6899133187774698
Epoch 2200, Loss: 0.6898274347788711
Epoch 2300, Loss: 0.6897408310300118
Epoch 2400, Loss: 0.6896534736954024
Epoch 2500, Loss: 0.6895653288577888
Epoch 2600, Loss: 0.6894763624844967
Epoch 2700, Loss: