In [1]:
import numpy as np

class XOR3MLP:
    def __init__(self, learning_rate=0.1):
        """Initialize MLP for 3-input XOR with 2 hidden layers"""
        self.lr = learning_rate

        # Weights and biases
        # Input (3) -> Hidden1 (4)
        self.W1 = np.random.randn(3, 4) * 0.01
        self.b1 = np.zeros((1, 4))

        # Hidden1 (4) -> Hidden2 (4)
        self.W2 = np.random.randn(4, 4) * 0.01
        self.b2 = np.zeros((1, 4))

        # Hidden2 (4) -> Output (1)
        self.W3 = np.random.randn(4, 1) * 0.01
        self.b3 = np.zeros((1, 1))

    def sigmoid(self, x):
        """Sigmoid activation"""
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def sigmoid_derivative(self, x):
        """Derivative of sigmoid"""
        return x * (1 - x)

    def forward(self, X):
        """Forward propagation"""
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W3) + self.b3
        self.a3 = self.sigmoid(self.z3)
        return self.a3

    def backward(self, X, y, output):
        """Backward propagation with derivative calculations"""
        # Output layer derivatives
        # Error at output: difference between target and prediction
        self.error3 = y - output
        # Delta for output layer: error * derivative of sigmoid at output
        # dL/dz3 = (y - a3) * a3 * (1 - a3), where a3 is the output
        self.delta3 = self.error3 * self.sigmoid_derivative(output)

        # Hidden2 layer derivatives
        # Error propagated back to Hidden2: delta3 dotted with W3 transpose
        # dL/dz2 = dL/dz3 * W3^T (chain rule)
        self.error2 = np.dot(self.delta3, self.W3.T)
        # Delta for Hidden2: error * derivative of sigmoid at Hidden2
        # dL/dz2 = dL/dz2 * a2 * (1 - a2), where a2 is Hidden2 activation
        self.delta2 = self.error2 * self.sigmoid_derivative(self.a2)

        # Hidden1 layer derivatives
        # Error propagated back to Hidden1: delta2 dotted with W2 transpose
        # dL/dz1 = dL/dz2 * W2^T (chain rule)
        self.error1 = np.dot(self.delta2, self.W2.T)
        # Delta for Hidden1: error * derivative of sigmoid at Hidden1
        # dL/dz1 = dL/dz1 * a1 * (1 - a1), where a1 is Hidden1 activation
        self.delta1 = self.error1 * self.sigmoid_derivative(self.a1)

        # Weight and bias updates (gradient descent)
        # dL/dW3 = a2^T * delta3 (gradient of loss w.r.t. W3)
        self.W3 += self.lr * np.dot(self.a2.T, self.delta3)
        # dL/db3 = sum(delta3) (gradient of loss w.r.t. b3)
        self.b3 += self.lr * np.sum(self.delta3, axis=0, keepdims=True)
        # dL/dW2 = a1^T * delta2 (gradient of loss w.r.t. W2)
        self.W2 += self.lr * np.dot(self.a1.T, self.delta2)
        # dL/db2 = sum(delta2) (gradient of loss w.r.t. b2)
        self.b2 += self.lr * np.sum(self.delta2, axis=0, keepdims=True)
        # dL/dW1 = X^T * delta1 (gradient of loss w.r.t. W1)
        self.W1 += self.lr * np.dot(X.T, self.delta1)
        # dL/db1 = sum(delta1) (gradient of loss w.r.t. b1)
        self.b1 += self.lr * np.sum(self.delta1, axis=0, keepdims=True)

    def train(self, X, y, epochs=10000, verbose=False):
        """Train the MLP"""
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y, output)
            if verbose and epoch % 1000 == 0:
                loss = np.mean(np.square(y - output))
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def predict(self, X):
        """Predict binary output"""
        return (self.forward(X) >= 0.5).astype(np.int32)

# 3-input XOR dataset
X = np.array([
    [0, 0, 0],  # 0 (even)
    [0, 0, 1],  # 1 (odd)
    [0, 1, 0],  # 1 (odd)
    [0, 1, 1],  # 0 (even)
    [1, 0, 0],  # 1 (odd)
    [1, 0, 1],  # 0 (even)
    [1, 1, 0],  # 0 (even)
    [1, 1, 1]   # 1 (odd)
], dtype=np.float32)

y = np.array([[0], [1], [1], [0], [1], [0], [0], [1]], dtype=np.float32)

# Example usage
if __name__ == "__main__":
    # Set random seed for reproducibility
    np.random.seed(42)

    # Create and train MLP
    mlp = XOR3MLP(learning_rate=0.1)
    mlp.train(X, y, epochs=10000, verbose=True)

    # Test the model
    predictions = mlp.predict(X)
    print("\nResults:")
    for i in range(len(X)):
        print(f"Input: {X[i]} -> Predicted: {predictions[i][0]} (True: {int(y[i][0])})")

    # Calculate accuracy
    accuracy = np.mean(predictions == y)
    print(f"\nAccuracy: {accuracy:.4f}")

Epoch 0, Loss: 0.2500
Epoch 1000, Loss: 0.2500
Epoch 2000, Loss: 0.2500
Epoch 3000, Loss: 0.2500
Epoch 4000, Loss: 0.2500
Epoch 5000, Loss: 0.2500
Epoch 6000, Loss: 0.2500
Epoch 7000, Loss: 0.2500
Epoch 8000, Loss: 0.2500
Epoch 9000, Loss: 0.2500

Results:
Input: [0. 0. 0.] -> Predicted: 1 (True: 0)
Input: [0. 0. 1.] -> Predicted: 1 (True: 1)
Input: [0. 1. 0.] -> Predicted: 0 (True: 1)
Input: [0. 1. 1.] -> Predicted: 0 (True: 0)
Input: [1. 0. 0.] -> Predicted: 1 (True: 1)
Input: [1. 0. 1.] -> Predicted: 1 (True: 0)
Input: [1. 1. 0.] -> Predicted: 0 (True: 0)
Input: [1. 1. 1.] -> Predicted: 0 (True: 1)

Accuracy: 0.5000
