In [1]:
import numpy as np

class MLP:
    """
    A simple Multi-Layer Perceptron (MLP) for forward propagation.

    Architecture: Input -> Hidden (sigmoid) -> Output (sigmoid)

    Samples are stored column-wise: an input matrix has one feature per row
    and one sample per column, so each layer computes ``W @ A + b``.

    Attributes
    ----------
    W1 : 2d-array, shape = (hidden_size, input_size)
        Input-to-hidden weights.
    b1 : 2d-array, shape = (hidden_size, 1)
        Hidden-layer biases.
    W2 : 2d-array, shape = (output_size, hidden_size)
        Hidden-to-output weights.
    b2 : 2d-array, shape = (output_size, 1)
        Output-layer biases.
    """
    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """
        Create the network parameters and print their shapes.

        Parameters
        ----------
        input_size : int
            Number of input features.
        hidden_size : int
            Number of hidden units.
        output_size : int
            Number of output units.
        seed : int or None, optional
            If given, weights are drawn from a dedicated generator seeded
            with this value, so two networks built with the same seed get
            identical weights. If None (default), the global NumPy random
            state is used, exactly as before.
        """
        if seed is None:
            # Global legacy generator: still honours any np.random.seed(...)
            # the caller may have issued.
            def draw(rows, cols):
                return np.random.randn(rows, cols)
        else:
            # Private generator: reproducible without touching global state.
            rng = np.random.default_rng(seed)

            def draw(rows, cols):
                return rng.standard_normal((rows, cols))

        # Initialize weights with small random values (standard normal * 0.01)
        # W1 shape: (hidden_size, input_size)
        # W2 shape: (output_size, hidden_size)
        self.W1 = draw(hidden_size, input_size) * 0.01
        self.W2 = draw(output_size, hidden_size) * 0.01

        # Initialize biases to zero; stored as columns so they broadcast
        # across the sample axis.
        # b1 shape: (hidden_size, 1)
        # b2 shape: (output_size, 1)
        self.b1 = np.zeros((hidden_size, 1))
        self.b2 = np.zeros((output_size, 1))

        print("MLP Initialized:")
        print(f"  W1 shape: {self.W1.shape}")
        print(f"  b1 shape: {self.b1.shape}")
        print(f"  W2 shape: {self.W2.shape}")
        print(f"  b2 shape: {self.b2.shape}")

    def _sigmoid(self, z):
        """
        Element-wise sigmoid activation, 1 / (1 + exp(-z)).

        Evaluated in a numerically stable form: the naive expression
        overflows in exp(-z) when z is a large negative number.
        """
        z = np.asarray(z)
        # exp(-|z|) lies in [0, 1], so it can underflow to 0 but never overflow.
        e = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def forward(self, X):
        """
        Perform a forward pass through the network.

        Parameters
        ----------
        X : 2d-array, shape = (input_size, n_samples)
            Input data. A 1d-array of length input_size is accepted and
            treated as a single sample (one column).

        Returns
        -------
        A2 : 2d-array, shape = (output_size, n_samples)
            The output of the network.
        """
        X = np.asarray(X)
        if X.ndim == 1:
            # Without this, (hidden,) + (hidden, 1) would broadcast to
            # (hidden, hidden) and silently produce a wrongly shaped result.
            X = X[:, np.newaxis]

        # Step 1: Input to Hidden
        # Z1 = W1 @ X + b1
        Z1 = np.matmul(self.W1, X) + self.b1
        A1 = self._sigmoid(Z1)

        # Step 2: Hidden to Output
        # Z2 = W2 @ A1 + b2
        Z2 = np.matmul(self.W2, A1) + self.b2
        A2 = self._sigmoid(Z2)  # Output activation

        return A2

In [2]:
# --- XOR demo: forward pass with hand-picked parameters ---

# XOR inputs, one sample per column.
# X shape: (n_features, n_samples) -> (2, 4)
X_xor = np.array([[0, 0, 1, 1],   # Feature 1
                  [0, 1, 0, 1]])  # Feature 2

# Expected XOR output for each column of X_xor.
# y shape: (1, 4)
y_xor = np.array([[0, 1, 1, 0]])

# Instantiate our MLP
# 2 inputs, 2 hidden neurons, 1 output
mlp = MLP(input_size=2, hidden_size=2, output_size=1)

# Replace the random initialization with parameters known to solve XOR.
# The large magnitudes saturate the sigmoids so each unit acts like a logic gate:
#   hidden unit 1 ~ OR(x1, x2), hidden unit 2 ~ NAND(x1, x2), output ~ AND(h1, h2)
mlp.W1 = np.array([[20, 20],
                   [-20, -20]])
mlp.b1 = np.array([[-10],
                   [30]])
mlp.W2 = np.array([[20, 20]])
mlp.b2 = np.array([[-30]])

# Run the forward pass
predictions = mlp.forward(X_xor)

print("\n--- XOR Problem ---")
print(f"Input data (X):\n{X_xor}")
print(f"True labels (y):\n{y_xor}")
print(f"Predictions (A2):\n{predictions.round(3)}")

# Fail loudly instead of relying on a visual comparison of the printout.
assert np.allclose(predictions, y_xor, atol=1e-3), \
    "hand-set weights do not reproduce the XOR truth table"

MLP Initialized:
  W1 shape: (2, 2)
  b1 shape: (2, 1)
  W2 shape: (1, 2)
  b2 shape: (1, 1)

--- XOR Problem ---
Input data (X):
[[0 0 1 1]
 [0 1 0 1]]
True labels (y):
[[0 1 1 0]]
Predictions (A2):
[[0. 1. 1. 0.]]
