In [6]:
import numpy as np

In [7]:
def backward(self, X, y, learning_rate=0.1):
        m = X.shape[0]
        
        # --- Output Layer Gradients ---
        # Derivative of Loss w.r.t Z2 (pred - target)
        dZ2 = self.A2 - y
        # Derivative w.r.t W2
        dW2 = (1 / m) * np.dot(self.A1.T, dZ2)
        # Derivative w.r.t b2
        db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)
        
        # --- Hidden Layer Gradients ---
        # Derivative of Sigmoid: s * (1 - s)
        d_sigmoid_Z1 = self.A1 * (1 - self.A1)
        
        # Backpropagate error to hidden layer
        dZ1 = np.dot(dZ2, self.W2.T) * d_sigmoid_Z1
        
        dW1 = (1 / m) * np.dot(X.T, dZ1)
        db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)
        
        # --- Update Weights (Gradient Descent) ---
        self.W2 = self.W2 - learning_rate * dW2
        self.b2 = self.b2 - learning_rate * db2
        self.W1 = self.W1 - learning_rate * dW1
        self.b1 = self.b1 - learning_rate * db1

## Putting it all together
### You can now train the MLP to solve XOR:

In [8]:
import numpy as np

# --- 1. Define the MLP Class ---
class MLP:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights with small random values
        # Layer 1: Input -> Hidden
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        
        # Layer 2: Hidden -> Output
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))
        
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def forward(self, X):
        # --- Layer 1 ---
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.sigmoid(self.Z1)  # Activation Layer 1
        
        # --- Layer 2 ---
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)  # Activation Layer 2 (Output)
        
        return self.A2

    def calculate_loss(self, y_true, y_pred):
        # Binary Cross Entropy Loss
        m = y_true.shape[0]
        epsilon = 1e-15 # To prevent log(0) error
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        
        loss = -1/m * np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return loss

    def backward(self, X, y, learning_rate=0.1):
        m = X.shape[0]
        
        # --- Output Layer Gradients ---
        dZ2 = self.A2 - y
        dW2 = (1 / m) * np.dot(self.A1.T, dZ2)
        db2 = (1 / m) * np.sum(dZ2, axis=0, keepdims=True)
        
        # --- Hidden Layer Gradients ---
        d_sigmoid_Z1 = self.A1 * (1 - self.A1) # Derivative of Sigmoid
        dZ1 = np.dot(dZ2, self.W2.T) * d_sigmoid_Z1
        
        dW1 = (1 / m) * np.dot(X.T, dZ1)
        db1 = (1 / m) * np.sum(dZ1, axis=0, keepdims=True)
        
        # --- Update Weights ---
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2



In [9]:
# --- 2. Prepare Data (XOR Problem) ---
# XOR Logic: 0,0->0 | 0,1->1 | 1,0->1 | 1,1->0
X = np.array([[0,0], [0,1], [1,0], [1,1]])
y = np.array([[0], [1], [1], [0]])

# --- 3. Initialize Model ---
# Input: 2 features, Hidden: 4 neurons, Output: 1 neuron
mlp = MLP(input_size=2, hidden_size=4, output_size=1)

# --- 4. Training Loop ---
print("Training started...")
for epoch in range(10000):
    # Forward pass
    output = mlp.forward(X)
    
    # Backward pass & Weight Update
    mlp.backward(X, y, learning_rate=0.5) # Increased LR slightly for faster convergence
    
    if epoch % 1000 == 0:
        loss = mlp.calculate_loss(y, output)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Training started...
Epoch 0, Loss: 0.6932
Epoch 1000, Loss: 0.6931
Epoch 2000, Loss: 0.6931
Epoch 3000, Loss: 0.6931
Epoch 4000, Loss: 0.6931
Epoch 5000, Loss: 0.6931
Epoch 6000, Loss: 0.6931
Epoch 7000, Loss: 0.6931
Epoch 8000, Loss: 0.6931
Epoch 9000, Loss: 0.6931


In [10]:
# --- 5. Final Prediction ---
print("\nFinal Predictions (Raw Probabilities):")
print(mlp.forward(X)) 

print("\nFinal Predictions (Rounded):")
print(np.round(mlp.forward(X)))


Final Predictions (Raw Probabilities):
[[0.4999928 ]
 [0.50000682]
 [0.49999319]
 [0.5000072 ]]

Final Predictions (Rounded):
[[0.]
 [1.]
 [0.]
 [1.]]
