In [9]:
# Use the %pip magic (not !pip) so packages are installed into the environment
# of the running kernel rather than whichever `pip` is first on the shell PATH.
%pip install numpy matplotlib

Collecting numpy
  Downloading numpy-2.3.5-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.7-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp313-cp313-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.61.0-cp313-cp313-win_amd64.whl.metadata (115 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.9-cp313-cp313-win_amd64.whl.metadata (6.4 kB)
Collecting pillow>=8 (from matplotlib)
  Downloading pillow-12.0.0-cp313-cp313-win_amd64.whl.metadata (9.0 kB)
Collecting pyparsing>=3 (from matplotlib)
  Downloading pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Downloading numpy-2.3.5-cp313-cp313-win_amd64.whl (12.8 MB)
   ---------------------------------------- 0.0/12.8 MB ? eta -:--:--


Gradient Checking

In [14]:
import numpy as np
import sys
import os

sys.path.append(os.path.abspath(os.path.join('..')))

from lib.network import Network
from lib.layers import Dense
from lib.activations import Tanh, Sigmoid
from lib.losses import MSE


def _check_param_gradient(layer, loss_fn, x, y_true, param, analytical_grads,
                          epsilon, tolerance, label):
    """Compare central-difference gradients with analytical ones for one parameter array.

    Args:
        layer: Object whose ``forward(x)`` output depends on ``param``.
        loss_fn: Object whose ``forward(y_pred, y_true)`` returns a scalar loss.
        x: Input passed to ``layer.forward``.
        y_true: Target passed to ``loss_fn.forward``.
        param: NumPy array held by ``layer``; each entry is nudged in place by
            ``+epsilon`` and ``-epsilon`` and then restored.
        analytical_grads: Array indexed with the same multi-indices as ``param``.
        epsilon: Finite-difference step size.
        tolerance: Largest absolute difference accepted for a single entry.
        label: Name used in the per-entry failure message (e.g. ``"Weight"``).

    Returns:
        True when every entry differs by at most ``tolerance``, else False.
    """
    passed = True
    for idx in np.ndindex(param.shape):
        original_value = param[idx]
        try:
            # L(p + epsilon)
            param[idx] = original_value + epsilon
            loss_plus = loss_fn.forward(layer.forward(x), y_true)

            # L(p - epsilon)
            param[idx] = original_value - epsilon
            loss_minus = loss_fn.forward(layer.forward(x), y_true)
        finally:
            # Always restore the entry so an exception in a forward pass
            # cannot leave the layer in a perturbed state.
            param[idx] = original_value

        # Central difference: (L(p + e) - L(p - e)) / (2e)
        numerical_grad = (loss_plus - loss_minus) / (2 * epsilon)
        analytical_grad = analytical_grads[idx]

        diff = abs(numerical_grad - analytical_grad)
        if diff > tolerance:
            passed = False
            print(f"{label} {idx} FAILED: Num={numerical_grad:.8f}, Ana={analytical_grad:.8f}, Diff={diff:.8f}")
    return passed


def verify_dense_layer_math(input_size=3, output_size=2, epsilon=1e-7,
                            tolerance=1e-5, seed=None):
    """Numerically check the Dense-layer gradient formulas against finite differences.

    Builds a ``Dense(input_size, output_size)`` layer and an ``MSE`` loss, draws
    one random input/target pair, computes the analytical gradients
    ``dW = x.T @ dY`` and ``db = sum(dY, axis=0)``, and compares each entry with
    a central-difference estimate obtained by perturbing ``layer.weights`` and
    ``layer.bias`` in place. A PASSED/FAILED summary is printed for the weights
    and for the biases; every mismatching entry is printed individually.

    NOTE(review): the analytical gradients are derived here from
    ``loss_fn.backward`` and the formulas above; ``layer.backward`` is never
    called, so this validates the math against ``layer.forward`` rather than
    the layer's own backward implementation.

    Args:
        input_size: Number of input features of the layer under test.
        output_size: Number of output units of the layer under test.
        epsilon: Finite-difference step size.
        tolerance: Largest absolute difference accepted between the numerical
            and analytical gradient of a single parameter.
        seed: If not None, passed to ``np.random.seed`` before anything random
            is drawn so the check is repeatable. This reseeds NumPy's global
            generator. Presumably ``Dense`` also initialises from that
            generator -- confirm against lib.layers.

    Returns:
        None. Results are reported via ``print``.
    """
    print("Verifying Dense Layer Gradients")

    if seed is not None:
        np.random.seed(seed)

    # Create a dummy layer and loss
    layer = Dense(input_size, output_size)
    loss_fn = MSE()

    # Random input and target (a single sample)
    x = np.random.randn(1, input_size)
    y_true = np.random.randn(1, output_size)

    # 1. Analytical gradient
    y_pred = layer.forward(x)
    # The loss value itself is not needed; forward is still run before backward
    # in case the loss object caches forward state (not visible from here).
    loss_fn.forward(y_pred, y_true)
    output_grad = loss_fn.backward(y_pred, y_true)

    # dW = X.T @ dY, db = column-wise sum of dY
    analytical_dW = np.dot(x.T, output_grad)
    analytical_db = np.sum(output_grad, axis=0, keepdims=True)

    # 2. Numerical gradient (weights)
    print(f"Checking Weights (Sample of {layer.weights.size} params)")
    weights_ok = _check_param_gradient(
        layer, loss_fn, x, y_true, layer.weights, analytical_dW,
        epsilon, tolerance, "Weight",
    )
    if weights_ok:
        print(" Weights Gradient Check PASSED!")
    else:
        print(" Weights Gradient Check FAILED!")

    # 3. Numerical gradient (biases)
    print("Checking Biases")
    bias_ok = _check_param_gradient(
        layer, loss_fn, x, y_true, layer.bias, analytical_db,
        epsilon, tolerance, "Bias",
    )
    if bias_ok:
        print(" Bias Gradient Check PASSED!")
    else:
        print(" Bias Gradient Check FAILED!")

# Run the gradient check; it prints a PASSED/FAILED summary for the weights
# and for the biases of a freshly created Dense layer.
verify_dense_layer_math()

Verifying Dense Layer Gradients
Checking Weights (Sample of 6 params)
 Weights Gradient Check PASSED!
Checking Biases
 Bias Gradient Check PASSED!


In [15]:
# XOR problem: fit a small 2-4-1 network to the four-row XOR truth table.
print("\n Solving XOR ")

# Truth table -- one row per input pair, with the matching XOR target in `y`.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([
    [0],
    [1],
    [1],
    [0],
])

# Assemble the network in order: Dense(2->4), Tanh, Dense(4->1), Sigmoid.
net = Network()
for xor_layer in (Dense(2, 4), Tanh(), Dense(4, 1), Sigmoid()):
    net.add(xor_layer)

# Mean squared error is the training loss.
net.use(MSE())

# Training hyperparameters, then the training run itself.
xor_epochs = 50_000
xor_learning_rate = 0.1
net.train(X, y, epochs=xor_epochs, learning_rate=xor_learning_rate)

# Show the network's output for each of the four inputs.
print("Final Predictions:")
xor_predictions = net.predict(X)
print(xor_predictions)


 Solving XOR 
Epoch 1000/50000, Error: 0.249997
Epoch 2000/50000, Error: 0.249996
Epoch 3000/50000, Error: 0.249993
Epoch 4000/50000, Error: 0.249989
Epoch 5000/50000, Error: 0.249978
Epoch 6000/50000, Error: 0.249941
Epoch 7000/50000, Error: 0.249607
Epoch 8000/50000, Error: 0.229006
Epoch 9000/50000, Error: 0.089937
Epoch 10000/50000, Error: 0.017719
Epoch 11000/50000, Error: 0.006316
Epoch 12000/50000, Error: 0.003466
Epoch 13000/50000, Error: 0.002310
Epoch 14000/50000, Error: 0.001707
Epoch 15000/50000, Error: 0.001343
Epoch 16000/50000, Error: 0.001102
Epoch 17000/50000, Error: 0.000932
Epoch 18000/50000, Error: 0.000805
Epoch 19000/50000, Error: 0.000708
Epoch 20000/50000, Error: 0.000631
Epoch 21000/50000, Error: 0.000569
Epoch 22000/50000, Error: 0.000517
Epoch 23000/50000, Error: 0.000474
Epoch 24000/50000, Error: 0.000437
Epoch 25000/50000, Error: 0.000406
Epoch 26000/50000, Error: 0.000378
Epoch 27000/50000, Error: 0.000354
Epoch 28000/50000, Error: 0.000333
Epoch 29000/50