# In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Generate 500 random 2-D points and label them with one of three classes
# according to their squared distance from the origin.

np.random.seed(42)  # fixed seed so the dataset is reproducible

N = 500
X = np.random.randn(N, 2) * 2

# NOTE: despite its name, `radii` holds the *squared* distances
# x1**2 + x2**2, so the thresholds below are compared against r**2.
radii = np.sum(X**2, axis=1)

# Class index per sample:
#   0 -> radii < 2,   1 -> 2 <= radii < 6,   2 -> radii >= 6
# Each satisfied threshold adds one to the class index.
labels = (radii >= 2).astype(int) + (radii >= 6).astype(int)

# One-hot encode the class indices into an (N, 3) float array.
Y = np.zeros((N, 3))
Y[np.arange(N), labels] = 1

# Standardise each feature to zero mean / unit variance. The small epsilon
# guards against division by zero for a constant feature.
X_mean = X.mean(axis=0)
X_std = X.std(axis=0) + 1e-9
X = (X - X_mean) / X_std

# 3-layer neural network implementation starts below.

def relu(x):
    """Return the element-wise rectified linear unit, ``max(0, x)``."""
    rectified = np.maximum(0, x)
    return rectified
def relu_deriv(x):
    """Return the ReLU derivative: 1.0 where ``x > 0`` and 0.0 elsewhere.

    The input is coerced with ``np.asarray`` so that plain Python scalars
    and sequences are accepted as well as NumPy arrays (a bare ``x > 0`` on
    a Python number yields a ``bool`` with no ``astype`` method, and fails
    outright on a list).

    Args:
        x: Pre-activation values (array, sequence, or scalar).

    Returns:
        Float array (or NumPy float scalar for scalar input) with the same
        shape as ``x``. The derivative at exactly 0 is taken to be 0.
    """
    positive = np.asarray(x) > 0
    return positive.astype(float)
def softmax(x):
    """Return the softmax of ``x`` taken along axis 1.

    The maximum along axis 1 is subtracted before exponentiating so that
    large inputs do not overflow; this does not change the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    normaliser = np.sum(exps, axis=1, keepdims=True)
    return exps / normaliser
def cross_entropy(pred, true):
    """Return the mean cross-entropy between ``pred`` and ``true``.

    For each row the products ``true * log(pred + 1e-12)`` are summed along
    axis 1; the result is the negated mean of those row sums. The 1e-12
    offset keeps the logarithm finite when a predicted value is 0.
    """
    log_pred = np.log(pred + 1e-12)
    row_sums = np.sum(true * log_pred, axis=1)
    return -np.mean(row_sums)

def xavier_init(fan_in, fan_out):
    """Draw a ``(fan_in, fan_out)`` weight matrix with Xavier/Glorot uniform init.

    Values are sampled from ``np.random.uniform`` over ``[-b, b)`` with
    ``b = sqrt(6 / (fan_in + fan_out))``, using NumPy's global random state.
    """
    shape = (fan_in, fan_out)
    bound = np.sqrt(6.0 / sum(shape))
    return np.random.uniform(low=-bound, high=bound, size=shape)

class ThreeLayerNN:
    """Fully connected classifier: two ReLU hidden layers and a softmax output.

    Parameters are updated with plain mini-batch gradient descent.
    ``forward`` caches the pre-activations and activations on the instance
    and ``backward`` reads them, so ``backward`` must follow a ``forward``
    call on the same batch.
    """

    def __init__(self, input_dim=2, h1=32, h2=16, output_dim=3, lr=0.01):
        """Create Xavier-initialised weights and zero biases for the three layers.

        Args:
            input_dim: Number of input features.
            h1: Width of the first hidden layer.
            h2: Width of the second hidden layer.
            output_dim: Number of output classes.
            lr: Learning rate used by ``backward``.
        """
        self.lr = lr
        # Weights are drawn in layer order (W1, W2, W3), which fixes the
        # sequence of random draws for a given seed. Biases are row vectors
        # so they broadcast over the batch dimension.
        self.W1, self.b1 = xavier_init(input_dim, h1), np.zeros((1, h1))
        self.W2, self.b2 = xavier_init(h1, h2), np.zeros((1, h2))
        self.W3, self.b3 = xavier_init(h2, output_dim), np.zeros((1, output_dim))

    def forward(self, X):
        """Run a forward pass and return the softmax output.

        Intermediate values are stored as ``z1``/``a1``, ``z2``/``a2``,
        ``z3`` and ``out`` for use by ``backward``.

        Args:
            X: Input batch; its second dimension must match ``W1``'s first.

        Returns:
            The softmax of the final layer's pre-activations.
        """
        pre1 = X.dot(self.W1) + self.b1
        act1 = relu(pre1)
        pre2 = act1.dot(self.W2) + self.b2
        act2 = relu(pre2)
        logits = act2.dot(self.W3) + self.b3
        probs = softmax(logits)

        # Cache everything the backward pass needs.
        self.z1, self.a1 = pre1, act1
        self.z2, self.a2 = pre2, act2
        self.z3, self.out = logits, probs
        return probs

    def backward(self, X, Y):
        """Back-propagate from the cached forward pass and update all parameters.

        Args:
            X: The batch that was passed to the preceding ``forward`` call.
            Y: Targets with the same shape as the cached output
               (presumably one-hot rows -- confirm against the caller).
        """
        batch = X.shape[0]

        # Output layer: (out - Y) / batch is the gradient of the mean
        # cross-entropy w.r.t. the pre-softmax values when each row of Y
        # sums to 1.
        grad_z3 = (self.out - Y) / batch
        grad_W3 = self.a2.T.dot(grad_z3)
        grad_b3 = np.sum(grad_z3, axis=0, keepdims=True)

        # Second hidden layer.
        grad_a2 = grad_z3.dot(self.W3.T)
        grad_z2 = grad_a2 * relu_deriv(self.z2)
        grad_W2 = self.a1.T.dot(grad_z2)
        grad_b2 = np.sum(grad_z2, axis=0, keepdims=True)

        # First hidden layer.
        grad_a1 = grad_z2.dot(self.W2.T)
        grad_z1 = grad_a1 * relu_deriv(self.z1)
        grad_W1 = X.T.dot(grad_z1)
        grad_b1 = np.sum(grad_z1, axis=0, keepdims=True)

        # All gradients are computed before any parameter changes; the
        # in-place subtraction then mutates the stored arrays directly.
        updates = (
            (self.W3, grad_W3),
            (self.b3, grad_b3),
            (self.W2, grad_W2),
            (self.b2, grad_b2),
            (self.W1, grad_W1),
            (self.b1, grad_b1),
        )
        for param, grad in updates:
            param -= self.lr * grad

    def train_epoch(self, X, Y, batch_size=64):
        """Run one epoch of mini-batch gradient descent over a shuffled copy of the data.

        Args:
            X: Full input array.
            Y: Full target array, aligned row-for-row with ``X``.
            batch_size: Maximum number of rows per mini-batch; the final
                batch may be smaller.
        """
        n_samples = len(X)
        order = np.random.permutation(n_samples)
        X_shuffled, Y_shuffled = X[order], Y[order]
        for lo in range(0, n_samples, batch_size):
            hi = lo + batch_size
            batch_x = X_shuffled[lo:hi]
            batch_y = Y_shuffled[lo:hi]
            self.forward(batch_x)
            self.backward(batch_x, batch_y)

# Training configuration.
epochs = 1000
batch_size = 64

model = ThreeLayerNN(lr=0.01)
losses = []  # loss on the full dataset, recorded once per epoch

for ep in range(epochs):
    # One shuffled pass of mini-batch updates over the whole dataset.
    model.train_epoch(X, Y, batch_size=batch_size)

    # Re-evaluate on the full dataset with the updated weights.
    pred = model.forward(X)
    loss = cross_entropy(pred, Y)
    losses.append(loss)

    # Report progress every 100th epoch (0, 100, 200, ...).
    if not ep % 100:
        print(f"Epoch {ep}, Loss: {loss:.4f}")

# Plot the per-epoch training loss recorded in `losses`.

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(losses, label='Training Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Loss Curve')
ax.legend()
ax.grid(True)
plt.show()