In [None]:
import numpy as np

class TwoLayerNN:
    """Softmax classifier with one ReLU hidden layer, trained by full-batch gradient descent.

    Forward pass: ``X @ W1 + b1 -> ReLU -> @ W2 + b2 -> softmax``.

    Parameters
    ----------
    input_dim : int
        Number of input features (rows of ``W1``).
    hidden_dim : int
        Number of hidden units.
    output_dim : int
        Number of classes (columns of ``W2``).
    lr : float, default 0.01
        Gradient-descent step size.
    epochs : int, default 1000
        Number of full-batch updates performed by ``fit``.

    Attributes
    ----------
    W1, b1, W2, b2 : np.ndarray
        Layer parameters, updated in place by ``fit``.
    loss_history_ : list of float
        Cross-entropy loss recorded before each update; set by ``fit``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs

        # Weights: Gaussian noise scaled by 0.01; biases: zeros.
        # W1 is drawn before W2, so a seeded run stays reproducible.
        self.W1 = np.random.randn(input_dim, hidden_dim) * 0.01
        self.b1 = np.zeros((1, hidden_dim))
        self.W2 = np.random.randn(hidden_dim, output_dim) * 0.01
        self.b2 = np.zeros((1, output_dim))

    def relu(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def relu_grad(self, z):
        """Derivative of ReLU: 1.0 where ``z > 0``, else 0.0."""
        return (z > 0).astype(float)

    def softmax(self, z):
        """Row-wise softmax of a 2-D array of logits."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def compute_loss(self, y_true, y_pred):
        """Mean cross-entropy between one-hot ``y_true`` and probabilities ``y_pred``."""
        m = y_true.shape[0]
        eps = 1e-9  # keeps log() finite when a predicted probability is exactly 0
        return -np.sum(y_true * np.log(y_pred + eps)) / m

    def _forward(self, X):
        """Run the network on ``X``; return ``(z1, a1, a2)`` with ``a2`` the class probabilities."""
        z1 = X @ self.W1 + self.b1
        a1 = self.relu(z1)
        z2 = a1 @ self.W2 + self.b2
        return z1, a1, self.softmax(z2)

    def _one_hot(self, y):
        """One-hot encode integer labels ``y`` into shape ``(len(y), output_dim)``.

        The width comes from the output layer, not from the labels present in
        ``y``, so a batch that happens to miss a class still lines up with the
        softmax output.

        Raises
        ------
        ValueError
            If ``y`` is not 1-D, is empty, is not integer-valued, or contains a
            label outside ``[0, output_dim - 1]``.
        """
        labels = np.asarray(y)
        if labels.ndim != 1:
            raise ValueError(f"y must be 1-D, got shape {labels.shape}")
        if labels.size == 0:
            raise ValueError("y must contain at least one label")
        if not np.issubdtype(labels.dtype, np.integer):
            # Accept e.g. float arrays that hold whole numbers.
            as_int = labels.astype(np.int64)
            if not np.array_equal(as_int, labels):
                raise ValueError("y must contain integer class labels")
            labels = as_int

        num_classes = self.W2.shape[1]
        # Negative indices would otherwise silently wrap around in np.eye(...)[labels].
        if labels.min() < 0 or labels.max() >= num_classes:
            raise ValueError(
                f"labels must lie in [0, {num_classes - 1}], "
                f"got range [{labels.min()}, {labels.max()}]"
            )
        return np.eye(num_classes)[labels]

    def fit(self, X, y):
        """Train on ``X`` (n_samples, input_dim) with integer labels ``y`` (n_samples,).

        Runs ``self.epochs`` full-batch gradient-descent steps, printing the
        loss every 100 epochs and recording it in ``self.loss_history_``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` is invalid (see ``_one_hot``) or its length differs from
            the number of rows in ``X``.
        """
        X = np.asarray(X, dtype=np.float64)
        y_onehot = self._one_hot(y)

        m = X.shape[0]
        if y_onehot.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y has {y_onehot.shape[0]} labels"
            )

        self.loss_history_ = []

        for epoch in range(self.epochs):
            # Forward pass
            z1, a1, a2 = self._forward(X)

            # Loss (measured before this epoch's update)
            loss = self.compute_loss(y_onehot, a2)
            self.loss_history_.append(loss)

            # Backpropagation. For softmax + cross-entropy the gradient with
            # respect to the logits is (probabilities - one_hot) / m.
            dz2 = (a2 - y_onehot) / m
            dW2 = a1.T @ dz2
            db2 = np.sum(dz2, axis=0, keepdims=True)

            da1 = dz2 @ self.W2.T
            dz1 = da1 * self.relu_grad(z1)
            dW1 = X.T @ dz1
            db1 = np.sum(dz1, axis=0, keepdims=True)

            # Update
            self.W1 -= self.lr * dW1
            self.b1 -= self.lr * db1
            self.W2 -= self.lr * dW2
            self.b2 -= self.lr * db2

            # Print progress
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss={loss:.4f}")

        return self

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        X = np.asarray(X, dtype=np.float64)
        _, _, probs = self._forward(X)
        return np.argmax(probs, axis=1)
    
# Synthetic dataset: 3 classes, 2 features. Labels are drawn independently
# of X, so there is no real signal for the network to learn.
N_SAMPLES, N_FEATURES, N_CLASSES = 200, 2, 3

np.random.seed(42)
X = np.random.randn(N_SAMPLES, N_FEATURES)
y = np.random.choice(N_CLASSES, N_SAMPLES)

model = TwoLayerNN(
    input_dim=N_FEATURES,
    hidden_dim=5,
    output_dim=N_CLASSES,
    lr=0.1,
    epochs=1000,
)
model.fit(X, y)

# Predicted class indices for the first ten samples
preds = model.predict(X[:10])
print("Predictions:", preds)

Epoch 0, Loss=1.0986
Epoch 100, Loss=1.0985
Epoch 200, Loss=1.0984
Epoch 300, Loss=1.0980
Epoch 400, Loss=1.0971
Epoch 500, Loss=1.0945
Epoch 600, Loss=1.0892
Epoch 700, Loss=1.0830
Epoch 800, Loss=1.0786
Epoch 900, Loss=1.0752
Predictions: [2 0 0 0 0 1 2 0 0 1]


In [None]:
import numpy as np

class TwoLayerNN_Multi:
    """Multi-class softmax classifier with one ReLU hidden layer.

    Trained by full-batch gradient descent on the mean cross-entropy loss.
    Forward pass: ``X @ w1 + b1 -> ReLU -> @ w2 + b2 -> softmax``.

    Parameters
    ----------
    input_dim : int
        Number of input features (rows of ``w1``).
    hidden_dim : int
        Number of hidden units.
    output_dim : int
        Number of classes (columns of ``w2``).
    learning_rate : float, default 0.01
        Gradient-descent step size.
    epochs : int, default 200
        Number of full-batch updates performed by ``fit``.

    Attributes
    ----------
    w1, b1, w2, b2 : np.ndarray
        Layer parameters, updated in place by ``fit``.
    loss_history_ : list of float
        Cross-entropy loss recorded before each update; set by ``fit``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.01, epochs=200):
        self.learning_rate = learning_rate
        self.epochs = epochs

        # Weights: Gaussian noise scaled by 0.01; biases: zeros.
        # w1 is drawn before w2, so a seeded run stays reproducible.
        self.w1 = np.random.randn(input_dim, hidden_dim) * 0.01
        self.b1 = np.zeros((1, hidden_dim))
        self.w2 = np.random.randn(hidden_dim, output_dim) * 0.01
        self.b2 = np.zeros((1, output_dim))

    def relu(self, z):
        """Element-wise ``max(0, z)``."""
        return np.maximum(0, z)

    def softmax(self, z):
        """Row-wise softmax of a 2-D array of logits."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def calculate_loss(self, y_pred, y_true):
        """Mean cross-entropy of probabilities ``y_pred`` against one-hot ``y_true``."""
        eps = 1e-9  # keeps log() finite when a predicted probability is exactly 0
        return -np.mean(np.sum(y_true * np.log(y_pred + eps), axis=1))

    def relu_grad(self, z):
        """Derivative of ReLU: 1 where ``z > 0``, else 0."""
        return (z > 0).astype(int)

    def _forward(self, X):
        """Run the network on ``X``; return ``(z1, a1, a2)`` with ``a2`` the class probabilities."""
        z1 = X @ self.w1 + self.b1
        a1 = self.relu(z1)
        z2 = a1 @ self.w2 + self.b2
        return z1, a1, self.softmax(z2)

    def _one_hot(self, y):
        """One-hot encode integer labels ``y`` into shape ``(len(y), output_dim)``.

        The width comes from the output layer, not from the labels present in
        ``y``, so a batch that happens to miss a class still lines up with the
        softmax output.

        Raises
        ------
        ValueError
            If ``y`` is not 1-D, is empty, is not integer-valued, or contains a
            label outside ``[0, output_dim - 1]``.
        """
        labels = np.asarray(y)
        if labels.ndim != 1:
            raise ValueError(f"y must be 1-D, got shape {labels.shape}")
        if labels.size == 0:
            raise ValueError("y must contain at least one label")
        if not np.issubdtype(labels.dtype, np.integer):
            # Accept e.g. float arrays that hold whole numbers.
            as_int = labels.astype(np.int64)
            if not np.array_equal(as_int, labels):
                raise ValueError("y must contain integer class labels")
            labels = as_int

        num_classes = self.w2.shape[1]
        # Negative indices would otherwise silently wrap around in np.eye(...)[labels].
        if labels.min() < 0 or labels.max() >= num_classes:
            raise ValueError(
                f"labels must lie in [0, {num_classes - 1}], "
                f"got range [{labels.min()}, {labels.max()}]"
            )
        return np.eye(num_classes)[labels]

    def fit(self, X, y):
        """Train on ``X`` (n_samples, input_dim) with integer labels ``y`` (n_samples,).

        Runs ``self.epochs`` full-batch gradient-descent steps and records the
        loss of every epoch in ``self.loss_history_``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``y`` is invalid (see ``_one_hot``) or its length differs from
            the number of rows in ``X``.
        """
        X = np.asarray(X, dtype=np.float64)
        y_onehot = self._one_hot(y)

        m = X.shape[0]
        if y_onehot.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but y has {y_onehot.shape[0]} labels"
            )

        self.loss_history_ = []

        for epoch in range(self.epochs):
            # Forward pass
            z1, a1, a2 = self._forward(X)

            # Loss (measured before this epoch's update)
            self.loss_history_.append(self.calculate_loss(a2, y_onehot))

            # Backpropagation. For softmax + cross-entropy the gradient with
            # respect to the logits is (probabilities - one_hot) / m.
            dz2 = (a2 - y_onehot) / m
            dw2 = a1.T @ dz2
            db2 = np.sum(dz2, axis=0, keepdims=True)

            da1 = dz2 @ self.w2.T
            dz1 = da1 * self.relu_grad(z1)
            dw1 = X.T @ dz1
            db1 = np.sum(dz1, axis=0, keepdims=True)

            # Update the parameters
            self.w2 -= self.learning_rate * dw2
            self.b2 -= self.learning_rate * db2
            self.w1 -= self.learning_rate * dw1
            self.b1 -= self.learning_rate * db1

        return self

    def predict(self, X):
        """Return the most probable class index for each row of ``X``."""
        X = np.asarray(X, dtype=np.float64)
        _, _, a2 = self._forward(X)
        return np.argmax(a2, axis=1)

    def score(self, X, y):
        """Return the fraction of rows in ``X`` whose predicted class equals ``y``."""
        y_pred = self.predict(X)
        return np.mean(y_pred == np.asarray(y))

In [25]:
# Synthetic dataset: 200 samples, 4 features, 3 classes. Labels are drawn
# independently of X.
N_SAMPLES, N_FEATURES, N_CLASSES = 200, 4, 3

np.random.seed(42)
X = np.random.randn(N_SAMPLES, N_FEATURES)
y = np.random.choice(N_CLASSES, N_SAMPLES)

# learning_rate and epochs are left at the class defaults
model = TwoLayerNN_Multi(
    input_dim=N_FEATURES,
    hidden_dim=5,
    output_dim=N_CLASSES,
)
model.fit(X, y)

# Predicted class indices for the first ten samples (displayed as the cell output)
model.predict(X[:10])

array([0, 2, 1, 0, 0, 2, 2, 1, 0, 1])