In [1]:
import numpy as np
import pandas as pd

In [None]:
#Batch Gradient Descent 


class TwoLayerNetwork:
    """Two-layer fully connected classifier: ReLU hidden layer, softmax output.

    Trained with full-batch gradient descent on the cross-entropy loss.
    The parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the parameters: standard-normal weights and zero biases.

        Args:
            input_size: number of input features per sample.
            hidden_size: number of hidden units.
            output_size: number of output classes.
        """
        self.params = {}
        self.params['W1'] = np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def _forward_pass(self, X):
        """Run the network and return ``(a1, probs)``.

        ``a1`` is the hidden ReLU activation (needed by backpropagation) and
        ``probs`` holds the per-row softmax class probabilities.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        z1 = np.dot(X, W1) + b1
        a1 = np.maximum(0, z1)  # ReLU
        z2 = np.dot(a1, W2) + b2

        # Subtract the row-wise maximum before exponentiating: softmax is
        # invariant to that shift, and it keeps np.exp from overflowing to
        # inf (which would turn the probabilities into NaN).
        exp_z = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
        probs = exp_z / np.sum(exp_z, axis=1, keepdims=True)
        return a1, probs

    def forward(self, X):
        """Return the softmax class probabilities for each row of ``X``.

        Args:
            X: 2-D array with one sample per row and ``input_size`` columns.

        Returns:
            Array with one row per sample and ``output_size`` columns; each
            row sums to 1.
        """
        _, probs = self._forward_pass(X)
        return probs

    def loss(self, X, y):
        """Return the mean cross-entropy of the predictions for ``X``.

        Args:
            X: 2-D array with one sample per row.
            y: integer class index for each row of ``X``.
        """
        probs = self.forward(X)
        correct_logprobs = -np.log(probs[range(len(X)), y])
        data_loss = np.sum(correct_logprobs)
        return 1.0 / len(X) * data_loss

    def train(self, X, y, epochs, lr=0.1):
        """Fit the parameters in place with full-batch gradient descent.

        Gradients are summed over the samples (not averaged), so the
        effective step size grows with the number of rows in ``X``.
        The loss is printed every 100 epochs.

        Args:
            X: 2-D array with one sample per row.
            y: integer class index for each row of ``X``.
            epochs: number of passes over the whole dataset.
            lr: learning rate applied to the summed gradients.
        """
        rows = range(len(X))

        for e in range(epochs):
            a1, probs = self._forward_pass(X)

            # Backprop. For softmax + cross-entropy the gradient w.r.t. the
            # logits is (probs - one_hot(y)); `probs` is a fresh array that
            # is not needed afterwards, so it is adjusted in place.
            delta3 = probs
            delta3[rows, y] -= 1
            dw2 = np.dot(a1.T, delta3)
            db2 = np.sum(delta3, axis=0)

            # (a1 > 0) is the derivative of ReLU.
            delta2 = np.dot(delta3, self.params['W2'].T) * (a1 > 0)
            dw1 = np.dot(X.T, delta2)
            # Sum over the sample axis only, so every hidden unit keeps its
            # own bias gradient (a full sum would collapse it to a scalar).
            db1 = np.sum(delta2, axis=0)

            self.params['W1'] -= lr * dw1
            self.params['b1'] -= lr * db1
            self.params['W2'] -= lr * dw2
            self.params['b2'] -= lr * db2

            # Print loss for monitoring training progress
            if e % 100 == 0:
                loss = self.loss(X, y)
                print("Epoch {}: loss = {}".format(e, loss))




        



        

In [None]:
import numpy as np

class BatchGD:
    """Two-layer classifier (ReLU hidden layer, softmax output) trained with
    mini-batch gradient descent on the cross-entropy loss.

    The parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size, batch_size=5):
        """Create small random weights, zero biases and store the batch size.

        Args:
            input_size: number of input features per sample.
            hidden_size: number of hidden units.
            output_size: number of output classes.
            batch_size: number of samples per gradient step.
        """
        self.params = {
            'W1': np.random.randn(input_size, hidden_size) * 0.01,
            'b1': np.zeros(hidden_size),
            'W2': np.random.randn(hidden_size, output_size) * 0.01,
            'b2': np.zeros(output_size)
        }
        self.batch_size = batch_size

    def forward(self, X):
        """Run the network on ``X``.

        Args:
            X: 2-D array with one sample per row and ``input_size`` columns.

        Returns:
            Tuple ``(probs, a1)``: the per-row softmax class probabilities
            and the hidden ReLU activations.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        z1 = np.dot(X, W1) + b1
        a1 = np.maximum(0, z1)  # ReLU
        z2 = np.dot(a1, W2) + b2
        # Shift by the row-wise maximum for numerical stability.
        exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return probs, a1

    def loss(self, X, y):
        """Return the mean cross-entropy of the predictions for ``X``.

        Args:
            X: 2-D array with one sample per row.
            y: integer class index for each row of ``X``.
        """
        probs, _ = self.forward(X)
        # The 1e-9 keeps the log finite when a probability underflows to 0.
        correct_logprobs = -np.log(probs[range(len(X)), y] + 1e-9)
        data_loss = np.mean(correct_logprobs)
        return data_loss

    def train(self, X, y, epochs, lr=0.1):
        """Fit the parameters in place with mini-batch gradient descent.

        The data is reshuffled every epoch and split into consecutive
        batches of ``self.batch_size`` samples (the last batch may be
        smaller). Gradients are averaged over each batch. The loss on the
        full dataset is printed every 100 epochs and after the final epoch.

        Batches are used as given: features are not rescaled here, so that
        training sees the same inputs as ``forward``/``loss``, and labels
        stay integer class indices because they are used to index the
        probability matrix. Standardise ``X`` before calling if desired.

        Args:
            X: 2-D array with one sample per row.
            y: 1-D array of integer class indices, one per row of ``X``.
            epochs: number of passes over the dataset.
            lr: learning rate.
        """
        num_samples = X.shape[0]

        for i in range(epochs):
            # Fresh shuffle each epoch; indexing with the permutation builds
            # copies, so the caller's arrays are left untouched.
            order = np.random.permutation(num_samples)
            X_shuffled = X[order]
            y_shuffled = y[order]

            for start_idx in range(0, num_samples, self.batch_size):
                end_idx = min(start_idx + self.batch_size, num_samples)
                X_batch = X_shuffled[start_idx:end_idx]
                y_batch = y_shuffled[start_idx:end_idx]
                batch_len = len(X_batch)

                # Forward pass
                probs, a1 = self.forward(X_batch)

                # Backward pass. For softmax + cross-entropy the gradient
                # w.r.t. the logits is (probs - one_hot(y)); `probs` is a
                # fresh array, so it is adjusted in place.
                delta3 = probs
                delta3[range(batch_len), y_batch] -= 1
                dw2 = np.dot(a1.T, delta3) / batch_len
                db2 = np.sum(delta3, axis=0) / batch_len

                # (a1 > 0) is the derivative of ReLU.
                delta2 = np.dot(delta3, self.params['W2'].T) * (a1 > 0)
                dw1 = np.dot(X_batch.T, delta2) / batch_len
                db1 = np.sum(delta2, axis=0) / batch_len

                # Parameter update
                self.params['W1'] -= lr * dw1
                self.params['b1'] -= lr * db1
                self.params['W2'] -= lr * dw2
                self.params['b2'] -= lr * db2

            # Print loss
            if i % 100 == 0 or i == epochs - 1:
                loss_val = self.loss(X, y)
                print(f"Epoch {i}: loss = {loss_val:.4f}")


In [8]:
# Toy XOR dataset: four 2-D binary points, labelled 1 when exactly one input is set
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([0, 1, 1, 0])

# Build the network: 2 inputs -> 10 hidden units -> 2 classes
net = TwoLayerNetwork(
    input_size=2,
    hidden_size=10,
    output_size=2,
)

# Fit on the whole dataset for 1000 epochs (default learning rate)
net.train(X, y, epochs=1000)

# Evaluate on the training points: pick the most probable class per row
probs = net.forward(X)
predictions = probs.argmax(axis=1)
print("Predictions: ", predictions)

Epoch 0: loss = 0.6972371790514336
Epoch 100: loss = 0.07806268479878843
Epoch 200: loss = 0.02323949071526986
Epoch 300: loss = 0.012515676827851974
Epoch 400: loss = 0.008348260706092903
Epoch 500: loss = 0.006182881385986309
Epoch 600: loss = 0.004880797184890369
Epoch 700: loss = 0.0040124989477365305
Epoch 800: loss = 0.003397674243222044
Epoch 900: loss = 0.0029415241607864823
Predictions:  [0 1 1 0]
