In [1]:
import numpy as np
import pandas as pd

In [7]:
class TwoLayerNetwork:
    """Fully connected classifier: affine -> ReLU -> affine -> softmax.

    Parameters are stored in ``self.params`` under the keys ``'W1'``,
    ``'b1'``, ``'W2'`` and ``'b2'``. Weights are drawn from a standard
    normal distribution; biases start at zero.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Allocate the parameters for the two affine layers.

        Args:
            input_size: Number of features per input row.
            hidden_size: Number of hidden (ReLU) units.
            output_size: Number of output classes.
        """
        self.params = {
            'W1': np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def _hidden_and_logits(self, X):
        """Return the ReLU activations and the pre-softmax scores for ``X``."""
        hidden = np.maximum(0, np.dot(X, self.params['W1']) + self.params['b1'])
        logits = np.dot(hidden, self.params['W2']) + self.params['b2']
        return hidden, logits

    @staticmethod
    def _softmax(logits):
        """Row-wise softmax of a 2-D score array.

        The row maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``np.exp`` from overflowing
        to ``inf`` (which would otherwise yield ``nan`` probabilities).
        """
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, X):
        """Compute class probabilities for each row of ``X``.

        Args:
            X: 2-D array with one sample per row.

        Returns:
            Array with one row per sample and one column per class; each row
            sums to 1.
        """
        _, logits = self._hidden_and_logits(X)
        return self._softmax(logits)

    def loss(self, X, y):
        """Return the mean cross-entropy of the predictions for ``X``.

        Args:
            X: 2-D array with one sample per row.
            y: Integer class index for each row of ``X``.

        Returns:
            The negative log-probability of the correct class, averaged over
            the rows of ``X``.
        """
        _, logits = self._hidden_and_logits(X)
        # Log-softmax computed directly from the shifted scores, so a
        # probability that underflows to 0 never reaches np.log.
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        log_probs = shifted - np.log(
            np.sum(np.exp(shifted), axis=1, keepdims=True)
        )
        n_samples = len(X)
        data_loss = -np.sum(log_probs[np.arange(n_samples), y])
        return 1.0 / n_samples * data_loss

    def train(self, X, y, epochs, lr=0.1):
        """Fit the parameters with full-batch gradient descent.

        The gradients are those of the cross-entropy *summed* over the batch
        (not averaged), so the effective step size grows with the number of
        rows in ``X``. The mean loss is printed every 100 epochs.

        Args:
            X: 2-D array with one sample per row.
            y: Integer class index for each row of ``X``.
            epochs: Number of gradient-descent steps to take.
            lr: Learning rate applied to every parameter update.
        """
        X = np.asarray(X)
        rows = np.arange(len(X))

        for e in range(epochs):
            hidden, logits = self._hidden_and_logits(X)

            # Gradient of the summed cross-entropy w.r.t. the scores:
            # softmax probabilities minus the one-hot targets. The softmax
            # result is a fresh array, so editing it in place is safe.
            delta3 = self._softmax(logits)
            delta3[rows, y] -= 1
            dw2 = np.dot(hidden.T, delta3)
            db2 = np.sum(delta3, axis=0)

            # Back-propagate through the ReLU: gradient flows only where the
            # unit was active.
            delta2 = np.dot(delta3, self.params['W2'].T) * (hidden > 0)
            dw1 = np.dot(X.T, delta2)
            # Sum over samples only (axis=0) so each hidden unit gets its own
            # bias gradient rather than one scalar shared by all units.
            db1 = np.sum(delta2, axis=0)

            self.params['W1'] -= lr * dw1
            self.params['b1'] -= lr * db1
            self.params['W2'] -= lr * dw2
            self.params['b2'] -= lr * db2

            # Print loss for monitoring training progress
            if e % 100 == 0:
                loss = self.loss(X, y)
                print("Epoch {}: loss = {}".format(e, loss))




        



        

In [8]:
# XOR truth table: every binary input pair, labelled with its exclusive-or
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
y = np.array([0, 1, 1, 0])

# Build a 2-input, 10-hidden-unit, 2-class network
net = TwoLayerNetwork(2, 10, 2)

# Fit on the full dataset for 1000 epochs (default learning rate)
net.train(X, y, 1000)

# Evaluate on the training inputs: pick the most probable class per row
probs = net.forward(X)
predictions = probs.argmax(axis=1)
print("Predictions: ", predictions)

Epoch 0: loss = 0.6972371790514336
Epoch 100: loss = 0.07806268479878843
Epoch 200: loss = 0.02323949071526986
Epoch 300: loss = 0.012515676827851974
Epoch 400: loss = 0.008348260706092903
Epoch 500: loss = 0.006182881385986309
Epoch 600: loss = 0.004880797184890369
Epoch 700: loss = 0.0040124989477365305
Epoch 800: loss = 0.003397674243222044
Epoch 900: loss = 0.0029415241607864823
Predictions:  [0 1 1 0]
