### HW3 2-Layer NN

### Srushti Nayak

#### Importing required libraries

In [70]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np

from PIL import Image

import matplotlib.pyplot as plt

In [71]:
transform = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [72]:
x_train = np.array([item[0].numpy().flatten() for item in trainset])

In [73]:
x_train.shape

(50000, 3072)

In [74]:
y_train = np.array([item[1] for item in trainset])

In [75]:
y_train.shape

(50000,)

In [76]:
x_test = np.array([item[0].numpy().flatten() for item in testset])

In [77]:
y_test = np.array([item[1] for item in testset])

#### Implementing 2-layer NN

In [83]:
# sigmoid withought regularization
class NNSig_without_reg:
    
    def __init__(self, x_train,y_train):
        self.x_train = x_train
        self.y_train = y_train
        self.batch_size = 128
        
    def setHyperParameters(self, input_size, hidden_size, output_size, learning_rate, epochs):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.w1 = np.random.randn(input_size, hidden_size)
        self.w2 = np.random.randn(hidden_size, output_size)
        
    def forward(self, x_batch):
        hidden_layer_input = x_batch.dot(self.w1) 
        hidden_layer_output = 1 / (1 + np.exp(-hidden_layer_input))
        output_layer_input = hidden_layer_output.dot(self.w2) 
        scores = np.exp(output_layer_input - np.max(output_layer_input, axis=1, keepdims=True))

        return hidden_layer_input, hidden_layer_output, scores

    def loss(self,y_batch,scores):
        correct_scores = scores[np.arange(len(scores)), y_batch]
        loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))
        return loss
        
    def backward(self, hidden_layer_input, hidden_layer_output, scores, y_batch,x_batch):
        grad_scores = scores / np.sum(scores, axis=1, keepdims=True)
        grad_scores[np.arange(len(grad_scores)), y_batch] -= 1
        grad_scores /= len(grad_scores)
        
        grad_w2 = hidden_layer_output.T.dot(grad_scores)
        grad_hidden = grad_scores.dot(self.w2.T)
        
        grad_hidden_layer_input = grad_hidden * (hidden_layer_output * (1 - hidden_layer_output))
        grad_w1 = x_batch.T.dot(grad_hidden_layer_input)

        updated_w1= self.w1 - self.learning_rate * grad_w1
        updated_w2 = self.w2 - self.learning_rate * grad_w2
        
        return updated_w1,updated_w2
        
            
    def train(self):
        for epoch in range(self.epochs):
            for i in range(0, len(self.x_train), self.batch_size):
                x_batch = self.x_train[i:i+self.batch_size]
                y_batch = self.y_train[i:i+self.batch_size]
                
                hidden_layer_input, hidden_layer_output, scores = self.forward(x_batch)
                loss = self.loss(y_batch, scores)
                
                updated_w1,updated_w2 = self.backward(hidden_layer_input, hidden_layer_output, scores, y_batch,x_batch)
                self.w1 = updated_w1
                self.w2 = updated_w2
        
            print(f"Epoch {epoch+1}/{self.epochs}, Loss: {loss}")

    def test(self,x_test, y_test):
        hidden_layer_input = x_test.dot(self.w1)
        hidden_layer_output = 1 / (1 + np.exp(-hidden_layer_input))
        output_layer_input = hidden_layer_output.dot(self.w2) 
        predicted_labels = np.argmax(output_layer_input, axis=1)
        
        accuracy = np.mean(predicted_labels == y_test)
        print(f"Accuracy on test set: {accuracy}")
        
        

In [84]:
input_size = 32*32*3
hidden_size = 256
output_size = 10
learning_rate = 0.08
epochs = 10

In [85]:
# sigmoid without regularization
NN1 = NNSig_without_reg(x_train, y_train)
NN1.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs)
NN1.train()

Epoch 1/10, Loss: 521.6798093085264
Epoch 2/10, Loss: 422.7967696026113
Epoch 3/10, Loss: 360.9669021138811
Epoch 4/10, Loss: 316.910186904673
Epoch 5/10, Loss: 281.8931528859355
Epoch 6/10, Loss: 253.88620032181865
Epoch 7/10, Loss: 231.46129196308289
Epoch 8/10, Loss: 213.58588919176515
Epoch 9/10, Loss: 199.10188220429865
Epoch 10/10, Loss: 187.33107979829987


In [86]:
NN1.test(x_test, y_test)


Accuracy on test set: 0.2824


#### tanh activation function without regularization

In [87]:
#tanh without regularization
class NNtanh_without_reg:
    
    def __init__(self, x_train,y_train):
        self.x_train = x_train
        self.y_train = y_train
        self.batch_size = 128
        
    def setHyperParameters(self, input_size, hidden_size, output_size, learning_rate, epochs):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.w1 = np.random.randn(input_size, hidden_size)
        self.w2 = np.random.randn(hidden_size, output_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.bias2 = np.zeros((1, output_size))

        
    def forward(self, x_batch):
        hidden_layer_input = x_batch.dot(self.w1) + self.bias1
        hidden_layer_output = np.tanh(hidden_layer_input)
        output_layer_input = hidden_layer_output.dot(self.w2) +self.bias2
        scores = np.exp(output_layer_input - np.max(output_layer_input, axis=1, keepdims=True))

        return hidden_layer_input, hidden_layer_output, scores

    def loss(self,y_batch,scores):
        correct_scores = scores[np.arange(len(scores)), y_batch]
        loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))
        return loss
        
    def backward(self, hidden_layer_input, hidden_layer_output, scores, y_batch,x_batch):
        grad_scores = scores / np.sum(scores, axis=1, keepdims=True)
        grad_scores[np.arange(len(grad_scores)), y_batch] -= 1
        grad_scores /= len(grad_scores)
        
        grad_bias2 = np.sum(grad_scores, axis=0, keepdims=True)
        grad_w2 = hidden_layer_output.T.dot(grad_scores)
        grad_hidden = grad_scores.dot(self.w2.T)
        
        grad_hidden_layer_input = grad_hidden * (1 - np.tanh(hidden_layer_output)**2)
        grad_w1 = x_batch.T.dot(grad_hidden_layer_input)
        grad_bias1 = np.sum(grad_hidden_layer_input, axis=0, keepdims=True)

        updated_w1= self.w1 - self.learning_rate * grad_w1
        updated_w2 = self.w2 - self.learning_rate * grad_w2
        updated_bias1 = self.bias1 - self.learning_rate * grad_bias1
        updated_bias2 = self.bias2 - self.learning_rate * grad_bias2

        return updated_w1,updated_w2,updated_bias1,updated_bias2
        
    def train(self):
        for epoch in range(self.epochs):
            for i in range(0, len(self.x_train), self.batch_size):
                x_batch = self.x_train[i:i+self.batch_size]
                y_batch = self.y_train[i:i+self.batch_size]
                
                hidden_layer_input, hidden_layer_output, scores = self.forward(x_batch)
                loss = self.loss(y_batch, scores)
                
                updated_w1,updated_w2,updated_bias1,updated_bias2 = self.backward(hidden_layer_input, hidden_layer_output, scores, y_batch,x_batch)
                self.w1 = updated_w1
                self.w2 = updated_w2
                self.bias1 = updated_bias1
                self.bias2 = updated_bias2
        
            print(f"Epoch {epoch+1}/{self.epochs}, Loss: {loss}")

    def test(self,x_test, y_test):
        hidden_layer_input = x_test.dot(self.w1) + self.bias1
        hidden_layer_output = 1 / (1 + np.exp(-hidden_layer_input))
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2 
        predicted_labels = np.argmax(output_layer_input, axis=1)
        
        accuracy = np.mean(predicted_labels == y_test)
        print(f"Accuracy on test set: {accuracy}")
        
        

In [88]:
# set hyperparameters 
input_size = 32*32*3
hidden_size = 256
output_size = 10
learning_rate = 0.08
epochs = 10

In [89]:
# tanh without regularization
NN2 = NNtanh_without_reg(x_train, y_train)
NN2.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs)
NN2.train()
NN2.test(x_test, y_test)

Epoch 1/10, Loss: 767.5716038496046
Epoch 2/10, Loss: 619.2820759638171
Epoch 3/10, Loss: 496.89237308706413
Epoch 4/10, Loss: 381.23148674566517
Epoch 5/10, Loss: 277.81531790675
Epoch 6/10, Loss: 206.72043248373927
Epoch 7/10, Loss: 169.7846781925635
Epoch 8/10, Loss: 155.36255408214942
Epoch 9/10, Loss: 152.02328155772304
Epoch 10/10, Loss: 151.78284486187283
Accuracy on test set: 0.2382


#### relu activation function without regularization

In [93]:
class NNReLU_without_reg:
    
    def __init__(self, x_train, y_train):
        self.x_train = x_train
        self.y_train = y_train
        self.batch_size = 128
        
    def setHyperParameters(self, input_size, hidden_size, output_size, learning_rate, epochs):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.w1 = np.random.randn(input_size, hidden_size)
        self.w2 = np.random.randn(hidden_size, output_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.bias2 = np.zeros((1, output_size))

    def forward(self, x_batch):
        hidden_layer_input = x_batch.dot(self.w1) + self.bias1
        hidden_layer_output = np.maximum(0, hidden_layer_input)  
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2
        scores = np.exp(output_layer_input - np.max(output_layer_input, axis=1, keepdims=True))

        return hidden_layer_input, hidden_layer_output, scores

    def loss(self, y_batch, scores):
        correct_scores = scores[np.arange(len(scores)), y_batch]
        loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))
        return loss

    def backward(self, hidden_layer_input, hidden_layer_output, scores, y_batch, x_batch):
        grad_scores = scores / np.sum(scores, axis=1, keepdims=True)
        grad_scores[np.arange(len(grad_scores)), y_batch] -= 1
        grad_scores /= len(grad_scores)

        grad_bias2 = np.sum(grad_scores, axis=0, keepdims=True)
        grad_w2 = hidden_layer_output.T.dot(grad_scores)
        grad_hidden = grad_scores.dot(self.w2.T)

        grad_hidden_layer_input = grad_hidden * (hidden_layer_output > 0).astype(int) 
        grad_w1 = x_batch.T.dot(grad_hidden_layer_input)
        grad_bias1 = np.sum(grad_hidden_layer_input, axis=0, keepdims=True)

        updated_w1 = self.w1 - self.learning_rate * grad_w1
        updated_w2 = self.w2 - self.learning_rate * grad_w2
        updated_bias1 = self.bias1 - self.learning_rate * grad_bias1
        updated_bias2 = self.bias2 - self.learning_rate * grad_bias2

        return updated_w1, updated_w2, updated_bias1, updated_bias2

    def train(self):
        for epoch in range(self.epochs):
            for i in range(0, len(self.x_train), self.batch_size):
                x_batch = self.x_train[i:i+self.batch_size]
                y_batch = self.y_train[i:i+self.batch_size]

                hidden_layer_input, hidden_layer_output, scores = self.forward(x_batch)
                loss = self.loss(y_batch, scores)

                updated_w1, updated_w2, updated_bias1, updated_bias2 = self.backward(hidden_layer_input, hidden_layer_output, scores, y_batch, x_batch)
                self.w1 = updated_w1
                self.w2 = updated_w2
                
    def test(self,x_test, y_test):
        hidden_layer_input = x_test.dot(self.w1) + self.bias1
        hidden_layer_output = 1 / (1 + np.exp(-hidden_layer_input))
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2 
        predicted_labels = np.argmax(output_layer_input, axis=1)
        
        accuracy = np.mean(predicted_labels == y_test)
        print(f"Accuracy on test set: {accuracy}")
        
         


In [94]:
# relu without regularization
NN3 = NNReLU_without_reg(x_train, y_train)
NN3.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs)
NN3.train()
NN3.test(x_test, y_test)

  loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))


Accuracy on test set: 0.2337


#### sigmoid with l2 regularization 

In [96]:
class NNSigmoid_with_reg:
    
    def __init__(self, x_train, y_train):
        self.x_train = x_train
        self.y_train = y_train
        self.batch_size = 128
        
    def setHyperParameters(self, input_size, hidden_size, output_size, learning_rate, epochs, l2_reg):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.w1 = np.random.randn(input_size, hidden_size)
        self.w2 = np.random.randn(hidden_size, output_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.bias2 = np.zeros((1, output_size))
        self.l2_reg = l2_reg

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, x_batch):
        hidden_layer_input = x_batch.dot(self.w1) + self.bias1
        hidden_layer_output = self.sigmoid(hidden_layer_input)
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2
        scores = np.exp(output_layer_input - np.max(output_layer_input, axis=1, keepdims=True))

        return hidden_layer_input, hidden_layer_output, scores

    def loss(self, y_batch, scores):
        correct_scores = scores[np.arange(len(scores)), y_batch]
        data_loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))

        reg_loss = 0.5 * self.l2_reg * (np.sum(self.w1**2) + np.sum(self.w2**2))

        loss = data_loss + reg_loss
        return loss

    def backward(self, hidden_layer_input, hidden_layer_output, scores, y_batch, x_batch):
        grad_scores = scores / np.sum(scores, axis=1, keepdims=True)
        grad_scores[np.arange(len(grad_scores)), y_batch] -= 1
        grad_scores /= len(grad_scores)

        grad_bias2 = np.sum(grad_scores, axis=0, keepdims=True)
        grad_w2 = hidden_layer_output.T.dot(grad_scores)
        grad_hidden = grad_scores.dot(self.w2.T)

        grad_hidden_layer_input = grad_hidden * (hidden_layer_output * (1 - hidden_layer_output))
        grad_w1 = x_batch.T.dot(grad_hidden_layer_input)
        grad_bias1 = np.sum(grad_hidden_layer_input, axis=0, keepdims=True)

        # Add L2 regularization terms to gradients
        grad_w1 += self.l2_reg * self.w1
        grad_w2 += self.l2_reg * self.w2

        updated_w1 = self.w1 - self.learning_rate * grad_w1
        updated_w2 = self.w2 - self.learning_rate * grad_w2
        updated_bias1 = self.bias1 - self.learning_rate * grad_bias1
        updated_bias2 = self.bias2 - self.learning_rate * grad_bias2

        return updated_w1, updated_w2, updated_bias1, updated_bias2

    def train(self):
        for epoch in range(self.epochs):
            for i in range(0, len(self.x_train), self.batch_size):
                x_batch = self.x_train[i:i+self.batch_size]
                y_batch = self.y_train[i:i+self.batch_size]

                hidden_layer_input, hidden_layer_output, scores = self.forward(x_batch)
                loss = self.loss(y_batch, scores)

                updated_w1, updated_w2, updated_bias1, updated_bias2 = self.backward(hidden_layer_input, hidden_layer_output, scores, y_batch, x_batch)
                self.w1 = updated_w1
                self.w2 = updated_w2
                self.bias1 = updated_bias1
                self.bias2 = updated_bias2

            print(f"Epoch {epoch+1}/{self.epochs}, Loss: {loss}")

    def test(self, x_test, y_test):
        hidden_layer_input = x_test.dot(self.w1) + self.bias1
        hidden_layer_output = self.sigmoid(hidden_layer_input)
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2
        predicted_labels = np.argmax(output_layer_input, axis=1)

        accuracy = np.mean(predicted_labels == y_test)
        print(f"Accuracy on test set: {accuracy}")


In [98]:
l2_reg = 0.001

In [100]:
# sigmoid with l2 regularization
NN4 = NNSigmoid_with_reg(x_train, y_train)
NN4.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs, l2_reg)
NN4.train()
NN4.test(x_test, y_test)

Epoch 1/10, Loss: 816.8334760285798
Epoch 2/10, Loss: 707.7861243342388
Epoch 3/10, Loss: 630.5791178341631
Epoch 4/10, Loss: 569.97450275878
Epoch 5/10, Loss: 518.3646845242467
Epoch 6/10, Loss: 475.5875249878342
Epoch 7/10, Loss: 441.0709563736639
Epoch 8/10, Loss: 412.9553544705075
Epoch 9/10, Loss: 389.4664998797439
Epoch 10/10, Loss: 369.3752258170813
Accuracy on test set: 0.3102


#### tanh with l2 regularization

In [101]:
#tanh with l2 regularization
class NNTanh_with_reg:
    
    def __init__(self, x_train, y_train):
        self.x_train = x_train
        self.y_train = y_train
        self.batch_size = 128
        
    def setHyperParameters(self, input_size, hidden_size, output_size, learning_rate, epochs, l2_reg):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.w1 = np.random.randn(input_size, hidden_size)
        self.w2 = np.random.randn(hidden_size, output_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.bias2 = np.zeros((1, output_size))
        self.l2_reg = l2_reg

    def tanh(self, x):
        return np.tanh(x)

    def forward(self, x_batch):
        hidden_layer_input = x_batch.dot(self.w1) + self.bias1
        hidden_layer_output = self.tanh(hidden_layer_input)
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2
        scores = np.exp(output_layer_input - np.max(output_layer_input, axis=1, keepdims=True))

        return hidden_layer_input, hidden_layer_output, scores

    def loss(self, y_batch, scores):
        correct_scores = scores[np.arange(len(scores)), y_batch]
        data_loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))

        # Calculate regularization loss
        reg_loss = 0.5 * self.l2_reg * (np.sum(self.w1**2) + np.sum(self.w2**2))

        # Total loss
        loss = data_loss + reg_loss
        return loss

    def backward(self, hidden_layer_input, hidden_layer_output, scores, y_batch, x_batch):
        grad_scores = scores / np.sum(scores, axis=1, keepdims=True)
        grad_scores[np.arange(len(grad_scores)), y_batch] -= 1
        grad_scores /= len(grad_scores)

        grad_bias2 = np.sum(grad_scores, axis=0, keepdims=True)
        grad_w2 = hidden_layer_output.T.dot(grad_scores)
        grad_hidden = grad_scores.dot(self.w2.T)

        grad_hidden_layer_input = grad_hidden * (1 - np.tanh(hidden_layer_output)**2)
        grad_w1 = x_batch.T.dot(grad_hidden_layer_input)
        grad_bias1 = np.sum(grad_hidden_layer_input, axis=0, keepdims=True)

        # Add L2 regularization terms to gradients
        grad_w1 += self.l2_reg * self.w1
        grad_w2 += self.l2_reg * self.w2

        updated_w1 = self.w1 - self.learning_rate * grad_w1
        updated_w2 = self.w2 - self.learning_rate * grad_w2
        updated_bias1 = self.bias1 - self.learning_rate * grad_bias1
        updated_bias2 = self.bias2 - self.learning_rate * grad_bias2

        return updated_w1, updated_w2, updated_bias1, updated_bias2

    def train(self):
        for epoch in range(self.epochs):
            for i in range(0, len(self.x_train), self.batch_size):
                x_batch = self.x_train[i:i+self.batch_size]
                y_batch = self.y_train[i:i+self.batch_size]

                hidden_layer_input, hidden_layer_output, scores = self.forward(x_batch)
                loss = self.loss(y_batch, scores)

                updated_w1, updated_w2, updated_bias1, updated_bias2 = self.backward(hidden_layer_input, hidden_layer_output, scores, y_batch, x_batch)
                self.w1 = updated_w1
                self.w2 = updated_w2
                self.bias1 = updated_bias1
                self.bias2 = updated_bias2

            print(f"Epoch {epoch+1}/{self.epochs}, Loss: {loss}")

    def test(self, x_test, y_test):
        hidden_layer_input = x_test.dot(self.w1) + self.bias1
        hidden_layer_output = self.tanh(hidden_layer_input)
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2
        predicted_labels = np.argmax(output_layer_input, axis=1)

        accuracy = np.mean(predicted_labels == y_test)
        print(f"Accuracy on test set: {accuracy}")


In [102]:
# tanh with l2 regularization
NN5 = NNTanh_with_reg(x_train, y_train)
NN5.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs, l2_reg)
NN5.train()
NN5.test(x_test, y_test)

Epoch 1/10, Loss: 1185.6672603998968
Epoch 2/10, Loss: 921.1351214379335
Epoch 3/10, Loss: 728.0993826846379
Epoch 4/10, Loss: 603.1279204840587
Epoch 5/10, Loss: 503.6198008752178
Epoch 6/10, Loss: 439.6306457028636
Epoch 7/10, Loss: 402.0860815654384
Epoch 8/10, Loss: 379.3155023120542
Epoch 9/10, Loss: 363.56249971790294
Epoch 10/10, Loss: 350.1965891948523
Accuracy on test set: 0.3773


#### relu with l2 regularization

In [104]:
class NNReLU_with_reg:
    
    def __init__(self, x_train, y_train):
        self.x_train = x_train
        self.y_train = y_train
        self.batch_size = 128
        
    def setHyperParameters(self, input_size, hidden_size, output_size, learning_rate, epochs, l2_reg):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.w1 = np.random.randn(input_size, hidden_size)
        self.w2 = np.random.randn(hidden_size, output_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.bias2 = np.zeros((1, output_size))
        self.l2_reg = l2_reg

    def forward(self, x_batch):
        hidden_layer_input = x_batch.dot(self.w1) + self.bias1
        hidden_layer_output = np.maximum(0, hidden_layer_input)  
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2
        scores = np.exp(output_layer_input - np.max(output_layer_input, axis=1, keepdims=True))

        return hidden_layer_input, hidden_layer_output, scores

    def loss(self, y_batch, scores):
        correct_scores = scores[np.arange(len(scores)), y_batch]
        data_loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))

        # Calculate regularization loss
        reg_loss = 0.5 * self.l2_reg * (np.sum(self.w1**2) + np.sum(self.w2**2))

        # Total loss
        loss = data_loss + reg_loss
        return loss

    def backward(self, hidden_layer_input, hidden_layer_output, scores, y_batch, x_batch):
        grad_scores = scores / np.sum(scores, axis=1, keepdims=True)
        grad_scores[np.arange(len(grad_scores)), y_batch] -= 1
        grad_scores /= len(grad_scores)

        grad_bias2 = np.sum(grad_scores, axis=0, keepdims=True)
        grad_w2 = hidden_layer_output.T.dot(grad_scores)
        grad_hidden = grad_scores.dot(self.w2.T)

        grad_hidden_layer_input = grad_hidden * (hidden_layer_output > 0).astype(int)  
        grad_w1 = x_batch.T.dot(grad_hidden_layer_input)
        grad_bias1 = np.sum(grad_hidden_layer_input, axis=0, keepdims=True)

        grad_w1 += self.l2_reg * self.w1
        grad_w2 += self.l2_reg * self.w2

        updated_w1 = self.w1 - self.learning_rate * grad_w1
        updated_w2 = self.w2 - self.learning_rate * grad_w2
        updated_bias1 = self.bias1 - self.learning_rate * grad_bias1
        updated_bias2 = self.bias2 - self.learning_rate * grad_bias2

        return updated_w1, updated_w2, updated_bias1, updated_bias2

    def train(self):
        for epoch in range(self.epochs):
            for i in range(0, len(self.x_train), self.batch_size):
                x_batch = self.x_train[i:i+self.batch_size]
                y_batch = self.y_train[i:i+self.batch_size]

                hidden_layer_input, hidden_layer_output, scores = self.forward(x_batch)
                loss = self.loss(y_batch, scores)

                updated_w1, updated_w2, updated_bias1, updated_bias2 = self.backward(hidden_layer_input, hidden_layer_output, scores, y_batch, x_batch)
                self.w1 = updated_w1
                self.w2 = updated_w2
                self.bias1 = updated_bias1
                self.bias2 = updated_bias2

            print(f"Epoch {epoch+1}/{self.epochs}, Loss: {loss}")

    def test(self, x_test, y_test):
        hidden_layer_input = x_test.dot(self.w1) + self.bias1
        hidden_layer_output = np.maximum(0, hidden_layer_input)  # ReLU activation
        output_layer_input = hidden_layer_output.dot(self.w2) + self.bias2
        predicted_labels = np.argmax(output_layer_input, axis=1)

        accuracy = np.mean(predicted_labels == y_test)
        print(f"Accuracy on test set: {accuracy}")


In [105]:
# relu with l2 regularization
NN6 = NNReLU_with_reg(x_train, y_train)
NN6.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs, l2_reg)
NN6.train()
NN6.test(x_test, y_test)

  data_loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))


Epoch 1/10, Loss: 5941.359796140371
Epoch 2/10, Loss: 2540.908546562265
Epoch 3/10, Loss: 2321.9349600633877
Epoch 4/10, Loss: 2068.5472031319505
Epoch 5/10, Loss: 1252.6562611694876
Epoch 6/10, Loss: 1393.5854680134462
Epoch 7/10, Loss: 932.458032173202
Epoch 8/10, Loss: 852.3384562223658
Epoch 9/10, Loss: 745.6948176707347
Epoch 10/10, Loss: 709.2040789049197
Accuracy on test set: 0.3151


#### Hyperparameter tuning: increasing epoches 

In [111]:
# set hyperparameters 
input_size = 32*32*3
hidden_size = 256
output_size = 10
learning_rate = 0.08
epochs = 25

In [112]:
# sigmoid without regularization
NN7=NNSig_without_reg(x_train, y_train)
NN7.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs)
NN7.train()
NN7.test(x_test,y_test)

Epoch 1/25, Loss: 603.1886408810551
Epoch 2/25, Loss: 495.1802934236118
Epoch 3/25, Loss: 425.1593838081214
Epoch 4/25, Loss: 371.8896749309305
Epoch 5/25, Loss: 331.0956603025419
Epoch 6/25, Loss: 298.0086278467523
Epoch 7/25, Loss: 270.51747098536913
Epoch 8/25, Loss: 247.73035227359583
Epoch 9/25, Loss: 228.95866491917994
Epoch 10/25, Loss: 213.6184447116627
Epoch 11/25, Loss: 201.1645956105323
Epoch 12/25, Loss: 191.0938238201308
Epoch 13/25, Loss: 182.98860279833542
Epoch 14/25, Loss: 176.50207802868732
Epoch 15/25, Loss: 171.3359962496503
Epoch 16/25, Loss: 167.23950474800876
Epoch 17/25, Loss: 163.99823623383907
Epoch 18/25, Loss: 161.43067208115906
Epoch 19/25, Loss: 159.38813924942352
Epoch 20/25, Loss: 157.75392084822113
Epoch 21/25, Loss: 156.43675706397968
Epoch 22/25, Loss: 155.36314199002052
Epoch 23/25, Loss: 154.47346787880778
Epoch 24/25, Loss: 153.72426019924183
Epoch 25/25, Loss: 153.09281759838126
Accuracy on test set: 0.361


In [113]:
#sigmoid with regularization with increased epoches
NN8 = NNSigmoid_with_reg(x_train, y_train)
NN8.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs, l2_reg)
NN8.train()
NN8.test(x_test, y_test)

Epoch 1/25, Loss: 921.0140010538407
Epoch 2/25, Loss: 810.1651472620395
Epoch 3/25, Loss: 715.5557130643906
Epoch 4/25, Loss: 636.8002219839682
Epoch 5/25, Loss: 573.6088838885397
Epoch 6/25, Loss: 521.7588307543392
Epoch 7/25, Loss: 479.08294058688637
Epoch 8/25, Loss: 443.83106323856657
Epoch 9/25, Loss: 414.5679104603585
Epoch 10/25, Loss: 390.0577196829006
Epoch 11/25, Loss: 369.2623589449024
Epoch 12/25, Loss: 351.3506931276166
Epoch 13/25, Loss: 335.70586079557347
Epoch 14/25, Loss: 321.89573542814003
Epoch 15/25, Loss: 309.5778783624857
Epoch 16/25, Loss: 298.4890564576995
Epoch 17/25, Loss: 288.4454557837005
Epoch 18/25, Loss: 279.3019083174471
Epoch 19/25, Loss: 270.91364536385316
Epoch 20/25, Loss: 263.1341593172675
Epoch 21/25, Loss: 255.86675303905992
Epoch 22/25, Loss: 249.07492195250074
Epoch 23/25, Loss: 242.7380984091927
Epoch 24/25, Loss: 236.82671273573544
Epoch 25/25, Loss: 231.30221703564644
Accuracy on test set: 0.3772


In [114]:
#tanh without regularization with increased epoches
NN9 = NNtanh_without_reg(x_train, y_train)
NN9.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs)
NN9.train()
NN9.test(x_test, y_test)

Epoch 1/25, Loss: 801.0696839982515
Epoch 2/25, Loss: 555.232352050709
Epoch 3/25, Loss: 432.6619317157538
Epoch 4/25, Loss: 349.52687948002153
Epoch 5/25, Loss: 256.6196766140171
Epoch 6/25, Loss: 211.16707021186755
Epoch 7/25, Loss: 175.9950748001419
Epoch 8/25, Loss: 156.00196067436192
Epoch 9/25, Loss: 149.3373900765412
Epoch 10/25, Loss: 148.08143474887157
Epoch 11/25, Loss: 146.71014141787393
Epoch 12/25, Loss: 145.83349994033608
Epoch 13/25, Loss: 145.26393389937007
Epoch 14/25, Loss: 144.89259875208714
Epoch 15/25, Loss: 144.7631465642074
Epoch 16/25, Loss: 144.4762227718262
Epoch 17/25, Loss: 144.46123994480615
Epoch 18/25, Loss: 144.86009009803382
Epoch 19/25, Loss: 145.19157265249027
Epoch 20/25, Loss: 145.12619896287936
Epoch 21/25, Loss: 145.05335898190572
Epoch 22/25, Loss: 145.03037027525758
Epoch 23/25, Loss: 145.2535019713668
Epoch 24/25, Loss: 145.6685765815492
Epoch 25/25, Loss: 146.0338958829936
Accuracy on test set: 0.3116


In [115]:
#tanh with regularization with increased epoches
NN10 = NNTanh_with_reg(x_train, y_train)
NN10.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs, l2_reg)
NN10.train()
NN10.test(x_test, y_test)

Epoch 1/25, Loss: 1365.1156762148453
Epoch 2/25, Loss: 1066.9651156171242
Epoch 3/25, Loss: 870.1194680560227
Epoch 4/25, Loss: 699.4822778488323
Epoch 5/25, Loss: 572.4516264644351
Epoch 6/25, Loss: 488.115500132557
Epoch 7/25, Loss: 435.86962963994847
Epoch 8/25, Loss: 405.3119152359682
Epoch 9/25, Loss: 385.85007534040983
Epoch 10/25, Loss: 370.75232325579174
Epoch 11/25, Loss: 356.86964542728197
Epoch 12/25, Loss: 344.749678829413
Epoch 13/25, Loss: 332.7297767347682
Epoch 14/25, Loss: 320.5455500335621
Epoch 15/25, Loss: 308.9985520649053
Epoch 16/25, Loss: 298.71252105654395
Epoch 17/25, Loss: 288.9732834432669
Epoch 18/25, Loss: 279.56377853515085
Epoch 19/25, Loss: 270.6644994301899
Epoch 20/25, Loss: 262.0724638052869
Epoch 21/25, Loss: 253.65902371561316
Epoch 22/25, Loss: 246.0251242253342
Epoch 23/25, Loss: 239.70472358483474
Epoch 24/25, Loss: 234.24182212042732
Epoch 25/25, Loss: 229.1857173206693
Accuracy on test set: 0.3916


In [118]:
epochs= 50
learning_rate = 0.04
l2_reg = 0.001
#relu with l2 with decreased learning rate and increased epoches
NN11 = NNReLU_with_reg(x_train, y_train)
NN11.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs, l2_reg)
NN11.train()
NN11.test(x_test, y_test)

  data_loss = -np.sum(np.log(correct_scores / np.sum(scores, axis=1)))


Epoch 1/50, Loss: 4420.789935737893
Epoch 2/50, Loss: 2714.0658935789197
Epoch 3/50, Loss: 2633.973520207864
Epoch 4/50, Loss: 2122.8809281527665
Epoch 5/50, Loss: 1820.3331728175542
Epoch 6/50, Loss: 1318.3813425458927
Epoch 7/50, Loss: 1277.2255099913802
Epoch 8/50, Loss: 1254.8909645518568
Epoch 9/50, Loss: 1214.0504648931992
Epoch 10/50, Loss: 907.6388002164042
Epoch 11/50, Loss: 889.3323750385523
Epoch 12/50, Loss: 915.9865502861505
Epoch 13/50, Loss: 914.6396532826826
Epoch 14/50, Loss: 716.5623391687959
Epoch 15/50, Loss: 837.1048975214585
Epoch 16/50, Loss: 741.9251249424806
Epoch 17/50, Loss: 658.3566483006866
Epoch 18/50, Loss: 566.4429606520762
Epoch 19/50, Loss: 595.3039442459561
Epoch 20/50, Loss: 558.299351612564
Epoch 21/50, Loss: 560.4279405160172
Epoch 22/50, Loss: 481.3554024645289
Epoch 23/50, Loss: 442.50925856141714
Epoch 24/50, Loss: 469.54836964202616
Epoch 25/50, Loss: 432.5463418933922
Epoch 26/50, Loss: 412.06547406641334
Epoch 27/50, Loss: 386.21507796858873


In [120]:
#sigmoid with regularization with increased epoches
l2_reg = 0.008
NN12 = NNSigmoid_with_reg(x_train, y_train)
NN12.setHyperParameters(input_size, hidden_size, output_size, learning_rate, epochs, l2_reg)
NN12.train()
NN12.test(x_test, y_test)

Epoch 1/50, Loss: 3034.8136139561575
Epoch 2/50, Loss: 2344.5002763041675
Epoch 3/50, Loss: 1827.6575599459845
Epoch 4/50, Loss: 1435.013159162898
Epoch 5/50, Loss: 1135.5056065621177
Epoch 6/50, Loss: 906.3846090935845
Epoch 7/50, Loss: 731.1867943643086
Epoch 8/50, Loss: 597.2308019875381
Epoch 9/50, Loss: 494.7710038635922
Epoch 10/50, Loss: 416.2930796398062
Epoch 11/50, Loss: 356.0824798535658
Epoch 12/50, Loss: 309.7817301854162
Epoch 13/50, Loss: 274.08686184656193
Epoch 14/50, Loss: 246.50202743563148
Epoch 15/50, Loss: 225.1290215650143
Epoch 16/50, Loss: 208.54727025940693
Epoch 17/50, Loss: 195.69432782786527
Epoch 18/50, Loss: 185.71155346478255
Epoch 19/50, Loss: 177.9052976925183
Epoch 20/50, Loss: 171.7662768430538
Epoch 21/50, Loss: 166.93228264461095
Epoch 22/50, Loss: 163.131543742159
Epoch 23/50, Loss: 160.14424019735014
Epoch 24/50, Loss: 157.79108232880068
Epoch 25/50, Loss: 155.93037056704895
Epoch 26/50, Loss: 154.45238205351092
Epoch 27/50, Loss: 153.27244449152