<a href="https://colab.research.google.com/github/AkkiNikumbh/DL-EXPERIMENTS/blob/main/AkashSingh_23CS036_Experiment2_dl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
import numpy as np

### 1. LOADING DATASET

In [2]:
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True
)

val_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True
)

train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True
)

val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset, batch_size=64, shuffle=False
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 12.7MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 339kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.20MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 2.27MB/s]


### 2.NUMPY PREPROCESSING UTILITIES

In [3]:
# ONE HOT ENCODING
def one_hot_encode(labels, num_classes=10):
    one_hot = np.zeros((labels.shape[0], num_classes))
    for i in range(labels.shape[0]):
        one_hot[i, labels[i]] = 1
    return one_hot

In [4]:
# ACCURACY
def compute_accuracy(y_true, y_pred):
    correct = np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1)
    return np.mean(correct)

###3.ACTIVATION FUNCTIONS AND THEIR DERIVATIVES

In [5]:
#1.RELU
def relu(z):
    return np.maximum(0, z)

def relu_derivative(z):
    return (z > 0).astype(float)

#2.SIGMOID
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    s = sigmoid(z)
    return s * (1 - s)

#3.TANH
def tanh(z):
    return np.tanh(z)

def tanh_derivative(z):
    return 1 - np.tanh(z) ** 2

#4.SOFTMAX
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

### 4. NEURAL NETWORK CLASS

In [6]:
class NeuralNetwork:
    def __init__(self, layer_sizes, activations, learning_rate):
        self.layer_sizes = layer_sizes
        self.activations = activations
        self.learning_rate = learning_rate

        self.weights = {}
        self.biases = {}
        self.z_values = {}
        self.a_values = {}
        self.gradients = {}

        for i in range(len(layer_sizes) - 1):
            self.weights[i] = np.random.randn(
                layer_sizes[i], layer_sizes[i + 1]
            ) * 0.01
            self.biases[i] = np.zeros((1, layer_sizes[i + 1]))
    def forward(self, X):
      self.a_values[0] = X

      for i in range(len(self.activations)):
          Z = np.dot(self.a_values[i], self.weights[i]) + self.biases[i]
          self.z_values[i + 1] = Z

          if self.activations[i] == "relu":
              A = relu(Z)
          elif self.activations[i] == "sigmoid":
              A = sigmoid(Z)
          elif self.activations[i] == "tanh":
              A = tanh(Z)
          elif self.activations[i] == "softmax":
              A = softmax(Z)

          self.a_values[i + 1] = A

      return A

    def compute_loss(self, y_true, y_pred):
      epsilon = 1e-8
      loss = -np.sum(y_true * np.log(y_pred + epsilon)) / y_true.shape[0]
      return loss
    def backward(self, y_true):
      m = y_true.shape[0]
      L = len(self.activations)

      dZ = self.a_values[L] - y_true

      for i in reversed(range(L)):
          dW = np.dot(self.a_values[i].T, dZ) / m
          dB = np.sum(dZ, axis=0, keepdims=True) / m

          self.gradients["W" + str(i)] = dW
          self.gradients["B" + str(i)] = dB

          if i > 0:
              if self.activations[i - 1] == "relu":
                  dZ = np.dot(dZ, self.weights[i].T) * relu_derivative(self.z_values[i])
              elif self.activations[i - 1] == "sigmoid":
                  dZ = np.dot(dZ, self.weights[i].T) * sigmoid_derivative(self.z_values[i])
              elif self.activations[i - 1] == "tanh":
                  dZ = np.dot(dZ, self.weights[i].T) * tanh_derivative(self.z_values[i])
    def update_parameters(self):
      for i in range(len(self.weights)):
          self.weights[i] -= self.learning_rate * self.gradients["W" + str(i)]
          self.biases[i] -= self.learning_rate * self.gradients["B" + str(i)]

    def predict(self, X):
      return self.forward(X)
    def evaluate(self, X, y_true):
      y_pred = self.forward(X)
      return compute_accuracy(y_true, y_pred)

### 5.TRAINING LOOP

In [7]:
def train_model(model, train_loader, val_loader, epochs):
    history = []

    for epoch in range(epochs):
        train_losses = []
        train_accuracies = []

        for images, labels in train_loader:
            images = images.cpu()
            labels = labels.cpu()

            images_np = images.numpy()
            labels_np = labels.numpy()

            images_np = images_np.reshape(images_np.shape[0], -1)
            images_np = images_np / 1.0  # already normalized by ToTensor()

            labels_oh = one_hot_encode(labels_np)

            outputs = model.forward(images_np)
            loss = model.compute_loss(labels_oh, outputs)

            model.backward(labels_oh)
            model.update_parameters()

            train_losses.append(loss)
            train_accuracies.append(compute_accuracy(labels_oh, outputs))

        val_losses = []
        val_accuracies = []

        for images, labels in val_loader:
            images = images.cpu()
            labels = labels.cpu()

            images_np = images.numpy()
            labels_np = labels.numpy()

            images_np = images_np.reshape(images_np.shape[0], -1)
            images_np = images_np / 1.0

            labels_oh = one_hot_encode(labels_np)

            outputs = model.forward(images_np)
            val_losses.append(model.compute_loss(labels_oh, outputs))
            val_accuracies.append(compute_accuracy(labels_oh, outputs))

        history.append({
            "epoch": epoch + 1,
            "train_loss": np.mean(train_losses),
            "train_accuracy": np.mean(train_accuracies),
            "val_loss": np.mean(val_losses),
            "val_accuracy": np.mean(val_accuracies)
        })

        print(
            "Epoch:", epoch + 1,
            "Train Acc:", history[-1]["train_accuracy"],
            "Val Acc:", history[-1]["val_accuracy"]
        )

    return history

### 6.EXPERIMENTS

In [8]:
experiments = [
    {
        "layers": [784, 128, 10],
        "activations": ["relu", "softmax"]
    },
    {
        "layers": [784, 256, 128, 10],
        "activations": ["relu", "relu", "softmax"]
    },
    {
        "layers": [784, 128, 64, 10],
        "activations": ["tanh", "tanh", "softmax"]
    }
]

In [9]:
for exp in experiments:
    model = NeuralNetwork(
        layer_sizes=exp["layers"],
        activations=exp["activations"],
        learning_rate=0.01
    )

    history = train_model(
        model,
        train_loader,
        val_loader,
        epochs=10
    )

Epoch: 1 Train Acc: 0.6240171908315565 Val Acc: 0.7915007961783439
Epoch: 2 Train Acc: 0.836004131130064 Val Acc: 0.8714171974522293
Epoch: 3 Train Acc: 0.8773320895522388 Val Acc: 0.8921178343949044
Epoch: 4 Train Acc: 0.8933069029850746 Val Acc: 0.9015724522292994
Epoch: 5 Train Acc: 0.9005863539445629 Val Acc: 0.9074442675159236
Epoch: 6 Train Acc: 0.9055670309168443 Val Acc: 0.9126194267515924
Epoch: 7 Train Acc: 0.9105143923240938 Val Acc: 0.9168988853503185
Epoch: 8 Train Acc: 0.9138792643923241 Val Acc: 0.9200835987261147
Epoch: 9 Train Acc: 0.9177605277185501 Val Acc: 0.9231687898089171
Epoch: 10 Train Acc: 0.920092617270789 Val Acc: 0.9244625796178344
Epoch: 1 Train Acc: 0.11222348081023455 Val Acc: 0.11355493630573249
Epoch: 2 Train Acc: 0.1382762526652452 Val Acc: 0.2695063694267516
Epoch: 3 Train Acc: 0.39835421108742003 Val Acc: 0.6192277070063694
Epoch: 4 Train Acc: 0.722581289978678 Val Acc: 0.7886146496815286
Epoch: 5 Train Acc: 0.8109841417910447 Val Acc: 0.83917197452