In [2]:
import torch
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np

In [3]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = datasets.MNIST('', download=True, train=True, transform=transform)
testset = datasets.MNIST('', download=True, train=False, transform=transform)

In [4]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=10, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=10, shuffle=True)

Here we are performing mini-batch gradient with batch size of 10.

Below is the code for tanh activation function with epoch size of 25, 50, 100, 150 and fluctuating hidden layer size with number of neurons as 25, 50, 100, 150.

In [None]:
learning_rate = 0.001
epochs_list = [25, 50, 100, 150]
hidden_sizes = [25, 50, 100, 150]
accuracies_epoch = []
accuracies_hidden = []

# Define the model architecture
input_size = 784
output_size = len(trainloader.dataset.classes)

for hidden_size in hidden_sizes:
    accuracies_epoch_for_hidden = []
    for epochs in epochs_list:
        # Initialize weights with random values
        w1 = torch.randn(input_size, hidden_size, requires_grad=True)
        w2 = torch.randn(hidden_size, output_size, requires_grad=True)

        for epoch in range(epochs):
            # Training loop
            total_loss = 0.0
            for images, labels in trainloader:
                images = images.view(images.shape[0], -1)

                # Forward pass
                h = images.mm(w1)
                h_tan = h.tanh()
                y_pred = h_tan.mm(w2)
                y_pred_sf = y_pred.softmax(dim=1)

                # Compute the loss
                loss = -torch.log(y_pred_sf[range(images.shape[0]), labels]).mean()

                # Backpropagation
                grad_y_pred = y_pred_sf.clone()
                grad_y_pred[range(images.shape[0]), labels] -= 1
                grad_w2 = h_tan.t().mm(grad_y_pred)
                grad_h_tan = grad_y_pred.mm(w2.t())
                grad_h = grad_h_tan * (1 - h_tan**2)
                grad_w1 = images.t().mm(grad_h)

                # Update weights manually
                with torch.no_grad():
                    w1 -= learning_rate * grad_w1
                    w2 -= learning_rate * grad_w2

                total_loss += loss.item()

            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {total_loss / len(trainloader)}')

        # Evaluation
        correct = 0
        total = 0
        with torch.no_grad():
            for x, y in testloader:
                x = x.view(x.shape[0], -1)
                h_tan = x.mm(w1).tanh()
                y_pred = h_tan.mm(w2)
                predictions = torch.argmax(y_pred, dim=1)
                total += y.size(0)
                correct += (predictions == y).sum().item()

        accuracy = (correct / total) * 100
        accuracies_epoch_for_hidden.append(accuracy)

        print(f"Epoch {epoch+1} Accuracy = {accuracy:.2f}%, Loss = {loss:.4f}")

    accuracies_epoch.append(accuracies_epoch_for_hidden)

# Plot the accuracy against the number of epochs
plt.figure(figsize=(12, 6))
for i, hidden_size in enumerate(hidden_sizes):
    plt.plot(epochs_list, accuracies_epoch[i], marker='o', label=f'Hidden Layers = {hidden_size}')

plt.xlabel('Number of Epochs')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Number of Epochs for Different Hidden Layer Sizes')
plt.legend()
plt.grid(True)
plt.show()

# Plot the accuracy against the number of hidden layers
plt.figure(figsize=(12, 6))
for i, epoch in enumerate(epochs_list):
    plt.plot(hidden_sizes, [accuracies_epoch[j][i] for j in range(len(hidden_sizes))], marker='o', label=f'Epochs = {epoch}')

plt.xlabel('Number of Hidden Layers')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Number of Hidden Layers for Different Epochs')
plt.legend()
plt.grid(True)
plt.show()


Epoch [1/25], Loss: 1.8672158902287483
Epoch [2/25], Loss: 1.1071572224919994
Epoch [3/25], Loss: 0.9356303595801194
Epoch [4/25], Loss: 0.8322071065107981
Epoch [5/25], Loss: 0.7645340378321708
Epoch [6/25], Loss: 0.7105897632464766
Epoch [7/25], Loss: 0.6732550571883719
Epoch [8/25], Loss: 0.6395777036771179
Epoch [9/25], Loss: 0.6145410864837467
Epoch [10/25], Loss: 0.5915775176510215
Epoch [11/25], Loss: 0.5702574436962604
Epoch [12/25], Loss: 0.5541890967525542
Epoch [13/25], Loss: 0.5400053734096388
Epoch [14/25], Loss: 0.5265809397827834
Epoch [15/25], Loss: 0.514138159904629
Epoch [16/25], Loss: 0.503865550834996
Epoch [17/25], Loss: 0.49343966679212947
Epoch [18/25], Loss: 0.48345363628243404
Epoch [19/25], Loss: 0.47503812522844724
Epoch [20/25], Loss: 0.4663930728007108
Epoch [21/25], Loss: 0.4587594906529412
Epoch [22/25], Loss: 0.4538186256568879
Epoch [23/25], Loss: 0.4460026771426201
Epoch [24/25], Loss: 0.4411475485575696
Epoch [25/25], Loss: 0.43620817029336467
Epoch 2

Below is the code for relu activation function with mini-batch gradient - batch size is 10. Epochs are 25, 50, 100, 150 and Hidden layer neurons count as  25, 50, 100, 150.

In [None]:
# Define hyperparameters
learning_rate = 0.001
epochs_list = [25, 50, 100, 150]
hidden_sizes = [25, 50, 100, 150]
accuracies_epoch = []
accuracies_hidden = []

# Define the model architecture
input_size = 784
output_size = len(trainloader.dataset.classes)

for hidden_size in hidden_sizes:
    accuracies_epoch_for_hidden = []
    for epochs in epochs_list:
        # Initialize weights with random values
        w1 = torch.randn(input_size, hidden_size, requires_grad=True)
        w2 = torch.randn(hidden_size, output_size, requires_grad=True)

        for epoch in range(epochs):
            # Training loop
            total_loss = 0.0
            for images, labels in trainloader:
                images = images.view(images.shape[0], -1)

                # Forward pass
                h = images.mm(w1)
                h_relu = h.clamp(min=0)
                y_pred = h_relu.mm(w2)
                y_pred_sf = y_pred.softmax(dim=1)

                # Compute the loss
                loss = -torch.log(y_pred_sf[range(images.shape[0]), labels]).mean()

                # Backpropagation
                grad_y_pred = y_pred_sf.clone()
                grad_y_pred[range(images.shape[0]), labels] -= 1  # Derivative of cross-entropy loss w.r.t. y_pred
                grad_w2 = h_relu.t().mm(grad_y_pred)
                grad_h_relu = grad_y_pred.mm(w2.t())
                grad_h = grad_h_relu * (h > 0).float()  # Derivative of ReLU activation
                grad_w1 = images.t().mm(grad_h)

                # Update weights manually
                with torch.no_grad():
                    w1 -= learning_rate * grad_w1
                    w2 -= learning_rate * grad_w2

                total_loss += loss.item()

            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {total_loss / len(trainloader)}')

        # Evaluation
        correct = 0
        total = 0
        with torch.no_grad():
            for x, y in testloader:
                x = x.view(x.shape[0], -1)
                # Forward pass with ReLU activation
                h = x.mm(w1)
                h_relu = h.clamp(min=0)
                y_pred = h_relu.mm(w2)
                predictions = torch.argmax(y_pred, dim=1)
                total += y.size(0)
                correct += (predictions == y).sum().item()

        accuracy = (correct / total) * 100
        accuracies_epoch_for_hidden.append(accuracy)

        print(f"Epoch {epoch+1} Accuracy = {accuracy:.2f}%, Loss = {loss:.4f}")

    accuracies_epoch.append(accuracies_epoch_for_hidden)

# Plot the accuracy against the number of epochs
plt.figure(figsize=(12, 6))
for i, hidden_size in enumerate(hidden_sizes):
    plt.plot(epochs_list, accuracies_epoch[i], marker='o', label=f'Hidden Layers = {hidden_size}')

plt.xlabel('Number of Epochs')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Number of Epochs for Different Hidden Layer Sizes')
plt.legend()
plt.grid(True)
plt.show()

# Plot the accuracy against the number of hidden layers
plt.figure(figsize=(12, 6))
for i, epoch in enumerate(epochs_list):
    plt.plot(hidden_sizes, [accuracies_epoch[j][i] for j in range(len(hidden_sizes))], marker='o', label=f'Epochs = {epoch}')

plt.xlabel('Number of Hidden Layers')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Number of Hidden Layers for Different Epochs')
plt.legend()
plt.grid(True)
plt.show()


Epoch [1/25], Loss: inf
Epoch [2/25], Loss: 1.4098715389172236
Epoch [3/25], Loss: 1.1480209658183158
Epoch [4/25], Loss: 0.9966049946248531
Epoch [5/25], Loss: 0.8966858030892909
Epoch [6/25], Loss: 0.8183965977939467
Epoch [7/25], Loss: 0.7548166111735627
Epoch [8/25], Loss: 0.7039645435269922
Epoch [9/25], Loss: 0.6680460308293501
Epoch [10/25], Loss: 0.6404238665907954
Epoch [11/25], Loss: 0.6162175307154345
Epoch [12/25], Loss: 0.5998404141202724
Epoch [13/25], Loss: 0.5849924296593139
Epoch [14/25], Loss: 0.5694586518509314
Epoch [15/25], Loss: 0.5575656891916878
Epoch [16/25], Loss: 0.5472234300557369
Epoch [17/25], Loss: 0.5340747766346515
Epoch [18/25], Loss: 0.5267505083676273
Epoch [19/25], Loss: 0.5190352385415075
Epoch [20/25], Loss: 0.5104343450952632
Epoch [21/25], Loss: 0.5036470326837152
Epoch [22/25], Loss: 0.4984143994964349
Epoch [23/25], Loss: 0.49160237882069
Epoch [24/25], Loss: 0.4881798222196521
Epoch [25/25], Loss: 0.4797189619791073
Epoch 25 Accuracy = 87.27%

Now after finishing mini-batch gradient we are moving to stochastic gradient.
Similar to mini-batch we are using Epoch size as 25, 50, 100, 150 and Hidden layer neurons as 25, 50, 100, 150

In [None]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=1, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=True)

Further we will code for:

Activation function - Tanh

Epoch - 25, 50, 100, 150

Hidden layer - 25, 50, 100, 150

Stochastic Gradient descent

In [None]:
learning_rate = 0.001
epochs_list = [25, 50, 100, 150]
hidden_sizes = [25, 50, 100, 150]
accuracies_epoch = []
accuracies_hidden = []

# Define the model architecture
input_size = 784
output_size = len(trainloader.dataset.classes)

for hidden_size in hidden_sizes:
    accuracies_epoch_for_hidden = []
    for epochs in epochs_list:
        # Initialize weights with random values
        w1 = torch.randn(input_size, hidden_size, requires_grad=True)
        w2 = torch.randn(hidden_size, output_size, requires_grad=True)

        for epoch in range(epochs):
            # Training loop
            total_loss = 0.0
            for images, labels in trainloader:
                images = images.view(images.shape[0], -1)

                # Forward pass
                h = images.mm(w1)
                h_tan = h.tanh()
                y_pred = h_tan.mm(w2)
                y_pred_sf = y_pred.softmax(dim=1)

                # Compute the loss
                loss = -torch.log(y_pred_sf[range(images.shape[0]), labels]).mean()

                # Backpropagation
                grad_y_pred = y_pred_sf.clone()
                grad_y_pred[range(images.shape[0]), labels] -= 1
                grad_w2 = h_tan.t().mm(grad_y_pred)
                grad_h_tan = grad_y_pred.mm(w2.t())
                grad_h = grad_h_tan * (1 - h_tan**2)
                grad_w1 = images.t().mm(grad_h)

                # Update weights manually
                with torch.no_grad():
                    w1 -= learning_rate * grad_w1
                    w2 -= learning_rate * grad_w2

                total_loss += loss.item()

            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {total_loss / len(trainloader)}')

        # Evaluation
        correct = 0
        total = 0
        with torch.no_grad():
            for x, y in testloader:
                x = x.view(x.shape[0], -1)
                h_tan = x.mm(w1).tanh()
                y_pred = h_tan.mm(w2)
                predictions = torch.argmax(y_pred, dim=1)
                total += y.size(0)
                correct += (predictions == y).sum().item()

        accuracy = (correct / total) * 100
        accuracies_epoch_for_hidden.append(accuracy)

        print(f"Epoch {epoch+1} Accuracy = {accuracy:.2f}%, Loss = {loss:.4f}")

    accuracies_epoch.append(accuracies_epoch_for_hidden)

# Plot the accuracy against the number of epochs
plt.figure(figsize=(12, 6))
for i, hidden_size in enumerate(hidden_sizes):
    plt.plot(epochs_list, accuracies_epoch[i], marker='o', label=f'Hidden Layers = {hidden_size}')

plt.xlabel('Number of Epochs')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Number of Epochs for Different Hidden Layer Sizes')
plt.legend()
plt.grid(True)
plt.show()

# Plot the accuracy against the number of hidden layers
plt.figure(figsize=(12, 6))
for i, epoch in enumerate(epochs_list):
    plt.plot(hidden_sizes, [accuracies_epoch[j][i] for j in range(len(hidden_sizes))], marker='o', label=f'Epochs = {epoch}')

plt.xlabel('Number of Hidden Layers')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Number of Hidden Layers for Different Epochs')
plt.legend()
plt.grid(True)
plt.show()


Activation function - Relu

Epoch - 25, 50, 100, 150

Hidden layer - 25, 50, 100, 150

Stochastic Gradient descent

In [None]:
# Define hyperparameters
learning_rate = 0.001
epochs_list = [25, 50, 100, 150]
hidden_sizes = [25, 50, 100, 150]
accuracies_epoch = []
accuracies_hidden = []

# Define the model architecture
input_size = 784
output_size = len(trainloader.dataset.classes)

for hidden_size in hidden_sizes:
    accuracies_epoch_for_hidden = []
    for epochs in epochs_list:
        # Initialize weights with random values
        w1 = torch.randn(input_size, hidden_size, requires_grad=True)
        w2 = torch.randn(hidden_size, output_size, requires_grad=True)

        for epoch in range(epochs):
            # Training loop
            total_loss = 0.0
            for images, labels in trainloader:
                images = images.view(images.shape[0], -1)

                # Forward pass
                h = images.mm(w1)
                h_relu = h.clamp(min=0)
                y_pred = h_relu.mm(w2)
                y_pred_sf = y_pred.softmax(dim=1)

                # Compute the loss
                loss = -torch.log(y_pred_sf[range(images.shape[0]), labels]).mean()

                # Backpropagation
                grad_y_pred = y_pred_sf.clone()
                grad_y_pred[range(images.shape[0]), labels] -= 1  # Derivative of cross-entropy loss w.r.t. y_pred
                grad_w2 = h_relu.t().mm(grad_y_pred)
                grad_h_relu = grad_y_pred.mm(w2.t())
                grad_h = grad_h_relu * (h > 0).float()  # Derivative of ReLU activation
                grad_w1 = images.t().mm(grad_h)

                # Update weights manually
                with torch.no_grad():
                    w1 -= learning_rate * grad_w1
                    w2 -= learning_rate * grad_w2

                total_loss += loss.item()

            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {total_loss / len(trainloader)}')

        # Evaluation
        correct = 0
        total = 0
        with torch.no_grad():
            for x, y in testloader:
                x = x.view(x.shape[0], -1)
                # Forward pass with ReLU activation
                h = x.mm(w1)
                h_relu = h.clamp(min=0)
                y_pred = h_relu.mm(w2)
                predictions = torch.argmax(y_pred, dim=1)
                total += y.size(0)
                correct += (predictions == y).sum().item()

        accuracy = (correct / total) * 100
        accuracies_epoch_for_hidden.append(accuracy)

        print(f"Epoch {epoch+1} Accuracy = {accuracy:.2f}%, Loss = {loss:.4f}")

    accuracies_epoch.append(accuracies_epoch_for_hidden)

# Plot the accuracy against the number of epochs
plt.figure(figsize=(12, 6))
for i, hidden_size in enumerate(hidden_sizes):
    plt.plot(epochs_list, accuracies_epoch[i], marker='o', label=f'Hidden Layers = {hidden_size}')

plt.xlabel('Number of Epochs')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Number of Epochs for Different Hidden Layer Sizes')
plt.legend()
plt.grid(True)
plt.show()

# Plot the accuracy against the number of hidden layers
plt.figure(figsize=(12, 6))
for i, epoch in enumerate(epochs_list):
    plt.plot(hidden_sizes, [accuracies_epoch[j][i] for j in range(len(hidden_sizes))], marker='o', label=f'Epochs = {epoch}')

plt.xlabel('Number of Hidden Layers')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy vs. Number of Hidden Layers for Different Epochs')
plt.legend()
plt.grid(True)
plt.show()
