<a href="https://colab.research.google.com/github/7201krap/PYTORCH_project/blob/main/sparsity_4_optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torchvision import transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# One preprocessing pipeline shared by both splits: each image is converted to a tensor.
to_tensor = transforms.Compose([transforms.ToTensor()])

# MNIST is downloaded into ./data on first use.
mnist_trainset = datasets.MNIST(
    root='./data', train=True, download=True, transform=to_tensor
)
mnist_testset = datasets.MNIST(
    root='./data', train=False, download=True, transform=to_tensor
)

# Mini-batches of 50 samples; only the training loader shuffles.
train_dataloader = torch.utils.data.DataLoader(
    mnist_trainset, batch_size=50, shuffle=True
)
test_dataloader = torch.utils.data.DataLoader(
    mnist_testset, batch_size=50, shuffle=False
)

print("Training dataset size: ", len(mnist_trainset))
print("Testing dataset size: ", len(mnist_testset))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!
Training dataset size:  60000
Testing dataset size:  10000


In [3]:
# Define the model 
class Model(torch.nn.Module):
    """Fully connected classifier with a single sigmoid hidden layer.

    Each input sample is flattened to one row, projected 784 -> 256, passed
    through a sigmoid and projected 256 -> 10.  ``forward`` returns the raw
    output of the second linear layer; no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = torch.nn.Linear(784, 256)
        self.linear_2 = torch.nn.Linear(256, 10)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the (batch, 10) output of ``linear_2`` for a batch ``x``."""
        batch = x.size(0)
        # Collapse every non-batch dimension into a single feature axis.
        flat = x.reshape(batch, -1)
        hidden = self.sigmoid(self.linear_1(flat))
        return self.linear_2(hidden)

In [4]:
def get_activation(model):
    """Build a forward hook that records a layer's output on ``model``.

    The returned callable is meant to be passed to
    ``some_layer.register_forward_hook(...)``.  After every forward pass of
    that layer, its output is available as ``model.layer_activations``.

    Args:
        model: Object on which the ``layer_activations`` attribute is set.

    Returns:
        A hook with the ``(module, inputs, output)`` signature.  It returns
        ``None``, so the layer's forward output itself is left unchanged.
    """
    def hook(module, inputs, output):
        # Store a detached view: the recorded activations are only used for
        # statistics, and a tensor still attached to the autograd graph cannot
        # be converted to NumPy (e.g. by Matplotlib) and keeps the graph alive.
        model.layer_activations = output.detach()
    return hook





In [5]:
def _population_sparseness(activations):
    """Return the mean per-sample sparseness of a (samples, neurons) tensor as a float."""
    magnitudes = torch.abs(activations)
    root_n = magnitudes.shape[1] ** 0.5
    l1_norm = torch.sum(magnitudes, dim=1)
    l2_norm = torch.sqrt(torch.sum(magnitudes ** 2, dim=1))
    per_sample = (root_n - l1_norm / l2_norm) / (root_n - 1)
    return torch.mean(per_sample).item()


def sparsity_trainer(optimizer, model, train_loader=None, test_loader=None,
                     no_epochs=20, checkpoint_path="model.dth"):
    """Train ``model`` and track test accuracy and hidden-layer sparseness per epoch.

    The model must expose ``model.layer_activations`` after each forward pass
    (in this notebook a forward hook sets it).  The value recorded after each
    training batch is used to compute that epoch's sparseness; it is expected
    to have shape (batch, neurons).

    Args:
        optimizer: Optimizer already bound to ``model``'s parameters.
        model: Network to train.
        train_loader: Iterable of ``(image, label)`` training batches.
            Defaults to the notebook-level ``train_dataloader``.
        test_loader: Iterable of ``(image, label)`` evaluation batches.
            Defaults to the notebook-level ``test_dataloader``.
        no_epochs: Number of passes over ``train_loader``.
        checkpoint_path: File that receives ``model.state_dict()`` whenever the
            mean test loss improves.  Pass ``None`` to skip checkpointing.

    Returns:
        ``(test_acc, sparseness_list)``: two lists with one plain Python float
        per epoch (test accuracy, mean population sparseness of the recorded
        training activations).

    Raises:
        ValueError: If either loader yields no batches.
    """
    if train_loader is None:
        train_loader = train_dataloader
    if test_loader is None:
        test_loader = test_dataloader

    criterion = torch.nn.CrossEntropyLoss()
    test_acc = []
    sparseness_list = []

    # Start at +inf so the first epoch always counts as an improvement,
    # whatever the scale of the loss.
    best_test_loss = float("inf")

    for epoch in range(no_epochs):
        # ---- training ----
        model.train()
        running_train_loss = 0.0
        train_batches = 0
        epoch_activations = []

        for image, label in train_loader:
            optimizer.zero_grad()

            pred = model(image)
            loss = criterion(pred, label)
            loss.backward()
            optimizer.step()

            running_train_loss += loss.item()
            train_batches += 1

            # Detach before storing: only the values are needed, and keeping
            # the autograd graph would make the statistics require grad.
            epoch_activations.append(model.layer_activations.detach())

        if train_batches == 0:
            raise ValueError("train_loader produced no batches")

        total_train_loss = running_train_loss / train_batches

        # Reduce this epoch's activations to one number right away instead of
        # keeping every epoch in memory.  torch.cat (rather than stack plus a
        # fixed reshape) also copes with a smaller final batch.
        sparseness_list.append(
            _population_sparseness(torch.cat(epoch_activations, dim=0))
        )
        del epoch_activations

        # ---- evaluation ----
        model.eval()
        running_test_loss = 0.0
        test_batches = 0
        correct = 0
        seen = 0

        with torch.no_grad():
            for image, label in test_loader:
                pred = model(image)
                running_test_loss += criterion(pred, label).item()
                test_batches += 1

                # Softmax is monotonic, so the arg-max of the raw outputs is
                # already the predicted class.
                correct += (pred.argmax(dim=1) == label).sum().item()
                seen += label.size(0)

        if test_batches == 0:
            raise ValueError("test_loader produced no batches")

        accuracy = correct / seen
        test_acc.append(accuracy)
        total_test_loss = running_test_loss / test_batches

        print('\nEpoch: {}/{}, Train Loss: {:.8f}, Test Loss: {:.8f}, Test Accuracy: {:.8f}'.format(epoch + 1, no_epochs, total_train_loss, total_test_loss, accuracy))

        if total_test_loss < best_test_loss:
            best_test_loss = total_test_loss
            if checkpoint_path is not None:
                print("Saving the model state dictionary for Epoch: {} with Test loss: {:.8f}".format(epoch + 1, total_test_loss))
                torch.save(model.state_dict(), checkpoint_path)

    return test_acc, sparseness_list

In [6]:
# Seed used before each model is constructed, so that all four networks start
# from the same initial weights and the comparison isolates the optimizer.
# (This does not fix the batch order drawn later during training.)
INIT_SEED = 0


def _build_hooked_model(seed=INIT_SEED):
    """Create a freshly seeded ``Model`` whose ``linear_1`` output is recorded.

    The forward hook stores the output of ``linear_1`` on the model as
    ``layer_activations`` after every forward pass.
    """
    torch.manual_seed(seed)
    net = Model()
    net.linear_1.register_forward_hook(get_activation(net))
    return net


# adagrad
model_adagrad = _build_hooked_model()
optimizer_adagrad = torch.optim.Adagrad(model_adagrad.parameters(), lr=0.1)

# adadelta
model_adadelta = _build_hooked_model()
optimizer_adadelta = torch.optim.Adadelta(model_adadelta.parameters(), lr=1)

# adam
model_adam = _build_hooked_model()
optimizer_adam = torch.optim.Adam(model_adam.parameters(), lr=0.001)

# SGD
model_sgd = _build_hooked_model()
optimizer_sgd = torch.optim.SGD(model_sgd.parameters(), lr=0.1)

In [None]:
# Train each model with its own optimizer, one after another.  Every run
# yields the per-epoch test accuracies and the per-epoch sparseness values.

print("\n===== Entering Adagrad optimizer =====\n")
adagrad_test_acc, adagrad_sparseness_list = sparsity_trainer(
    optimizer=optimizer_adagrad, model=model_adagrad
)

print("\n===== Entering Adadelta optimizer =====\n")
adadelta_test_acc, adadelta_sparseness_list = sparsity_trainer(
    optimizer=optimizer_adadelta, model=model_adadelta
)

print("\n===== Entering Adam optimizer =====\n")
adam_test_acc, adam_sparseness_list = sparsity_trainer(
    optimizer=optimizer_adam, model=model_adam
)

print("\n===== Entering SGD optimizer =====\n")
sgd_test_acc, sgd_sparseness_list = sparsity_trainer(
    optimizer=optimizer_sgd, model=model_sgd
)


===== Entering Adagrad optimizer =====


Epoch: 1/20, Train Loss: 0.23493096, Test Loss: 0.12946224, Test Accuracy: 0.96010000
Saving the model state dictionary for Epoch: 1 with Test loss: 0.12946224

Epoch: 2/20, Train Loss: 0.09389984, Test Loss: 0.09368966, Test Accuracy: 0.96860000
Saving the model state dictionary for Epoch: 2 with Test loss: 0.09368966

Epoch: 3/20, Train Loss: 0.06561306, Test Loss: 0.08290215, Test Accuracy: 0.97330000
Saving the model state dictionary for Epoch: 3 with Test loss: 0.08290215

Epoch: 4/20, Train Loss: 0.04958360, Test Loss: 0.07514061, Test Accuracy: 0.97720000
Saving the model state dictionary for Epoch: 4 with Test loss: 0.07514061

Epoch: 5/20, Train Loss: 0.03878874, Test Loss: 0.07101393, Test Accuracy: 0.97840000
Saving the model state dictionary for Epoch: 5 with Test loss: 0.07101393

Epoch: 6/20, Train Loss: 0.03124737, Test Loss: 0.06804805, Test Accuracy: 0.97860000
Saving the model state dictionary for Epoch: 6 with Test loss: 0.06

In [None]:
# Hidden-layer sparseness per epoch, one curve per optimizer.
sparseness_curves = [
    ("Adagrad Sparseness", adagrad_sparseness_list, 'g'),
    ("Adadelta Sparseness", adadelta_sparseness_list, 'b'),
    ("Adam Sparseness", adam_sparseness_list, 'y'),
    ("SGD Sparseness", sgd_sparseness_list, 'r'),
]

fig = plt.figure(figsize=(20, 10))
for curve_label, curve_values, curve_color in sparseness_curves:
    # The entries may be 0-dim torch tensors that still require grad, which
    # Matplotlib cannot convert to NumPy; float() yields plain numbers.
    curve_values = [float(value) for value in curve_values]
    # Derive the x axis from the data so the plot follows the number of
    # epochs that were actually run.
    plt.plot(np.arange(1, len(curve_values) + 1), curve_values,
             label=curve_label, color=curve_color)
plt.xlabel('Epochs')
plt.ylabel('Sparsity')
plt.ylim(0, 0.3)
plt.title("Sparsity Plot")
plt.legend(loc='best')
plt.show()

In [None]:
# Test accuracy per epoch, one curve per optimizer, drawn on a single axes.
epochs = np.arange(1, 20 + 1)
fig, ax = plt.subplots(figsize=(20, 10))

ax.plot(epochs, adagrad_test_acc, label="Adagrad Test Accuracy", color='g')
ax.plot(epochs, adadelta_test_acc, label="Adadelta Test Accuracy", color='b')
ax.plot(epochs, adam_test_acc, label="Adam Test Accuracy", color='y')
ax.plot(epochs, sgd_test_acc, label="SGD Test Accuracy", color='r')

ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
# Restrict the y range to the band between 0.85 and 1.
ax.set_ylim(0.85, 1)
ax.set_title("Accuracy Plot")
ax.legend(loc='best')
plt.show()