<a href="https://colab.research.google.com/github/7201krap/PYTORCH_introduction_to_pytorch/blob/main/find_sparsity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torchvision import transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def _mnist_split(train):
    """Return one MNIST split (downloaded to ./data if missing), images as tensors."""
    return datasets.MNIST(
        root='./data',
        train=train,
        download=True,
        transform=transforms.Compose([transforms.ToTensor()]),
    )


def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader that yields mini-batches of 50 samples."""
    return torch.utils.data.DataLoader(dataset, batch_size=50, shuffle=shuffle)


mnist_trainset = _mnist_split(train=True)
mnist_testset = _mnist_split(train=False)

# Only the training batches are shuffled; the test order stays fixed.
train_dataloader = _make_loader(mnist_trainset, shuffle=True)
test_dataloader = _make_loader(mnist_testset, shuffle=False)

print("Training dataset size: ", len(mnist_trainset))
print("Testing dataset size: ", len(mnist_testset))

In [89]:
class Model(torch.nn.Module):
    """Two-layer perceptron: 784 inputs -> 256 sigmoid units -> 10 output logits."""

    def __init__(self):
        super().__init__()
        # Attribute names are the state_dict keys and are referenced from
        # outside the class, so they (and their creation order) are kept.
        self.linear_1 = torch.nn.Linear(in_features=784, out_features=256)
        self.linear_2 = torch.nn.Linear(in_features=256, out_features=10)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Flatten each sample, apply the hidden layer + sigmoid, return raw logits."""
        batch_size = x.size(0)
        flat = x.reshape(batch_size, -1)
        hidden = self.sigmoid(self.linear_1(flat))
        return self.linear_2(hidden)


model = Model()

In [90]:
def get_activation():
    """Build a forward hook that records a layer's output on the global ``model``.

    Returns:
        A callable with the ``(module, input, output)`` forward-hook signature.
        Each time the hooked module runs, the hook stores that module's output
        in ``model.layer_activations``, overwriting the previous value.
    """
    def hook(module, input, output):
        # Store a detached view: the recorded tensor is only inspected, never
        # back-propagated through, and keeping the attached tensor would keep
        # the whole autograd graph of the batch alive for as long as it is held.
        model.layer_activations = output.detach()
    return hook

# The hook sits on linear_1, so the recorded values are that layer's outputs
# (i.e. the hidden pre-activations, before the sigmoid is applied).
model.linear_1.register_forward_hook(get_activation())

<torch.utils.hooks.RemovableHandle at 0x7fe54b765898>

In [91]:
# Adagrad over every parameter of the network, with a base learning rate of 0.1
optimizer = torch.optim.Adagrad(params=model.parameters(), lr=0.1)

In [92]:
# Train for `no_epochs` epochs, evaluating on the test set after every epoch.
# Per-epoch metrics are collected in train_loss / test_loss / test_acc and the
# weights with the lowest test loss seen so far are written to "model.dth".
criterion = torch.nn.CrossEntropyLoss()
no_epochs = 20
train_loss = list()
test_loss  = list()
test_acc   = list()

# Start from +inf so the first epoch always produces a checkpoint; a finite
# sentinel (such as 1) silently disables saving whenever the test loss never
# drops below it.
best_test_loss = float("inf")

for epoch in range(no_epochs):
    total_train_loss = 0
    total_test_loss = 0

    # Re-created every epoch: once the loop finishes, this list holds the
    # hidden-layer activations of the final epoch only.
    hidden_layer_activation_list = list()

    # ---- training ----
    model.train()

    for image, label in train_dataloader:

        optimizer.zero_grad()

        pred = model(image)

        loss = criterion(pred, label)
        total_train_loss += loss.item()

        loss.backward()
        optimizer.step()

        # model.layer_activations is filled in by the forward hook during
        # model(image). Detach before storing so the list does not keep one
        # autograd graph per batch alive for the whole epoch.
        hidden_layer_activation_list.append(model.layer_activations.detach())

    # mean loss per batch
    total_train_loss = total_train_loss / len(train_dataloader)
    train_loss.append(total_train_loss)

    # ---- evaluation ----
    model.eval()
    total = 0
    # No gradients are needed for evaluation; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for image, label in test_dataloader:

            pred = model(image)

            loss = criterion(pred, label)
            total_test_loss += loss.item()

            # The predicted class is the arg-max logit; softmax is monotonic,
            # so applying it first would not change which class wins.
            total += (pred.argmax(dim=1) == label).sum().item()

    # calculate accuracy over the whole test set
    accuracy = total / len(mnist_testset)
    test_acc.append(accuracy)

    # mean loss per batch
    total_test_loss = total_test_loss / len(test_dataloader)
    test_loss.append(total_test_loss)

    print('\nEpoch: {}/{}, Train Loss: {:.8f}, Test Loss: {:.8f}, Test Accuracy: {:.8f}'.format(epoch + 1, no_epochs, total_train_loss, total_test_loss, accuracy))

    # Checkpoint whenever the test loss improves on the best value so far.
    if total_test_loss < best_test_loss:
        best_test_loss = total_test_loss
        print("Saving the model state dictionary for Epoch: {} with Test loss: {:.8f}".format(epoch + 1, total_test_loss))
        torch.save(model.state_dict(), "model.dth")


Epoch: 1/20, Train Loss: 0.22762350, Test Loss: 0.12376226, Test Accuracy: 0.96120000
Saving the model state dictionary for Epoch: 1 with Test loss: 0.12376226

Epoch: 2/20, Train Loss: 0.09335259, Test Loss: 0.09199230, Test Accuracy: 0.97110000
Saving the model state dictionary for Epoch: 2 with Test loss: 0.09199230

Epoch: 3/20, Train Loss: 0.06371993, Test Loss: 0.08074189, Test Accuracy: 0.97520000
Saving the model state dictionary for Epoch: 3 with Test loss: 0.08074189

Epoch: 4/20, Train Loss: 0.04760822, Test Loss: 0.07486176, Test Accuracy: 0.97710000
Saving the model state dictionary for Epoch: 4 with Test loss: 0.07486176

Epoch: 5/20, Train Loss: 0.03692684, Test Loss: 0.07155853, Test Accuracy: 0.97870000
Saving the model state dictionary for Epoch: 5 with Test loss: 0.07155853

Epoch: 6/20, Train Loss: 0.02919866, Test Loss: 0.06864063, Test Accuracy: 0.97860000
Saving the model state dictionary for Epoch: 6 with Test loss: 0.06864063

Epoch: 7/20, Train Loss: 0.023658

In [93]:
# Combine the per-batch activation tensors into a single tensor with a new
# leading batch-index axis. torch.stack requires every batch to have the same
# shape. Each batch is detached first so the result carries no autograd
# history; the comprehension also makes re-running this cell a no-op, since
# iterating a stacked tensor yields the same per-batch slices back.
hidden_layer_activation_list = torch.stack(
    [batch.detach() for batch in hidden_layer_activation_list]
)

In [94]:
# Sanity check: show the dimensions of the stacked activation tensor
hidden_layer_activation_list.shape

torch.Size([1200, 50, 256])

In [95]:
# Merge the batch-index and within-batch axes so that each row holds the
# hidden-layer activations of one sample. Both sizes are inferred from the
# tensor itself instead of being hard-coded, so the cell keeps working when
# the dataset size, batch size or hidden width changes.
layer_activations_list = hidden_layer_activation_list.reshape(
    -1, hidden_layer_activation_list.size(-1)
)

In [96]:
# Sanity check: one row per sample, one column per hidden unit
layer_activations_list.shape

torch.Size([60000, 256])

In [99]:
# Population sparseness of every sample (row), using Hoyer's measure
#     (sqrt(n) - ||a||_1 / ||a||_2) / (sqrt(n) - 1)
# which is 0 when all n units have equal magnitude and 1 when exactly one unit
# is non-zero. It is undefined (NaN) for an all-zero row and for n == 1.
#
# Magnitudes are used because the recorded values can be negative. The tensor
# is detached so this purely diagnostic computation builds no autograd graph.
layer_activations_list = torch.abs(layer_activations_list.detach())
num_neurons = layer_activations_list.shape[1]
print(num_neurons)

# Plain Python float, to avoid mixing NumPy scalars into tensor arithmetic.
sqrt_n = num_neurons ** 0.5
l1_norm = layer_activations_list.sum(dim=1)
l2_norm = layer_activations_list.pow(2).sum(dim=1).sqrt()

population_sparseness = (sqrt_n - l1_norm / l2_norm) / (sqrt_n - 1)
print(population_sparseness.size())

# Average over all samples collected during the epoch.
mean_sparseness_per_epoch = torch.mean(population_sparseness)

256
torch.Size([60000])


In [98]:
# Display the mean of the per-sample population sparseness values computed above
mean_sparseness_per_epoch

tensor(0.1598, grad_fn=<MeanBackward0>)