<a href="https://colab.research.google.com/github/7201krap/PYTORCH_project/blob/main/sparsity_selectivity_sorted_300.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from google.colab import drive
# Mount Google Drive so the result files written by the optimizer cells below
# can be copied to /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Fetch a pre-packaged MNIST archive and unpack it into the working directory;
# the archive extracts to ./MNIST/raw/..., i.e. underneath `root_dir`.
# NOTE(review): presumably a workaround for torchvision's default download host — confirm it is still needed.
!wget www.di.ens.fr/~lelarge/MNIST.tar.gz
!tar -zxvf MNIST.tar.gz

# Root directory handed to every torchvision.datasets.MNIST call in this notebook.
root_dir = './'
torchvision.datasets.MNIST(root=root_dir,download=True)

--2021-03-18 10:53:34--  http://www.di.ens.fr/~lelarge/MNIST.tar.gz
Resolving www.di.ens.fr (www.di.ens.fr)... 129.199.99.14
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://www.di.ens.fr/~lelarge/MNIST.tar.gz [following]
--2021-03-18 10:53:34--  https://www.di.ens.fr/~lelarge/MNIST.tar.gz
Connecting to www.di.ens.fr (www.di.ens.fr)|129.199.99.14|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/x-gzip]
Saving to: ‘MNIST.tar.gz’

MNIST.tar.gz            [      <=>           ]  33.20M  33.0MB/s    in 1.0s    

2021-03-18 10:53:35 (33.0 MB/s) - ‘MNIST.tar.gz’ saved [34813078]

MNIST/
MNIST/raw/
MNIST/raw/train-labels-idx1-ubyte
MNIST/raw/t10k-labels-idx1-ubyte.gz
MNIST/raw/t10k-labels-idx1-ubyte
MNIST/raw/t10k-images-idx3-ubyte.gz
MNIST/raw/train-images-idx3-ubyte
MNIST/raw/train-labels-idx1-ubyte.gz
MNIST/raw/t10k-images-idx3-ubyte
MNIST/raw/tra

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./
    Split: Train

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
# Fix the seeds of PyTorch's and NumPy's global random number generators.
torch.manual_seed(1234)
np.random.seed(1234)

In [5]:
# Training split of MNIST; ToTensor converts every image to a tensor on access.
mnist_trainset = torchvision.datasets.MNIST(
    root=root_dir,
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Test split, loaded with the same transform.
mnist_testset = torchvision.datasets.MNIST(
    root=root_dir,
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [6]:
# Re-order the MNIST training set by digit class: every 0 first, then every 1,
# and so on up to 9. Inside a class the samples keep their original order.

# new_mnist_trainset[c] == [images_of_class_c, labels_of_class_c]
new_mnist_trainset = [[[], []] for _ in range(10)]

# Index each sample exactly once: every mnist_trainset[i] access re-applies the
# transform, so looking the same sample up repeatedly is needlessly slow.
for i in range(len(mnist_trainset)):
    image, label = mnist_trainset[i]
    new_mnist_trainset[label][0].append(image)
    new_mnist_trainset[label][1].append(label)

# Per-class image lists and per-class label lists, in class order 0..9.
image_trainset = [images for images, _ in new_mnist_trainset]
label_trainset = [labels for _, labels in new_mnist_trainset]

# Concatenate the per-class lists into one class-sorted image tensor and one
# class-sorted integer label tensor.
flattened_image_train = torch.stack(
    [image for images in image_trainset for image in images]
)
flattened_label_train = torch.tensor(
    [label for labels in label_trainset for label in labels],
    dtype=torch.long,
)

train_dataset = TensorDataset(flattened_image_train, flattened_label_train)
# No shuffling is requested, so training batches are served in class-sorted order.
train_dataloader = DataLoader(train_dataset, batch_size=50)

# The test set is used as-is, in its original (unsorted) order.
test_dataloader  = torch.utils.data.DataLoader(mnist_testset,
                                               batch_size=50,
                                               shuffle=False)

In [7]:
# Define the model
class Model(torch.nn.Module):
    """Fully connected network with a single sigmoid hidden layer.

    Args:
        in_features: number of input features after flattening (default 784).
        hidden_features: number of hidden-layer neurons (default 256).
        num_classes: number of output logits (default 10).

    The defaults reproduce the original fixed 784 -> 256 -> 10 architecture.
    """

    def __init__(self, in_features=784, hidden_features=256, num_classes=10):
        super().__init__()
        self.linear_1 = torch.nn.Linear(in_features, hidden_features)
        self.linear_2 = torch.nn.Linear(hidden_features, num_classes)
        self.sigmoid  = torch.nn.Sigmoid()

    def forward(self, x):
        """Return raw class logits (no softmax) for a batch ``x``.

        Every dimension after the first (batch) dimension is flattened before
        the first linear layer.
        """
        x = x.reshape(x.size(0), -1)
        hidden = self.sigmoid(self.linear_1(x))
        return self.linear_2(hidden)

In [8]:
def get_activation(model):
    """Build a forward hook that records a module's output on ``model``.

    The returned callable has the ``(module, inputs, output)`` forward-hook
    signature. Each time it is called it stores ``output`` as
    ``model.layer_activations``, replacing whatever was stored before.
    """
    def _store_output(_module, _inputs, output):
        setattr(model, "layer_activations", output)

    return _store_output

In [9]:
def selectivity(hidden_layer_each_neuron):
    """Compute the mean and standard deviation of per-neuron class selectivity.

    For each neuron, the activations are averaged per class. With ``u_max`` the
    largest class mean and ``u_minus_max`` the mean of the remaining class
    means, the neuron's selectivity index is::

        (u_max - u_minus_max) / (u_max + u_minus_max)

    Args:
        hidden_layer_each_neuron: iterable with one entry per neuron; each
            entry is a dict mapping a class label to the list of activations
            that neuron produced for samples of that class. Any number of
            neurons and classes is accepted.

    Returns:
        ``(avg_selectivity, std_selectivity)`` taken over all neurons.

    Raises:
        ValueError: if a neuron has recorded activations for fewer than two
            classes, in which case the index is undefined.
    """
    per_neuron_selectivity = list()

    for neuron in hidden_layer_each_neuron:
        # Mean activation per class. Classes without any recorded sample are
        # left out instead of triggering a division by zero.
        class_means = [
            sum(activations) / float(len(activations))
            for activations in neuron.values()
            if len(activations) > 0
        ]
        if len(class_means) < 2:
            raise ValueError(
                "selectivity needs activations for at least two classes per neuron"
            )

        u_max = np.max(class_means)
        # Average of all class means except the largest one.
        u_minus_max = (np.sum(class_means) - u_max) / (len(class_means) - 1)

        per_neuron_selectivity.append((u_max - u_minus_max) / (u_max + u_minus_max))

    avg_selectivity = np.average(per_neuron_selectivity)
    std_selectivity = np.std(per_neuron_selectivity)

    return avg_selectivity, std_selectivity

In [10]:
def sparsity_calculator(final_spareness):
    """Compute the mean population sparseness of a layer for every epoch.

    For one sample with absolute activations ``a`` over ``n`` neurons the
    population sparseness is::

        (sqrt(n) - sum(a) / sqrt(sum(a ** 2))) / (sqrt(n) - 1)

    The per-sample values are averaged over all samples of the epoch.

    Args:
        final_spareness: iterable with one entry per epoch. Each entry is a
            list of 2-D activation tensors of shape (batch, neurons), one per
            batch. Batches may differ in size, and the sample and neuron counts
            are taken from the tensors themselves.

    Returns:
        A list with one 0-dim tensor (the epoch's mean sparseness) per epoch.
    """
    sparseness_list = list()

    for single_epoch_spareness in final_spareness:
        # Only a statistic is computed here, so autograd tracking is not needed.
        with torch.no_grad():
            # Concatenate along the sample axis; unlike stack + reshape this
            # also works when the last batch is smaller than the others.
            layer_activations = torch.abs(torch.cat(single_epoch_spareness, dim=0))

            num_neurons = layer_activations.shape[1]
            sqrt_neurons = num_neurons ** 0.5

            l1_norm = torch.sum(layer_activations, dim=1)
            l2_norm = torch.sqrt(torch.sum(layer_activations ** 2, dim=1))

            population_sparseness = (sqrt_neurons - l1_norm / l2_norm) / (sqrt_neurons - 1)
            mean_sparseness_per_epoch = torch.mean(population_sparseness)

        sparseness_list.append(mean_sparseness_per_epoch)

    return sparseness_list

In [11]:
no_epochs = 300
def selectivity_trainer(optimizer, model):
    """Train ``model`` for ``no_epochs`` epochs and track hidden-layer statistics.

    Each epoch trains on ``train_dataloader``, then evaluates on
    ``test_dataloader``. During evaluation the hidden-layer activations are
    read from ``model.layer_activations``, so a forward hook built with
    ``get_activation(model)`` must already be registered on the layer of
    interest. From those activations the per-epoch class selectivity
    (``selectivity``) and population sparseness (``sparsity_calculator``) are
    computed.

    Uses the notebook globals ``train_dataloader``, ``test_dataloader``,
    ``device`` and ``no_epochs``.

    Args:
        optimizer: optimizer already bound to ``model``'s parameters.
        model: network to train; must expose ``layer_activations`` after a
            forward pass (see above).

    Returns:
        ``(test_acc, average_sparsity, final_selectivity_avg_list,
        final_selectivity_std_list)``, each a list with one entry per epoch.
    """
    criterion = torch.nn.CrossEntropyLoss()
    train_loss = list()
    test_loss  = list()
    test_acc   = list()

    average_sparsity = list()

    final_selectivity_avg_list = list()
    final_selectivity_std_list = list()

    for epoch in range(no_epochs):

        # One dict per hidden neuron mapping class label -> list of activations.
        # Created on the first test batch, once the layer width is known.
        hidden_layer_each_neuron = None

        hidden_layer_activation_list = list()

        total_train_loss = 0
        total_test_loss = 0

        # training
        model.train()

        for images, labels in train_dataloader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            pred = model(images)

            loss = criterion(pred, labels)
            total_train_loss += loss.item()

            loss.backward()
            optimizer.step()

        total_train_loss = total_train_loss / len(train_dataloader)
        train_loss.append(total_train_loss)

        # testing
        model.eval()
        correct = 0
        num_test_samples = 0

        # Evaluation needs no gradients. Disabling autograd also keeps the
        # stored activations from holding on to a computation graph.
        with torch.no_grad():
            for images, labels in test_dataloader:
                images, labels = images.to(device), labels.to(device)

                pred = model(images)

                loss = criterion(pred, labels)
                total_test_loss += loss.item()

                # softmax is monotonic, so the arg-max of the logits is the predicted class
                correct += (pred.argmax(dim=1) == labels).sum().item()
                num_test_samples += labels.size(0)

                # Move to host memory: Tensor.numpy() only works on CPU tensors.
                activations = model.layer_activations.detach().cpu()
                hidden_layer_activation_list.append(activations)

                activations_np = activations.numpy()
                if hidden_layer_each_neuron is None:
                    hidden_layer_each_neuron = [
                        {0:[], 1:[], 2:[], 3:[], 4:[], 5:[], 6:[], 7:[], 8:[], 9:[]}
                        for _ in range(activations_np.shape[1])
                    ]

                # for each image/label, append the activation value of every neuron
                for activation, label in zip(activations_np, labels.tolist()):
                    for i, value in enumerate(activation):
                        hidden_layer_each_neuron[i][label].append(value)

        avg_selectivity, std_selectivity = selectivity(hidden_layer_each_neuron)

        final_selectivity_avg_list.append(avg_selectivity)
        final_selectivity_std_list.append(std_selectivity)

        # Reduce this epoch's activations to a single number right away instead
        # of keeping every epoch's activations in memory until training ends.
        epoch_sparsity = sparsity_calculator([hidden_layer_activation_list])[0]
        average_sparsity.append(epoch_sparsity.item())

        # calculate accuracy
        accuracy = correct / num_test_samples
        test_acc.append(accuracy)

        # mean test loss over batches
        total_test_loss = total_test_loss / len(test_dataloader)
        test_loss.append(total_test_loss)

        print('\nEpoch: {}/{}, Train Loss: {:.8f}, Test Loss: {:.8f}, Test Accuracy: {:.8f}'.format(epoch + 1, no_epochs, total_train_loss, total_test_loss, accuracy))

    print("average_sparsity:", average_sparsity)

    return test_acc, average_sparsity, final_selectivity_avg_list, final_selectivity_std_list

# AdaDelta

In [None]:
model_Adadelta = Model()
print("model_Adadelta:", model_Adadelta)
model_Adadelta.to(device)
model_Adadelta.sigmoid.register_forward_hook(get_activation(model_Adadelta))
optimizer_Adadelta = torch.optim.Adadelta(model_Adadelta.parameters(), lr=1.0)
Adadelta_test_acc, sparsity, Adadelta_avg_selectivity_list, Adadelta_std_selectivity_list = selectivity_trainer(optimizer=optimizer_Adadelta, model=model_Adadelta)

f = open("sorted_sparsity_selectivity_Adadelta.txt", "w")
f.write(str(0)+'\n'+str(Adadelta_test_acc)+'\n'+str(sparsity)+'\n'+str(Adadelta_avg_selectivity_list)+'\n'+str(Adadelta_std_selectivity_list)+'\n\n')
f.close()

!cp sorted_sparsity_selectivity_Adadelta.txt /content/drive/MyDrive

# AdaGrad

In [12]:
model_Adagrad = Model()
print("model_Adagrad:", model_Adagrad)
model_Adagrad.to(device)
model_Adagrad.sigmoid.register_forward_hook(get_activation(model_Adagrad))
optimizer_Adagrad = torch.optim.Adagrad(model_Adagrad.parameters(), lr=0.1)
Adagrad_test_acc, sparsity, Adagrad_avg_selectivity_list, Adagrad_std_selectivity_list = selectivity_trainer(optimizer=optimizer_Adagrad, model=model_Adagrad)

f = open("sorted_sparsity_selectivity_Adagrad.txt", "w")
f.write(str(0)+'\n'+str(Adagrad_test_acc)+'\n'+str(sparsity)+'\n'+str(Adagrad_avg_selectivity_list)+'\n'+str(Adagrad_std_selectivity_list)+'\n\n')
f.close()

!cp sorted_sparsity_selectivity_Adagrad.txt /content/drive/MyDrive

model_Adagrad: Model(
  (linear_1): Linear(in_features=784, out_features=256, bias=True)
  (linear_2): Linear(in_features=256, out_features=10, bias=True)
  (sigmoid): Sigmoid()
)

Epoch: 1/300, Train Loss: 0.13042085, Test Loss: 7.76099038, Test Accuracy: 0.10090000

Epoch: 2/300, Train Loss: 0.09620166, Test Loss: 5.50175063, Test Accuracy: 0.10090000

Epoch: 3/300, Train Loss: 0.05869379, Test Loss: 4.44656155, Test Accuracy: 0.11530000

Epoch: 4/300, Train Loss: 0.04409782, Test Loss: 4.14679881, Test Accuracy: 0.20030000

Epoch: 5/300, Train Loss: 0.03957421, Test Loss: 3.77036811, Test Accuracy: 0.25950000

Epoch: 6/300, Train Loss: 0.03484181, Test Loss: 3.69523552, Test Accuracy: 0.26800000

Epoch: 7/300, Train Loss: 0.03376216, Test Loss: 3.49587876, Test Accuracy: 0.29890000

Epoch: 8/300, Train Loss: 0.03238552, Test Loss: 3.25392950, Test Accuracy: 0.32170000

Epoch: 9/300, Train Loss: 0.03214524, Test Loss: 3.15860635, Test Accuracy: 0.35740000

Epoch: 10/300, Train Loss: 

# SGD


In [13]:
model_SGD = Model()
print("model_SGD:", model_SGD)
model_SGD.to(device)
model_SGD.sigmoid.register_forward_hook(get_activation(model_SGD))
optimizer_SGD = torch.optim.SGD(model_SGD.parameters(), lr=0.1)
SGD_test_acc, sparsity, SGD_avg_selectivity_list, SGD_std_selectivity_list = selectivity_trainer(optimizer=optimizer_SGD, model=model_SGD)

f = open("sorted_sparsity_selectivity_SGD.txt", "w")
f.write(str(0)+'\n'+str(SGD_test_acc)+'\n'+str(sparsity)+'\n'+str(SGD_avg_selectivity_list)+'\n'+str(SGD_std_selectivity_list)+'\n\n')
f.close()

!cp sorted_sparsity_selectivity_SGD.txt /content/drive/MyDrive

model_SGD: Model(
  (linear_1): Linear(in_features=784, out_features=256, bias=True)
  (linear_2): Linear(in_features=256, out_features=10, bias=True)
  (sigmoid): Sigmoid()
)

Epoch: 1/300, Train Loss: 0.07130323, Test Loss: 8.03513433, Test Accuracy: 0.10090000

Epoch: 2/300, Train Loss: 0.09522179, Test Loss: 7.73487459, Test Accuracy: 0.10090000

Epoch: 3/300, Train Loss: 0.10906799, Test Loss: 7.38758298, Test Accuracy: 0.10090000

Epoch: 4/300, Train Loss: 0.10622418, Test Loss: 6.70938220, Test Accuracy: 0.10090000

Epoch: 5/300, Train Loss: 0.09005970, Test Loss: 5.86936565, Test Accuracy: 0.10090000

Epoch: 6/300, Train Loss: 0.07472081, Test Loss: 5.22540113, Test Accuracy: 0.10560000

Epoch: 7/300, Train Loss: 0.06555516, Test Loss: 4.80297432, Test Accuracy: 0.13070000

Epoch: 8/300, Train Loss: 0.06068734, Test Loss: 4.50595841, Test Accuracy: 0.15850000

Epoch: 9/300, Train Loss: 0.05786853, Test Loss: 4.27655988, Test Accuracy: 0.18010000

Epoch: 10/300, Train Loss: 0.05

# Adam

In [14]:
model_Adam = Model()
print("model_Adam:", model_Adam)
model_Adam.to(device)
model_Adam.sigmoid.register_forward_hook(get_activation(model_Adam))
optimizer_Adam = torch.optim.Adam(model_Adam.parameters(), lr=0.001)
Adam_test_acc, sparsity, Adam_avg_selectivity_list, Adam_std_selectivity_list = selectivity_trainer(optimizer=optimizer_Adam, model=model_Adam)

f = open("sorted_sparsity_selectivity_Adam.txt", "w")
f.write(str(0)+'\n'+str(Adam_test_acc)+'\n'+str(sparsity)+'\n'+str(Adam_avg_selectivity_list)+'\n'+str(Adam_std_selectivity_list)+'\n\n')
f.close()

!cp sorted_sparsity_selectivity_Adam.txt /content/drive/MyDrive

model_Adam: Model(
  (linear_1): Linear(in_features=784, out_features=256, bias=True)
  (linear_2): Linear(in_features=256, out_features=10, bias=True)
  (sigmoid): Sigmoid()
)

Epoch: 1/300, Train Loss: 1.04488261, Test Loss: 4.13940607, Test Accuracy: 0.10090000

Epoch: 2/300, Train Loss: 0.50857559, Test Loss: 3.98666899, Test Accuracy: 0.12570000

Epoch: 3/300, Train Loss: 0.22963276, Test Loss: 3.69668182, Test Accuracy: 0.24440000

Epoch: 4/300, Train Loss: 0.14986563, Test Loss: 3.90408527, Test Accuracy: 0.26880000

Epoch: 5/300, Train Loss: 0.12822238, Test Loss: 4.33227836, Test Accuracy: 0.26680000

Epoch: 6/300, Train Loss: 0.11357996, Test Loss: 4.85693891, Test Accuracy: 0.26140000

Epoch: 7/300, Train Loss: 0.10094509, Test Loss: 4.80440885, Test Accuracy: 0.30780000

Epoch: 8/300, Train Loss: 0.09652053, Test Loss: 5.41518896, Test Accuracy: 0.30420000

Epoch: 9/300, Train Loss: 0.08671210, Test Loss: 5.08839709, Test Accuracy: 0.33180000

Epoch: 10/300, Train Loss: 0.0