In [63]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.nn.functional as F

''' Parameters (CHange Anything Here!) '''
transform = transforms.ToTensor()
batch_size = 3
#lifetime Sparcity
k_percent = 5


''' Code Starts Here '''
#Data MNIST
mnist_data = datasets.MNIST(root='./data', train = True, download = True, transform = transform)
data_loader = torch.utils.data.DataLoader(dataset= mnist_data, batch_size = batch_size, shuffle = True)

dataiter = iter(data_loader)
images, labels = dataiter.next()


# testing model
''' Conv 2d Layer 
#         Accessible Variables: .weights(Tensor), .bias(Tensor)
#         parameters :
#         torch.nn.Conv2d(in_channels, out_channels, 
#                         kernel_size, stride=1, padding=0, 
#                         dilation=1, groups=1, bias=True, 
#                         padding_mode='zeros')
'''
class Autoencoder_Test(nn.Module):
    def __init__(self):
        super().__init__()

        #Image size:N, 28, 28
        self.conv1 = nn.Conv2d(1, 2, 3, stride=1) # stride 2 will reduce size by half (W - F + 2P)/
        self.decoder = nn.Linear(2 * 26 * 26, 28*28) # input items, output items
        
    def forward(self, x):
        encoded = self.conv1(x) # 
        print("\nEncoder Output Size : \n", encoded.size())
        x = encoded.view(-1, 2 * 26 * 26)
        decoded = self.decoder(x)
        decoded = decoded.view(3, 1, 28, 28)
        #encoded is the output of the layer
        return encoded, decoded
    
#     def decode (self, x):
#         decoded = self.decoder(x)
#         return decoded
    
class RMSELoss(nn.Module):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self,yhat,y):
        loss = torch.sqrt(self.mse(yhat,y) + self.eps)
        return loss
    
model = Autoencoder_Test()
generator = model.parameters() #(returns a generator)
criterion = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-5)


In [65]:
#Testing for model output size
img, _ = dataiter.next()
encoded, decoded = model(img)
print(encoded.size())
print(decoded.size())
print(decoded)
print(img.size())


Encoder Output Size : 
 torch.Size([3, 2, 26, 26])
torch.Size([3, 2, 26, 26])
torch.Size([3, 28, 28])
tensor([[[-0.0287,  0.0710,  0.2128,  ..., -0.1840,  0.1458,  0.1766],
         [-0.1088,  0.1234,  0.2379,  ..., -0.2177,  0.0156,  0.0575],
         [-0.0335, -0.1569,  0.4818,  ..., -0.0211,  0.2814,  0.4293],
         ...,
         [-0.0795, -0.0854,  0.1426,  ..., -0.0689,  0.0270, -0.1244],
         [-0.0447,  0.0539,  0.0855,  ..., -0.2471, -0.0073, -0.0243],
         [ 0.0219,  0.0731,  0.2498,  ...,  0.1524,  0.1936, -0.0947]],

        [[ 0.0163, -0.0012,  0.1540,  ..., -0.1624,  0.0271,  0.1217],
         [-0.0527,  0.1910,  0.1957,  ..., -0.2279, -0.0806,  0.2667],
         [ 0.0214,  0.0258,  0.4219,  ...,  0.0884,  0.2397,  0.5031],
         ...,
         [-0.0099, -0.1998,  0.2910,  ..., -0.0292,  0.1540, -0.0778],
         [-0.1572,  0.1589,  0.0561,  ..., -0.1509, -0.0122, -0.1615],
         [ 0.0965,  0.0101,  0.3438,  ...,  0.1533,  0.1266, -0.1582]],

        [[-0.

In [31]:
# Testing cell for freezing gradients
layers = model.children() # returns a generator
hidden = next(layers)
fcl = next(layers)
print(hidden) 
print(fcl) 

Conv2d(1, 2, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Linear(in_features=1568, out_features=784, bias=True)


In [36]:
print(hidden.parameters())
for params in hidden.parameters():
    print(params.grad)

<generator object Module.parameters at 0x00000245543A14A0>
None
None


In [47]:
# Batch Training loop
# Hidden Units here is to be defined as feature maps
# Spatial Sparsity: For every feature Filter, after batch prediction, pick the highest output activity winner and set the rest to 0
# Lifetime Sparsity: For every feature Filter, after batch prediction, pick the hightst k% of all the winners picked in Spatial Sparsity
from sortedcontainers import SortedList, SortedDict

num_epochs = 1
sorted_list = SortedList()
winnersMap = {}

for epoch in range(num_epochs):
#     for (img, labels) in data_loader:
    img, labels = dataiter.next()
    feature_map = model(img) # returns the feature maps of all batch examples in order
#     decoded = model.decode(feature_map)
    loss = criterion(feature_map, img)
    loss.backward()
    
#     with torch.no_grad():
#         # Summing up the activation maps to find the maximum activation hidden map from the the batch
#         summation = torch.sum(feature_map, (2, 3)) # reduce the 3rd and 4th dimension of the tensor. Summation is a 2-dim tensor
#         print("\n\n Sum of the feature maps(Should have batch_size values): \n", summation)

#         # batch_idx: torch tensor with the max batch index, size = num_features
#         # max_val:   torch tensor with the max_val for each batch, size = num_features
#         max_val, batch_idx = torch.max(summation, 0) # returns a tensor with the size of number of features
#         max_val = max_val.numpy()
#         batch_idx = batch_idx.numpy()
#         print("\n\nMaximum Values: ", max_val, "\nBatch Location Indexes: ", batch_idx)

#         # where feature_num starts from 0
#         for feature_num, max_values in enumerate(max_val):
#         # Have to store list of tuples in sorted dict where tuples = (feature no., index)
#         # if there is more than one value in this list, then backprop have to iterate through the list
#             if winnersMap.get(max_values) == None:
#                 winnersMap[max_values] = [(batch_idx[feature_num], feature_num)]
#             else:
#                 winnersMap[max_values] = winnersMap[max_values].append((batch_idx[feature_num], feature_num))

#         sorted_dict = SortedDict(winnersMap) # store and the keys sort Automatically
#         print(sorted_dict)
        
#     Set the .grad attribute of the hidden units who are not winners to 0
#     for i in range( ((k_percent/100)*num_features).floor() ):
    
    # calculating the gradient freezing the gradient of the hidden layer
    
    layers = model.children()
    hidden = next(layers)
    for params in hidden.parameters():
        print(params.grad)
        
#         for param in child.parameters():
#             if
#             param.grad = 0   

    # Update weights
#     optimizer.step()
#     optimizer.zero_grad()

NameError: name 'decoded' is not defined