In [4]:
import torch
import torch.nn as nn
import torchvision
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch.nn.functional as F

''' Parameters (CHange Anything Here!) '''
transform = transforms.ToTensor()
batch_size = 3
#lifetime Sparcity
k_percent = 5


''' Code Starts Here '''
#Data MNIST
mnist_data = datasets.MNIST(root='./data', train = True, download = True, transform = transform)
data_loader = torch.utils.data.DataLoader(dataset= mnist_data, batch_size = batch_size, shuffle = True)

dataiter = iter(data_loader)
images, labels = dataiter.next()


# testing model
''' Conv 2d Layer 
#         Accessible Variables: .weights(Tensor), .bias(Tensor)
#         parameters :
#         torch.nn.Conv2d(in_channels, out_channels, 
#                         kernel_size, stride=1, padding=0, 
#                         dilation=1, groups=1, bias=True, 
#                         padding_mode='zeros')
'''
class Autoencoder_Test(nn.Module):
    def __init__(self):
        super().__init__()

        #Image size:N, 28, 28
        self.conv1 = nn.Conv2d(1, 2, 3, stride=1) # stride 2 will reduce size by half (W - F + 2P)/
        self.decoder = nn.Linear(2 * 26 * 26, 28*28) # input items, output items
        
    def forward(self, x):
        encoded = self.conv1(x) # encode, output: torch.Size([3, 2, 26, 26])
#         x = encoded.view(-1, 2 * 26 * 26) # flattening it out
#         decoded = self.decoder(x) 
#         decoded = decoded.view(3, 1, 28, 28) # converting it back to same format as input
#         #encoded is the output of the layer
        return encoded
    
    # With ReLU
    def forward(self, x, relu = True):
        encoded = F.relu(self.conv1(x)) # encode, output: torch.Size([3, 2, 26, 26])
        x = encoded.view(-1, 2 * 26 * 26) # flattening it out
        decoded = self.decoder(x) 
        decoded = decoded.view(3, 1, 28, 28) # converting it back to same format as input
        #encoded is the output of the layer
        return decoded
    
class RMSELoss(nn.Module):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self,yhat,y):
        loss = torch.sqrt(self.mse(yhat,y) + self.eps)
        return loss
    
model = Autoencoder_Test()
generator = model.parameters() #(returns a generator)
criterion = RMSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-5)

In [7]:
# Batch Training loop
# Hidden Units here is to be defined as feature maps
# Spatial Sparsity: For every feature Filter, after batch prediction, pick the highest output activity winner and set the rest to 0
# Lifetime Sparsity: For every feature Filter, after batch prediction, pick the hightst k% of all the winners picked in Spatial Sparsity
from sortedcontainers import SortedList, SortedDict

num_epochs = 1
sorted_list = SortedList()
winnersMap = {}

for epoch in range(num_epochs):
#     for (img, labels) in data_loader:
    img, labels = dataiter.next()
    
    with torch.no_grad():
        # First feedforward to get the k% of winners
        feature_map = model(img) # returns the feature maps of all batch examples in order

        # Summing up the activation maps to find the maximum activation hidden map from the the batch
        summation = torch.sum(feature_map, (2, 3)) # reduce the 3rd and 4th dimension of the tensor. Summation is a 2-dim tensor
        print("\n\n Sum of the feature maps(Should have batch_size values): \n", summation)

        # batch_idx: torch tensor with the max batch index, size = num_features
        # max_val:   torch tensor with the max_val for each batch, size = num_features
        max_val, batch_idx = torch.max(summation, 0) # returns a tensor with the size of number of features
        max_val = max_val.numpy()
        batch_idx = batch_idx.numpy()
        print("\n\nMaximum Values: ", max_val, "\nBatch Location Indexes: ", batch_idx)

        # where feature_num starts from 0
        for feature_num, max_values in enumerate(max_val):
        # Have to store list of tuples in sorted dict where tuples = (feature no., index)
        # if there is more than one value in this list, then backprop have to iterate through the list
            if winnersMap.get(max_values) == None:
                winnersMap[max_values] = [(feature_num, batch_idx[feature_num])]
            else:
                winnersMap[max_values] = winnersMap[max_values].append((feature_num, batch_idx[feature_num]))

        sorted_dict = SortedDict(winnersMap) # store and the keys sort Automatically
        print(sorted_dict)
        
        # Constructing the new Tensor with only the k% of the winners
        # This tensor.... requires_grad = True?
        
        
    # 2nd feedforward bias with only the k% of winner batches with relu
    k_forward = model(k_percent_winners, True)
    loss.backward()
    loss = criterion(decoded, img)+
#     layers = model.children()
#     hidden = next(layers)
#     print(next(hidden.parameters()))
# #     for params in hidden.parameters():
# #         print(params.grad)
        
# #         for param in child.parameters():
# #             if
# #             param.grad = 0   

    # Update weights
#     optimizer.step()
#     optimizer.zero_grad()

# Plan reduce 3 batches into 1 



 Sum of the feature maps(Should have batch_size values): 
 tensor([[ 30.3885, -56.0443],
        [ 24.0721, -60.3822],
        [ 24.6637, -59.8583]])


Maximum Values:  [ 30.38851  -56.044304] 
Batch Location Indexes:  [0 0]
SortedDict({-56.044304: [(0, 1)], 30.38851: [(0, 0)]})
Parameter containing:
tensor([[[[ 0.0321,  0.1935,  0.3018],
          [-0.1194, -0.0227, -0.0005],
          [ 0.3243, -0.1935, -0.2744]]],


        [[[-0.0840,  0.2898,  0.2096],
          [-0.1427, -0.2340, -0.0550],
          [-0.1770,  0.0344,  0.3196]]]], requires_grad=True)
