In [2]:
'''
By : chris-wei-xun.lai
'''

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.optim import lr_scheduler
from torchvision import datasets, transforms

### Parameters ### (Change anything here)

sparsity_rate = 5 # in percentage
batch_size = 5    # samples per mini-batch handed out by the DataLoader
num_epochs = 5

##########################################

### DATASET ###

# Convert each PIL image to a float tensor. ToTensor also rescales the pixel
# values from [0, 255] to [0, 1].
transform = transforms.ToTensor()

# MNIST handwritten digits (downloaded to ./data on first run)
mnist_data = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# Use the configurable batch_size from the parameter section instead of a
# hard-coded value, so changing the parameter actually has an effect.
data_loader = torch.utils.data.DataLoader(dataset=mnist_data, batch_size=batch_size, shuffle=True)

# LED Dataset (remember to normalize the input data to [0, 1])


# Pull a single batch for inspection. The built-in next() is used because
# current DataLoader iterators do not provide a .next() method.
dataiter = iter(data_loader)
images, labels = next(dataiter)



ModuleNotFoundError: No module named 'torch'

## Initializing weights
The first approach is to write a function that initializes the weights of a linear layer and apply it to every layer of the network. <br>
Source: 
- https://stackoverflow.com/questions/49433936/how-to-initialize-weights-in-pytorch

Code:
```python
def init_weights(m):
    """Initialise a single module in place; meant to be passed to ``net.apply``.

    Only modules whose type is exactly ``nn.Linear`` are modified; every
    other module is left untouched.

    Args:
        m: the module visited by ``apply``.

    Effects:
        The weight is filled with Xavier/Glorot uniform values and the bias,
        when the layer has one, is set to 0.01.
    """
    if type(m) is nn.Linear:
        # xavier_uniform_ (trailing underscore) is the in-place initialiser;
        # the un-suffixed xavier_uniform is deprecated.
        torch.nn.init.xavier_uniform_(m.weight)
        # Layers created with bias=False have m.bias set to None.
        if m.bias is not None:
            torch.nn.init.constant_(m.bias, 0.01)

net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
net.apply(init_weights)

```
<br>

---  
The second approach is to re-initialize the weights after all the layers have been created, by looping over the model's modules. <br>
Source: 
- https://www.youtube.com/watch?v=nA6oEAE9IVc&list=PLTKMiZHVd_2KJtIXOW0zFhFfBaJJilH51&index=88
- https://github.com/rasbt/stat453-deep-learning-ss21/blob/main/L11/code/weight_normal.ipynb

Code:
```python
class MultilayerPerceptron(torch.nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    After the layers are built, every linear layer is re-initialised:
    weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.001, and biases are set to zero.

    Args:
        num_features: size of the flattened input.
        num_classes: number of output logits.
        drop_proba: accepted for interface compatibility; not used here.
        num_hidden_1: width of the first hidden layer.
        num_hidden_2: width of the second hidden layer.
    """

    def __init__(self, num_features, num_classes, drop_proba, 
                 num_hidden_1, num_hidden_2):
        super().__init__()

        stack = [
            torch.nn.Flatten(),
            # hidden layer 1
            torch.nn.Linear(num_features, num_hidden_1),
            torch.nn.ReLU(),
            # hidden layer 2
            torch.nn.Linear(num_hidden_1, num_hidden_2),
            torch.nn.ReLU(),
            # logits
            torch.nn.Linear(num_hidden_2, num_classes),
        ]
        self.my_network = torch.nn.Sequential(*stack)

        # Overwrite the default initialisation of every linear layer.
        linear_layers = (
            mod for mod in self.modules() if isinstance(mod, torch.nn.Linear)
        )
        for layer in linear_layers:
            layer.weight.detach().normal_(0, 0.001)
            if layer.bias is None:
                continue
            layer.bias.detach().zero_()

    def forward(self, x):
        """Return the raw logits for input ``x``."""
        return self.my_network(x)
```

In [None]:
### MODEL ###
# Competitive autoencoder

class CompAutoencoder(nn.Module):
    """Convolutional encoder followed by a linear + sigmoid decoder.

    The encoder applies two 3x3 convolutions with stride 2 and padding 1
    (1 -> 16 -> 16 channels), each followed by a ReLU. The decoder flattens
    the encoder output and passes it through one linear layer and a sigmoid.

    NOTE(review): the layer wiring is the smallest change that makes the
    shapes consistent (second convolution takes the 16 channels produced by
    the first; the linear layer consumes the flattened encoder output).
    Confirm this matches the intended architecture. No competitive/sparsity
    step is implemented in this class.

    Args:
        input_size: number of features entering the linear decoder layer,
            i.e. the size of the flattened encoder output.
        output_size: number of values produced per sample.
    """

    def __init__(self, input_size, output_size):

        # Initialize super class
        super().__init__()

        def init_weights(m):
            """He (Kaiming) uniform init for linear layers; bias set to 0.01."""
            if isinstance(m, nn.Linear):
                # With nonlinearity='relu' the weights are drawn from
                # U(-b, b) with b = sqrt(6 / fan_in). The bound must not be
                # passed as `a`: that argument is the leaky-ReLU slope.
                torch.nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0.01)

        self.encoder = nn.Sequential(
            nn.Conv2d(1, 16, 3, stride=2, padding=1),
            nn.ReLU(),
            # in_channels must equal the 16 channels produced above
            nn.Conv2d(16, 16, 3, stride=2, padding=1),
            nn.ReLU(),
        )

        self.decoder = nn.Sequential(
            nn.Flatten(),
            nn.Linear(input_size, output_size),
            # The inputs lie in [0, 1], so the output is squashed to [0, 1] too.
            nn.Sigmoid(),
        )

        self.decoder.apply(init_weights)

    def forward(self, x):
        """Encode and decode a batch of single-channel images.

        Args:
            x: tensor of shape (batch, 1, H, W).

        Returns:
            Tensor of shape (batch, output_size) with values in [0, 1].
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


# For 1x28x28 images the two stride-2 convolutions yield 16x7x7 features;
# the output has one value per input pixel (28 * 28).
model = CompAutoencoder(input_size=16 * 7 * 7, output_size=28 * 28)


## Creating your own Loss Functions
source: 
- https://discuss.pytorch.org/t/rmse-loss-function/16540/3
        
```python
# Be careful with NaN: when the MSE is exactly 0, the gradient of sqrt is undefined and backpropagation produces NaN. Adding a small eps under the square root avoids this:
class RMSELoss(nn.Module):
    """Root-mean-square error with a small constant added under the root.

    Args:
        eps: value added to the mean squared error before the square root
            is taken.
    """

    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self,yhat,y):
        """Return ``sqrt(MSE(yhat, y) + eps)``."""
        mean_sq_err = self.mse(yhat, y)
        return (mean_sq_err + self.eps).sqrt()
```

In [None]:
# Loss: mean squared error.
criterion = nn.MSELoss()
# RMSE alternative (not enabled): loss = torch.sqrt(criterion(x, y))

# Plain SGD over all model parameters.
optimizer = optim.SGD(params=model.parameters(), lr=1e-3)

# Multiply the learning rate by 0.1 after every 7 scheduler steps.
step_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)