# Hidden layer representation
Filippo Fantinato 2041620

In this Colab notebook I implemented, using PyTorch, the autoencoder you showed during lesson 9.

In [26]:
import numpy as np
from torch import nn, optim
import torch

# Seed PyTorch's global random number generator so that the randomly
# initialised layer weights are the same on every run of the notebook.
torch.manual_seed(42)

<torch._C.Generator at 0x7fadcd293630>

In [27]:
# Prefer the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

The following class implements an autoencoder with layer sizes $input\_size \times encoding\_dim \times input\_size$ and the sigmoid as its activation function.

In [28]:
class Autoencoder(nn.Module):
    """Autoencoder with a single hidden layer and sigmoid activations.

    The network maps ``input_size`` features to ``encoding_dim`` hidden units
    and then back to ``input_size`` outputs.
    """

    def __init__(self, input_size, encoding_dim):
        """Create the two linear layers.

        Args:
            input_size: number of features of each input (and output) vector.
            encoding_dim: number of units in the hidden layer.
        """
        super().__init__()
        # The encoding layer is created before the decoding layer, so the
        # order in which random initial weights are drawn stays the same.
        self.input = nn.Linear(in_features=input_size, out_features=encoding_dim)
        self.output = nn.Linear(in_features=encoding_dim, out_features=input_size)

    def forward(self, x):
        """Encode ``x`` into the hidden layer and return its reconstruction."""
        hidden = torch.sigmoid(self.input(x))
        return torch.sigmoid(self.output(hidden))

Let's set the input size to $8$ and the encoding dimension to $3$, and define the data to learn, $X$: the eight one-hot vectors of length $8$.

In [29]:
input_size = 8
encoding_dim = 3

# The data to learn are the eight one-hot vectors of length `input_size`,
# i.e. the rows of the 8x8 identity matrix (float32, like the literal form).
X = torch.eye(input_size)

Next, instantiate the model, the MSE loss function and the Adam optimizer with a learning rate of $0.005$.

In [30]:
# Build the network and move it to the selected device before the optimizer
# is handed its parameters.
model = Autoencoder(input_size=input_size, encoding_dim=encoding_dim)
model = model.to(device)

# Mean squared error between reconstruction and input (mean is the default reduction).
mse_loss = nn.MSELoss(reduction="mean")

# Adam over all trainable parameters with learning rate 0.005.
optimizer = optim.Adam(params=model.parameters(), lr=0.005)

Let's train the model for 1000 epochs.

In [31]:
epochs = 1000

# X is created on the CPU while the model has been moved to `device`.
# Move the data once, before the loop, so the forward pass does not fail
# with a device mismatch when a GPU is selected.
X_device = X.to(device)

model.train(True)
for epoch in range(1, epochs + 1):
    loss_value = 0  # sum of the per-sample losses over this epoch
    for x in X_device:
        optimizer.zero_grad()
        # Call the module itself instead of .forward() so that any
        # registered hooks are executed as well.
        y = model(x)
        # nn.MSELoss takes (input, target): reconstruction first, original second.
        loss = mse_loss(y, x)
        loss_value += loss.item()
        loss.backward()
        optimizer.step()

    print("Epoch number: ", epoch, "; Loss: ", loss_value)

# Leave training mode; as the last expression of the cell this also displays
# the model summary.
model.train(False)

Epoch number:  1 ; Loss:  2.2795699536800385
Epoch number:  2 ; Loss:  2.159386456012726
Epoch number:  3 ; Loss:  2.049606040120125
Epoch number:  4 ; Loss:  1.9468650221824646
Epoch number:  5 ; Loss:  1.8504043370485306
Epoch number:  6 ; Loss:  1.7596189677715302
Epoch number:  7 ; Loss:  1.6741009205579758
Epoch number:  8 ; Loss:  1.5936513543128967
Epoch number:  9 ; Loss:  1.5182380974292755
Epoch number:  10 ; Loss:  1.4479289501905441
Epoch number:  11 ; Loss:  1.3828266561031342
Epoch number:  12 ; Loss:  1.3230146914720535
Epoch number:  13 ; Loss:  1.2685189843177795
Epoch number:  14 ; Loss:  1.2192838788032532
Epoch number:  15 ; Loss:  1.1751644760370255
Epoch number:  16 ; Loss:  1.1359301954507828
Epoch number:  17 ; Loss:  1.1012791097164154
Epoch number:  18 ; Loss:  1.0708565190434456
Epoch number:  19 ; Loss:  1.0442769899964333
Epoch number:  20 ; Loss:  1.021143488585949
Epoch number:  21 ; Loss:  1.0010647103190422
Epoch number:  22 ; Loss:  0.9836680218577385


Autoencoder(
  (input): Linear(in_features=8, out_features=3, bias=True)
  (output): Linear(in_features=3, out_features=8, bias=True)
)

After training, I end up with the following learned weights. If they are rounded, and then 1 is assigned to the positive values and 0 to all the others, it turns out that the learned representation is exactly the 3-bit binary encoding.

In [32]:
# Raw weights of the decoding layer: one row per output unit, one column per
# hidden unit.
print(model.output.weight)

# Round the weights, then map strictly positive entries to 1 and all others
# to 0. A vectorised comparison is used instead of Tensor.apply_, which only
# works on CPU tensors and would fail when the model lives on the GPU.
rounded_weights = torch.round(model.output.weight.detach())
print((rounded_weights > 0).float().cpu())

Parameter containing:
tensor([[ 2.2137,  2.6305, -8.7743],
        [ 2.7271, -7.9715,  2.9562],
        [-6.6547, -7.4297,  2.0704],
        [ 0.0459, -0.2461, -0.9010],
        [-6.9716,  2.3367, -7.0172],
        [ 0.9153,  1.1483,  1.2592],
        [-9.5167,  2.5303,  2.6955],
        [ 2.7667, -7.8737, -7.9137]], requires_grad=True)
tensor([[1., 1., 0.],
        [1., 0., 1.],
        [0., 0., 1.],
        [0., 0., 0.],
        [0., 1., 0.],
        [1., 1., 1.],
        [0., 1., 1.],
        [1., 0., 0.]])
