## Sequence Variational Encoder 

In this notebook we implement an encoder similar to the one described in the paper
https://arxiv.org/abs/1803.05428

In [1]:
import torch
import torch.nn as nn

# Libraries useful for the test dataset
import seaborn as sns
import numpy as np

In [2]:
def create_input_sequences(input_data, tw):
    """Slice ``input_data`` into every contiguous window of length ``tw``.

    Windows are taken with stride 1 and returned in order of their start
    index. Each window is produced by plain slicing, so its type is whatever
    ``input_data[a:b]`` yields (list, array, tensor, ...).

    Args:
        input_data: A sliceable sequence that supports ``len()``.
        tw: Window length; must be at least 1.

    Returns:
        A list of ``len(input_data) - tw + 1`` windows, or an empty list when
        ``input_data`` is shorter than ``tw``.

    Raises:
        ValueError: If ``tw`` is smaller than 1.
    """
    if tw < 1:
        raise ValueError(f"window length tw must be >= 1, got {tw}")
    # "+ 1" so the last full window, input_data[L - tw:L], is not dropped.
    n_windows = len(input_data) - tw + 1
    return [input_data[start:start + tw] for start in range(n_windows)]

In [3]:
from sklearn.preprocessing import MinMaxScaler

# Build the set of training sequences.
# Load the seaborn "flights" dataset and keep its passenger counts as floats.
flight_data = sns.load_dataset("flights")
all_data = flight_data['passengers'].values.astype(float)
test_data_size = 12

# Hold out the last `test_data_size` values; everything before them is training data.
train_data = all_data[:-test_data_size]
test_data = all_data[-test_data_size:]

# Fit the scaler on the training split only and map it into [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_scaled = scaler.fit_transform(train_data.reshape(-1, 1))
# Flatten the (N, 1) scaler output into a 1-D float32 tensor.
train_data_normalized = torch.tensor(train_data_scaled, dtype=torch.float32).view(-1)

# Cut the normalized series into windows of `train_window` consecutive values.
train_window = 12
train_input_seq = create_input_sequences(train_data_normalized, train_window)

In [4]:
class Encoder(nn.Module):
    """Variational encoder built on a single bidirectional LSTM layer.

    ``forward`` reshapes its input to ``(-1, 1, len(input_seq))``, so a 1-D
    sequence of ``input_size`` values is presented to the LSTM as one time
    step with ``input_size`` features (assumes 1-D input; other shapes are
    not handled specially). The final hidden states of the two directions are
    concatenated and projected to the mean and log-variance of a diagonal
    Gaussian over the latent space.

    The LSTM state is kept in ``self.hidden_cell`` and carried over from one
    ``forward`` call to the next; call ``reset_hidden()`` before encoding a
    sequence that should start from a zero state.

    Args:
        input_size: Length of the input sequence; also used as the LSTM
            hidden size.
        latent_dim: Dimension of the latent space.
    """

    def __init__(self, input_size, latent_dim):
        super().__init__()
        self.input_size = input_size
        self.latent_dim = latent_dim

        self.lstm = nn.LSTM(input_size, input_size, bidirectional=True)
        self.mu = nn.Linear(2 * input_size, latent_dim)
        self.logvar = nn.Linear(2 * input_size, latent_dim)

        # Needs self.lstm to exist: the state is allocated to match its weights.
        self.reset_hidden()

    def sample(self, mu, logvar):
        """Draw ``mu + eps * std`` with ``eps ~ N(0, I)`` and ``std = exp(logvar / 2)``."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def reset_hidden(self):
        """Reset the carried LSTM state ``(h, c)`` to zeros.

        Each tensor has shape ``(2, 1, input_size)`` (two directions, batch of
        one) and is created with the dtype and device of the LSTM weights.
        """
        ref = self.lstm.weight_hh_l0
        shape = (2, 1, self.input_size)
        self.hidden_cell = (ref.new_zeros(shape), ref.new_zeros(shape))

    def forward(self, input_seq):
        """Encode ``input_seq`` and return ``(sample, mu, logvar)``.

        Each returned tensor has shape ``(1, latent_dim)``.
        """
        ref = self.lstm.weight_hh_l0
        # The module may have been moved or cast since the state was created.
        state = tuple(s.to(device=ref.device, dtype=ref.dtype) for s in self.hidden_cell)

        lstm_in = input_seq.reshape(-1, 1, len(input_seq))
        _, (h_n, c_n) = self.lstm(lstm_in, state)

        # Store a detached copy: keeping the graph-attached state would make the
        # next call's backward pass reach into this call's already-freed graph.
        self.hidden_cell = (h_n.detach(), c_n.detach())

        # Final hidden state of the forward (index 0) and backward (index 1) directions.
        hidden_seq = torch.cat((h_n[0], h_n[1]), 1)
        mu = self.mu(hidden_seq)
        logvar = self.logvar(hidden_seq)
        return self.sample(mu, logvar), mu, logvar

In [5]:
# Size the encoder from the window length used to build the training sequences
# rather than repeating the literal 12; the latent space has 4 dimensions.
latent_dim = 4
encoder = Encoder(train_window, latent_dim)

In [6]:
# Encode the first training window; the call returns the tuple (sample, mu, logvar).
latent_space_mapping = encoder(train_input_seq[0])

In [7]:
# Display the (sample, mu, logvar) tuple returned by the encoder.
latent_space_mapping

(tensor([[-0.9346,  0.3562, -0.1516,  0.2647]], grad_fn=<AddBackward0>),
 tensor([[-0.0870,  0.0698,  0.1671, -0.0056]], grad_fn=<AddmmBackward>),
 tensor([[ 0.0080, -0.0131, -0.0109,  0.2130]], grad_fn=<AddmmBackward>))