In [85]:
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import Image, display, clear_output
import numpy as np
%matplotlib nbagg
%matplotlib inline
import seaborn as sns
sns.set_style("whitegrid")
sns.set_palette(sns.dark_palette("purple"))

In [86]:
import torch
cuda = torch.cuda.is_available()

from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.datasets import MNIST
# We need some other data!!
from torchvision.transforms import ToTensor
from functools import reduce

# Flatten the images into a vector
flatten = lambda x: ToTensor()(x).view(28**2)

# Define the train and test sets
dset_train = MNIST("./", train=True,  transform=flatten, download=True)
dset_test  = MNIST("./", train=False, transform=flatten)

# The digit classes to use
#classes = [3, 7]
classes=[0, 1, 4, 9]

def stratified_sampler(labels):
    """Sampler that only picks datapoints corresponding to the specified classes"""
    (indices,) = np.where(reduce(lambda x, y: x | y, [labels.numpy() == i for i in classes]))
    indices = torch.from_numpy(indices)
    return SubsetRandomSampler(indices)


batch_size = 64
# The loaders perform the actual work
train_loader = DataLoader(dset_train, batch_size=batch_size,
                          sampler=stratified_sampler(dset_train.train_labels), pin_memory=cuda)
test_loader  = DataLoader(dset_test, batch_size=batch_size, 
                          sampler=stratified_sampler(dset_test.test_labels), pin_memory=cuda)

In [88]:
import pandas as pd
train_data = pd.read_table('ECG5000\ECG5000_TRAIN.tsv')
test_data = pd.read_table('ECG5000\ECG5000_TEST.tsv')

In [89]:
train_data.shape
train_data.iloc[300,1:140]
torch.zeros(train_data.shape[0], train_data.shape[1]-1).size()

torch.Size([499, 140])

In [145]:
class TrainDataset(torch.utils.data.Dataset):
  def __init__(self, window_size=100):
    #self.oudataframe = pd.DataFrame(np.random.randn(400, 2))
    #self.window_size = window_size
    self.traindataframe = train_data

  def __len__(self):
    return train_data.shape[1]-1 # This is always 140 for this dataset
  
  def __getitem__(self, idx):
    #idx = idx * self.window_size #Indexing should be straight forward here
    print('window: {}-{}'.format(idx, idx))
    #data = torch.zeros(train_data.shape[0], train_data.shape[1]-1)
    #target = torch.zeros(train_data.shape[0],1)
    #for i in range(0, train_data.shape[0]):
      #data[i] = torch.tensor(self.traindataframe.iloc[i,1:141])
      #target[i] = torch.tensor(self.traindataframe.iloc[i,0])
    data = torch.tensor(self.traindataframe.iloc[idx,1:141])
    target = torch.tensor(self.traindataframe.iloc[idx,0])
    return data, target

class TestDataset(torch.utils.data.Dataset):
  def __init__(self, window_size=100):
    #self.oudataframe = pd.DataFrame(np.random.randn(400, 2))
    #self.window_size = window_size
    self.testdataframe = train_data

  def __len__(self):
    return train_data.shape[1]-1 # This is always 140 for this dataset
  
  def __getitem__(self, idx):
    #idx = idx * self.window_size #Indexing should be straight forward here
    print('window: {}-{}'.format(idx, idx))
    #data = torch.zeros(test_data.shape[0], test_data.shape[1]-1)
    #target = torch.zeros(test_data.shape[0],1)
    #for i in range(0, train_data.shape[0]):
      #data[i] = torch.tensor(self.testdataframe.iloc[i,1:141])
      #target[i] = torch.tensor(self.testdataframe.iloc[i,0])
    data = torch.tensor(self.testdataframe.iloc[idx,1:141])
    target = torch.tensor(self.testdataframe.iloc[idx,0])
    return data, target

dset_train = TrainDataset()
dset_test = TestDataset()
batch_size = 5
# The loaders perform the actual work
train_loader = DataLoader(dset_train, batch_size=batch_size)
                          #,sampler=stratified_sampler(dset_train.train_labels), pin_memory=cuda)
test_loader  = DataLoader(dset_test, batch_size=batch_size) 
                          #,sampler=stratified_sampler(dset_test.test_labels), pin_memory=cuda)

In [148]:
data, target = next(iter(test_loader))
print(data.shape)
print(target.shape)
target


window: 0-0
window: 1-1
window: 2-2
window: 3-3
window: 4-4
torch.Size([5, 140])
torch.Size([5])


tensor([1, 1, 1, 1, 1])

In [138]:
nn.LSTM(input_dim, hidden_dim,
                            n_layers, bidirectional=True)

LSTM(1, 128, bidirectional=True)

Build the model

In [164]:
import torch.nn as nn
from torch.nn.functional import softplus

# define size variables
num_features = 28**2

class VariationalAutoencoder(nn.Module):
    def __init__(self, latent_features, num_samples):
        super(VariationalAutoencoder, self).__init__()
        
        self.latent_features = latent_features
        self.num_samples = num_samples

        # We encode the data onto the latent space using two linear layers
        self.encoder = nn.Sequential(
            nn.Linear(in_features=140, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=128),
            nn.ReLU(),
            # A Gaussian is fully characterised by its mean and variance
            nn.Linear(in_features=128, out_features=2*self.latent_features) # <- note the 2*latent_features
        )
        
        # The latent code must be decoded into the original image
        self.decoder = nn.Sequential(
            nn.Linear(in_features=self.latent_features, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=140)
        )
        

    def forward(self, x): 
        outputs = {}
        
        # Split encoder outputs into a mean and variance vector
        mu, log_var = torch.chunk(self.encoder(x), 2, dim=-1)
        
        # :- Reparametrisation trick
        # a sample from N(mu, sigma) is mu + sigma * epsilon
        # where epsilon ~ N(0, 1)
                
        # Don't propagate gradients through randomness
        with torch.no_grad():
            batch_size = mu.size(0)
            epsilon = torch.randn(batch_size, self.num_samples, self.latent_features)
            
            if cuda:
                epsilon = epsilon.cuda()
        
        sigma = torch.exp(log_var/2)
        
        # We will need to unsqueeze to turn
        # (batch_size, latent_dim) -> (batch_size, 1, latent_dim)
        z = mu.unsqueeze(1) + epsilon * sigma.unsqueeze(1)        
        
        # Run through decoder
        x = self.decoder(z)
        
        # The original digits are on the scale [0, 1]
        #x = torch.sigmoid(x)
        
        # Mean over samples
        x_hat = torch.mean(x, dim=1)
        
        outputs["x_hat"] = x_hat
        outputs["z"] = z
        outputs["mu"] = mu
        outputs["log_var"] = log_var
        
        return outputs


latent_features = 5
num_samples = 10

net = VariationalAutoencoder(latent_features, num_samples)

# Transfer model to GPU if available
if cuda:
    net = net.cuda()

print(net)

VariationalAutoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=140, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=10, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=5, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=140, bias=True)
  )
)


In [139]:
import torch.nn as nn
from torch.nn.functional import softplus

# define size variables
input_dim = 1
hidden_dim = 128
n_layers = 1
z_dim = 10

class VariationalAutoencoder(nn.Module):
    def __init__(self, latent_features, num_samples):
        super(VariationalAutoencoder, self).__init__()
        

        # We encode the data onto the latent space using two linear layers
        self.encoder = nn.Sequential(
            nn.LSTM(input_dim, hidden_dim,
                            n_layers, bidirectional=True),
            nn.Tanh(),
            # A Gaussian is fully characterised by its mean and variance
            # A Hidden layer that produces 10 outputs from 10 inputs (??)
            nn.Linear(hidden_dim, out_features=z_dim) 
        )
        
        # The latent code must be decoded into the original image
        self.decoder = nn.Sequential(
            nn.LSTM(z_dim, hidden_dim,
                            n_layers, bidirectional=True),
        )
        

    def forward(self, x): 
        outputs = {}
        
        # Split encoder outputs into a mean and variance vector
        mu, log_var = torch.chunk(self.encoder(x), 2, dim=-1)
        
        # :- Reparametrisation trick
        # a sample from N(mu, sigma) is mu + sigma * epsilon
        # where epsilon ~ N(0, 1)
                
        # Don't propagate gradients through randomness
        with torch.no_grad():
            batch_size = mu.size(0)
            epsilon = torch.randn(batch_size, self.num_samples, self.latent_features)
            
            if cuda:
                epsilon = epsilon.cuda()
        
        #sigma = torch.exp(log_var/2)
        #We use softplus as in the article
        sigma = nn.functional.softplus(log_var/2)
        
        # We will need to unsqueeze to turn
        # (batch_size, latent_dim) -> (batch_size, 1, latent_dim)
        z = mu.unsqueeze(1) + epsilon * sigma.unsqueeze(1)        
        
        # Run through decoder
        x = self.decoder(z)
        
        # The original digits are on the scale [0, 1]
        x = torch.sigmoid(x)
        
        # Mean over samples
        x_hat = torch.mean(x, dim=1)
        
        outputs["x_hat"] = x_hat
        outputs["z"] = z
        outputs["mu"] = mu
        outputs["log_var"] = log_var
        
        return outputs
    
latent_features = 10
num_samples = 10

net = VariationalAutoencoder(latent_features, num_samples)

print(net)

VariationalAutoencoder(
  (encoder): Sequential(
    (0): LSTM(1, 128, bidirectional=True)
    (1): Tanh()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
  (decoder): Sequential(
    (0): LSTM(10, 128, bidirectional=True)
  )
)


In [165]:
from torch.nn.functional import binary_cross_entropy
from torch import optim

def ELBO_loss(y, t, mu, log_var):
    # Reconstruction error, log[p(x|z)]
    # Sum over features
    likelihood = -binary_cross_entropy(y, t, reduction="none")
    likelihood = likelihood.view(likelihood.size(0), -1).sum(1)

    # Regularization error: 
    # Kulback-Leibler divergence between approximate posterior, q(z|x)
    # and prior p(z) = N(z | mu, sigma*I).
    
    # In the case of the KL-divergence between diagonal covariance Gaussian and 
    # a standard Gaussian, an analytic solution exists. Using this excerts a lower
    # variance estimator of KL(q||p)
    kl = -0.5 * torch.sum(1 + log_var - mu**2 - torch.exp(log_var), dim=1)

    # Combining the two terms in the evidence lower bound objective (ELBO) 
    # mean over batch
    ELBO = torch.mean(likelihood) - torch.mean(kl)
    
    # notice minus sign as we want to maximise ELBO
    return -ELBO, kl.sum()


# define our optimizer
# The Adam optimizer works really well with VAEs.
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = ELBO_loss

In [167]:
from torch.autograd import Variable

x, _ = next(iter(train_loader))
x = Variable(x)

if cuda:
    x = x.cuda()

outputs = net(x)

x_hat = outputs["x_hat"]
mu, log_var = outputs["mu"], outputs["log_var"]
z = outputs["z"]

#loss, kl = loss_function(x_hat, x, mu, log_var)

print(x.shape)
print(x_hat.shape)
print(z.shape)
#print(loss)
#print(kl)


window: 0-0
window: 1-1
window: 2-2
window: 3-3
window: 4-4
torch.Size([5, 140])
torch.Size([5, 140])
torch.Size([5, 10, 5])


In [173]:
print(x_hat[1,:])
print(x[1,:])

tensor([-5.0170e-02,  6.7447e-02, -9.0022e-02, -6.7385e-02,  4.9073e-02,
        -4.5537e-02, -9.4199e-03, -1.0618e-02, -3.6856e-02, -1.0409e-02,
         1.5100e-01, -6.4907e-02,  6.8316e-02,  1.7823e-01,  1.6770e-01,
        -1.4425e-01,  2.5562e-02, -5.4184e-02, -3.8584e-02,  8.8814e-02,
         4.1699e-02, -8.6738e-02,  7.2881e-02,  1.0035e-01, -8.3512e-02,
        -4.9836e-02,  6.0432e-02,  5.1704e-02,  7.1146e-02,  7.8680e-02,
         7.5735e-02, -2.5655e-02,  9.3454e-03,  3.7206e-02, -1.4947e-02,
        -7.2569e-03, -1.3486e-02,  9.5814e-02, -1.3894e-01,  6.2074e-02,
        -7.0619e-02, -9.1724e-02, -2.8053e-02, -1.9009e-01, -3.2121e-02,
         1.7492e-01,  2.5640e-02,  1.4209e-01, -1.0982e-01, -6.9626e-03,
         1.8786e-02,  4.0107e-02,  2.2223e-02, -4.7197e-02, -7.5971e-02,
         1.2898e-01,  2.5099e-02, -7.1415e-02,  3.5252e-02,  5.1161e-02,
         1.4526e-01,  1.4704e-01, -9.2392e-02, -6.6892e-02, -9.1430e-02,
         5.5615e-02, -1.1123e-02, -3.8603e-03, -1.0

In [None]:
#dataSet = TestDataset()
#dataloader = DataLoader(dataSet, batch_size=2, shuffle=False)
#for idx, (data, target) in enumerate(dataloader):
#    print('BatchIdx {}, data.shape {}, target.shape {}'.format(
#            idx, data.shape, target.shape))