In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

from src import VRAE
from src.utils import *

In [15]:
# --- Training schedule -------------------------------------------------------
n_epochs = 3
batch_size = 50
learning_rate = 0.0005
optimizer = 'Adam'                  # options: Adam, SGD
loss = 'MSELoss'                    # options: SmoothL1Loss, MSELoss

# --- Model architecture ------------------------------------------------------
block = 'LSTM'                      # recurrent cell type; options: LSTM, GRU
hidden_size = 128
hidden_layer_depth = 1
latent_length = 30
dropout_rate = 0.0

# --- Gradient clipping -------------------------------------------------------
clip = True                         # options: True, False
max_grad_norm = 5                   # presumably the clipping threshold used when clip is True -- confirm in VRAE

# --- Runtime, logging and persistence ----------------------------------------
cuda = True                         # options: True, False
print_every = 30                    # presumably a logging interval -- confirm units in VRAE
saved_model_path = './saved_models'

In [16]:
# List the available dataset directories in sorted order so that a given index
# always refers to the same dataset.
directory_list = sorted(get_files_directory_list())

# Despite the name, the index is fixed: it picks one specific dataset from the
# sorted listing.
random_index = 105
random_path = directory_list[random_index]

# Load the dataset twice: once with the train/validation split, once unsplit.
X_train, X_val, y_train, y_val = get_data_from_directory(random_path)
X, y = get_data_from_directory(random_path, split=False)

# Report which dataset was chosen and the shape of each split.
print('Dataset: ', random_path)
print('X_train shape: ', X_train.shape)
print('y_train shape: ', y_train.shape)
print('X_test shape:  ', X_val.shape)
print('y_test shape:  ', y_val.shape)

Dataset:  SmoothSubspace
X_train shape:  (150, 15, 1)
y_train shape:  (150, 1)
X_test shape:   (150, 15, 1)
y_test shape:   (150, 1)


In [17]:
# Count the distinct labels present in the training split.
num_classes = np.unique(y_train).size

# Shift labels in place so the smallest training label becomes 0.
base = np.min(y_train)
if base != 0:
    y_train -= base
# The validation labels get the same offset; this is a no-op when base is 0.
y_val -= base

In [18]:
# Wrap each feature array in a TensorDataset; labels are not included.
train_dataset, test_dataset = (
    TensorDataset(torch.from_numpy(features)) for features in (X_train, X_val)
)

In [19]:
# Axis 1 of X_train holds the time steps and axis 2 the features per step.
sequence_length, number_of_features = X_train.shape[1:3]

print('sequence_length', sequence_length)
print('number_of_features', number_of_features)

sequence_length 15
number_of_features 1


In [20]:
# Sanity check: allocate a (sequence_length, batch_size, 1) tensor that tracks
# gradients and display its shape. No device is specified here, so this does
# not exercise the GPU.
torch.zeros((sequence_length, batch_size, 1), requires_grad=True).shape

torch.Size([15, 50, 1])

In [21]:
# Build the VRAE model from the hyperparameters defined in the configuration cell.
vrae = VRAE(
    sequence_length=sequence_length,
    number_of_features=number_of_features,
    hidden_size=hidden_size,
    hidden_layer_depth=hidden_layer_depth,
    latent_length=latent_length,
    batch_size=batch_size,
    learning_rate=learning_rate,
    n_epochs=n_epochs,
    dropout_rate=dropout_rate,
    optimizer=optimizer,
    # Honour the `cuda` configuration flag (previously ignored in favour of a
    # hard-coded False), but fall back to CPU when no CUDA device is available
    # so the notebook still runs on CPU-only machines.
    cuda=cuda and torch.cuda.is_available(),
    print_every=print_every,
    clip=clip,
    max_grad_norm=max_grad_norm,
    loss=loss,
    block=block,
    dload=saved_model_path,
)

In [23]:
# Train the model on the training dataset; progress is printed per epoch.
vrae.fit(train_dataset)

Epoch: 0
Average loss: 52.4786
Epoch: 1
Average loss: 47.8753
Epoch: 2
Average loss: 46.9353


In [24]:
# Persist the trained model under a dataset-specific file name, then reload it.
# NOTE(review): the load path assumes save() writes relative to
# saved_model_path (the `dload` passed to VRAE) -- confirm in VRAE.save.
vrae.save(f'{random_path}_vrae.pth')
vrae.load(f'{saved_model_path}/{random_path}_vrae.pth')

In [26]:
# Encode both splits with the trained model (training split first), then
# display the shape of the test encodings.
z_train, z_test = vrae.transform(train_dataset), vrae.transform(test_dataset)
z_test.shape

(150, 30)

In [None]:
print(random_path)
# Plot the latent encodings of the validation split against its labels.
# `z_test` is the encoding of the dataset built from X_val, so it pairs with
# y_val. The previous `z_run` was never defined in this notebook and raised a
# NameError on a fresh kernel.
plot_clustering(z_test, y_val, engine='matplotlib', download=False)