## Install if necessary torchnet and torch

!pip install torch --user
!pip install torchnet --user

## Load library

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn.functional as F

from torch.utils.data import DataLoader
from torchnet import meter

from torch.optim import Adam

from vae import VAE

## VAE

Learn more [here](https://towardsdatascience.com/understanding-variational-autoencoders-vaes-f70510919f73)

Code source [here](https://github.com/shib0li/VAE-torch)

![](https://miro.medium.com/max/1400/1*Q5dogodt3wzKKktE0v3dMQ@2x.png)

## Load data and split into train and test set

In [2]:
df = pd.read_csv('data/train.csv', header=None, index_col=0)
df.head()

Unnamed: 0_level_0,1,2,3,4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.012495,0.011126,0.003252,0.006625
1,0.011439,0.002691,0.001206,0.006947
2,0.000632,0.007277,0.004049,7.4e-05
3,0.017828,0.02821,0.007758,0.007382
4,0.021115,0.019642,0.009238,0.011499


In [3]:
X = df.values
data_dim = X.shape[1]

In [4]:
eval_rate = 0.5

In [5]:
X = torch.tensor(X, dtype=torch.float)
eval_index = int(X.shape[0] * (1 - eval_rate))

train_data = X[0: eval_index]
test_data = X[eval_index:]

train_data.shape, test_data.shape

(torch.Size([373, 4]), torch.Size([373, 4]))

In [6]:
if torch.cuda.is_available():
    print('You use GPU !')
    device = torch.device('cuda')
else:
    print('You use CPU !')
    device = torch.device('cpu')

You use GPU !


## Model configuration

In [7]:
in_dim = X.shape[1]
encoder_width = 256
decoder_width = 256
latent_dim = 64

In [8]:
batch_size=16
max_epoch=1000
lr= 0.001
weight_decay = 0.0075

## Train

In [9]:
# config model
model = VAE(train_data, test_data, in_dim, encoder_width, decoder_width, latent_dim, device)
# train VAE
hist_loss = model.train(batch_size, max_epoch, lr, weight_decay)

100%|██████████| 1000/1000 [00:48<00:00, 20.48it/s, val_loss=0.247, epoch=999]


In [10]:
np.savetxt('figures/loss.csv', hist_loss, delimiter=',')

## Generate new data

In [11]:
Xnoise = model.test2(test_data.shape[0])

In [12]:
Xnoise[:7]

tensor([[[[0.0171, 0.0168, 0.0133, 0.0149],
          [0.0128, 0.0125, 0.0098, 0.0111],
          [0.0117, 0.0114, 0.0088, 0.0100],
          ...,
          [0.0127, 0.0124, 0.0097, 0.0109],
          [0.0233, 0.0229, 0.0186, 0.0206],
          [0.0147, 0.0144, 0.0113, 0.0127]]]], device='cuda:0',
       grad_fn=<SliceBackward0>)

In [15]:
generated_data = Xnoise.cpu().detach().numpy()
generated_data.shape

(1, 1, 373, 4)

In [16]:
generated_data = generated_data.reshape((test_data.shape[0], 4))
generated_data[:2]

array([[0.01712526, 0.01677456, 0.01333138, 0.01494107],
       [0.01281224, 0.01252779, 0.0097755 , 0.01105525]], dtype=float32)

## Check distribution

In [17]:
from scipy import stats # import anderson, kendalltau

In [18]:
corr, p_value = stats.kendalltau(test_data, generated_data)
print(f"correlation : %f - pvalue : %f" % (corr, p_value))

correlation : 0.043719 - pvalue : 0.011403


In [19]:
stats.kendalltau(test_data, test_data)

KendalltauResult(correlation=0.9999999999999998, pvalue=0.0)

In [20]:
stats.kendalltau(test_data, train_data)

KendalltauResult(correlation=0.0016902127690180402, pvalue=0.9220787577366621)

In [23]:
np.mean(generated_data, axis=0,)

array([0.01707156, 0.01672603, 0.01332904, 0.01491779], dtype=float32)

In [25]:
test_data.mean(axis=0)

tensor([0.0138, 0.0134, 0.0099, 0.0108])

In [28]:
test_data.T.cov()

tensor([[1.4305e-04, 6.0533e-05, 4.6876e-05, 6.7385e-05],
        [6.0533e-05, 1.2477e-04, 7.1715e-05, 3.8462e-05],
        [4.6876e-05, 7.1715e-05, 9.8347e-05, 3.6174e-05],
        [6.7385e-05, 3.8462e-05, 3.6174e-05, 7.7050e-05]])

In [31]:
np.cov(generated_data.T,)

array([[2.47047038e-05, 2.43569856e-05, 2.06774969e-05, 2.24425764e-05],
       [2.43569856e-05, 2.40141944e-05, 2.03867802e-05, 2.21268881e-05],
       [2.06774969e-05, 2.03867802e-05, 1.73098700e-05, 1.87859748e-05],
       [2.24425764e-05, 2.21268881e-05, 1.87859748e-05, 2.03886890e-05]])