In [1]:
import os
import pickle
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_probability as tfp
from sklearn.mixture import GaussianMixture


### Data

In [2]:
x_reduced = [str(i) for i in range(10)]
x_reduced

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [3]:
x_random = {}
for i in range(10):
    x_random[x_reduced[i]] = np.random.uniform(low=0, 
                                                high=1,
                                                size=100)
    
x_random_df = pd.DataFrame(data=x_random)

In [9]:
pld_complete_range = x_random_df
pld_complete_range

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.754648,0.568600,0.179062,0.638155,0.027378,0.924834,0.511803,0.265009,0.044962,0.671032
1,0.362543,0.651826,0.379083,0.559643,0.112866,0.821646,0.517223,0.830301,0.705959,0.462984
2,0.544954,0.471577,0.450189,0.296206,0.585435,0.444555,0.370667,0.856696,0.050579,0.071493
3,0.290611,0.684190,0.899981,0.143347,0.816143,0.146984,0.012126,0.387605,0.310958,0.652850
4,0.218154,0.573345,0.644576,0.082522,0.231859,0.776435,0.575449,0.977274,0.891450,0.754766
...,...,...,...,...,...,...,...,...,...,...
95,0.154531,0.103619,0.064677,0.631448,0.777555,0.071629,0.396080,0.483014,0.614098,0.709074
96,0.885422,0.738188,0.574697,0.405136,0.846041,0.895292,0.854100,0.858339,0.689148,0.491511
97,0.944302,0.678104,0.850284,0.371537,0.105801,0.908954,0.074016,0.248671,0.232950,0.341122
98,0.401060,0.166863,0.340361,0.248153,0.794704,0.482803,0.906580,0.010509,0.056129,0.648684


### TSGAN (Time-Series)

Paper: https://papers.nips.cc/paper/2019/file/c9efe5f26cd17ba6216bbe2a7d26d490-Paper.pdf

Source code: https://github.com/ydataai/ydata-synthetic

Describe: https://towardsdatascience.com/synthetic-time-series-data-a-gan-approach-869a984f2239

###### Define Model hyperparameters
- Networks: Generator; Discriminator; Embedder; Recovery Network

TimeGAN is a Generative model based on RNN networks. In this package the implemented version follows a very simple architecture that is shared by the four elements of the GAN.
Similarly to other parameters, the architectures of each element should be optimized and tailored to the data.

In [5]:
from ydata_synthetic.synthesizers.timeseries import TimeGAN

In [6]:
from ydata_synthetic.preprocessing.timeseries import utils as pp

In [15]:
#Specific to TimeGANs
seq_len=24
n_seq = len(x_reduced)
hidden_dim=24
gamma=1

noise_dim = 32
dim = 128
batch_size = 128


log_step = 100
learning_rate = 5e-4

# taken from the source code: https://github.com/ydataai/ydata-synthetic/blob/49f62216cd3314a4a25ae257535028629ce48bcf/src/ydata_synthetic/synthesizers/gan.py#L22

#          [batch_size, learning_rate, beta_1,   beta_2,  noise_dim, data_dim, layers_dim] 
gan_args = [batch_size, learning_rate, noise_dim, 24,     2,         (0, 1),   dim]

In [16]:
np.random.seed(42)
n_samples = pld_complete_range.shape[0]
n_train_samples = int(.7*n_samples)
all_indices = np.arange(n_samples).tolist()
train_indices = np.random.choice(all_indices, n_train_samples, replace=False).tolist()
test_indices = list(set(all_indices) - set(train_indices))



In [17]:
data_train = pp.real_data_loading(pld_complete_range.iloc[train_indices, 
                                                          :].values, 1)  # window len=1

data_test = pp.real_data_loading(pld_complete_range.iloc[test_indices, 
                                                        :].values, 1)  # window len=1

# y_train = pp.real_data_loading(pld_complete_range_reduced.iloc[train_indices, -1].values, 1)
# y_test = pp.real_data_loading(pld_complete_range_reduced.iloc[test_indices, -1].values, 1)

In [18]:
len(data_train), data_train[0].shape

(69, (1, 10))

In [19]:
synth = TimeGAN(model_parameters=gan_args, hidden_dim=24, seq_len=seq_len, n_seq=n_seq, gamma=1)
synth.train(data_train, train_steps=5)
# synth.save('synthesizer_pld.pkl')

Emddeding network training:   0%|          | 0/5 [00:00<?, ?it/s]


ValueError: in user code:

    /home/soroosh/.local/lib/python3.7/site-packages/ydata_synthetic/synthesizers/timeseries/timegan/model.py:108 train_autoencoder  *
        x_tilde = self.autoencoder(x)
    /home/soroosh/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__  **
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /home/soroosh/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_spec.py:274 assert_input_compatibility
        ', found shape=' + display_shape(x.shape))

    ValueError: Input 0 is incompatible with layer model: expected shape=(None, 24, 10), found shape=(69, 1, 10)
