In [1]:
! pip install ydata-synthetic==1.1.0

Collecting ydata-synthetic==1.1.0
  Downloading ydata_synthetic-1.1.0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting requests<2.29,>=2.24.0 (from ydata-synthetic==1.1.0)
  Downloading requests-2.28.2-py3-none-any.whl.metadata (4.6 kB)
Collecting pandas==1.5.* (from ydata-synthetic==1.1.0)
  Downloading pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting numpy==1.23.* (from ydata-synthetic==1.1.0)
  Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Collecting scikit-learn==1.2.* (from ydata-synthetic==1.1.0)
  Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting matplotlib==3.6.* (from ydata-synthetic==1.1.0)
  Downloading matplotlib-3.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting tensorflow==2.11.0 (from ydata-synthetic==1.1.0)
  Downloading tensorflow-2.11.0-cp310-cp310-many

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from ydata_synthetic.synthesizers.timeseries import TimeGAN
from ydata_synthetic.synthesizers.gan import ModelParameters
from ydata_synthetic.preprocessing.timeseries.utils import real_data_loading

In [2]:
from google.colab import drive
drive.mount('/content/drive')

file_path = '/content/drive/My Drive/PEMS-BAY.csv'
data = pd.read_csv(file_path, na_values=[0])
data.dropna(inplace=True)
data.drop(columns=['Timestamp'], inplace=True)

Mounted at /content/drive


In [3]:
seq_len = 12        # Timesteps
n_seq = 325          # Features

hidden_dim = 24     # Hidden units for generator (GRU & LSTM).
                    # Also decides output_units for generator

gamma = 1           # Used for discriminator loss

noise_dim = 32      # Used by generator as a starter dimension
dim = 128           # UNUSED
batch_size = 128

learning_rate = 5e-4
beta_1 = 0          # UNUSED
beta_2 = 1          # UNUSED
data_dim = 28       # UNUSED

# batch_size, lr, beta_1, beta_2, noise_dim, data_dim, layers_dim
# gan_args = [batch_size, learning_rate, beta_1, beta_2, noise_dim, data_dim, dim]

# Create ModelParameters named tuple
gan_args = ModelParameters(batch_size=batch_size,
                           lr=learning_rate,
                           betas=(beta_1, beta_2),
                           noise_dim=noise_dim,
                           layers_dim=dim)

In [4]:
df = real_data_loading(data.values, seq_len=seq_len)

print(len(df), df[0].shape)

52100 (12, 325)


In [5]:
synth = TimeGAN(model_parameters=gan_args, hidden_dim=hidden_dim, seq_len=seq_len, n_seq=n_seq, gamma=gamma)
synth.train(df, train_steps=50)
# synth.save('synth_energy.pkl')

synth_data = synth.sample(8640)

Emddeding network training: 100%|██████████| 50/50 [19:15<00:00, 23.11s/it]
Supervised network training: 100%|██████████| 50/50 [19:14<00:00, 23.08s/it]
Joint networks training: 100%|██████████| 50/50 [59:13<00:00, 71.08s/it]
Synthetic data generation: 100%|██████████| 68/68 [00:28<00:00,  2.38it/s]


In [8]:
synth_data

array([[[0.7372075 , 0.6927607 , 0.6814533 , ..., 0.7067295 ,
         0.75227505, 0.7659936 ],
        [0.8179856 , 0.7697898 , 0.7454834 , ..., 0.77436167,
         0.8244731 , 0.85078084],
        [0.82796955, 0.7831218 , 0.7566668 , ..., 0.7797708 ,
         0.83045834, 0.86218625],
        ...,
        [0.8301102 , 0.78727585, 0.7611787 , ..., 0.779936  ,
         0.83075064, 0.8655002 ],
        [0.83011115, 0.78727835, 0.7611824 , ..., 0.7799358 ,
         0.8307501 , 0.8655027 ],
        [0.8301115 , 0.7872794 , 0.7611839 , ..., 0.77993566,
         0.83074987, 0.8655037 ]],

       [[0.7372072 , 0.6927604 , 0.68145305, ..., 0.7067292 ,
         0.7522747 , 0.76599324],
        [0.8179856 , 0.76978976, 0.74548334, ..., 0.7743616 ,
         0.824473  , 0.8507808 ],
        [0.82796955, 0.7831219 , 0.7566668 , ..., 0.7797708 ,
         0.83045834, 0.8621862 ],
        ...,
        [0.8301102 , 0.7872758 , 0.7611787 , ..., 0.7799359 ,
         0.83075064, 0.8655002 ],
        [0.8

In [9]:
df_synth = pd.DataFrame(synth_data[0])
df_synth


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,315,316,317,318,319,320,321,322,323,324
0,0.737207,0.692761,0.681453,0.715073,0.679841,0.719961,0.647282,0.703758,0.749886,0.679258,...,0.728758,0.745252,0.721861,0.740029,0.720651,0.617074,0.685363,0.706729,0.752275,0.765994
1,0.817986,0.76979,0.745483,0.791308,0.750065,0.796791,0.703188,0.780187,0.819161,0.752946,...,0.805314,0.82822,0.797972,0.820008,0.80254,0.658662,0.753751,0.774362,0.824473,0.850781
2,0.82797,0.783122,0.756667,0.799979,0.759564,0.803081,0.715143,0.789568,0.823517,0.761555,...,0.815793,0.837606,0.80929,0.828467,0.812194,0.663586,0.765525,0.779771,0.830458,0.862186
3,0.829543,0.786006,0.759579,0.801398,0.761368,0.803404,0.718502,0.791215,0.823354,0.762213,...,0.817543,0.838858,0.811714,0.829792,0.813657,0.66469,0.76831,0.779998,0.830831,0.864433
4,0.829926,0.786832,0.760572,0.801767,0.761936,0.803358,0.719642,0.791688,0.823166,0.762173,...,0.817949,0.839106,0.812444,0.830157,0.814028,0.665102,0.769166,0.779967,0.83081,0.865105
5,0.830045,0.787111,0.760944,0.801887,0.76215,0.803321,0.720065,0.791854,0.823084,0.762115,...,0.818068,0.83917,0.812701,0.830285,0.814153,0.665268,0.769464,0.779948,0.83078,0.865347
6,0.830086,0.787214,0.761089,0.801931,0.762234,0.803302,0.720228,0.791917,0.82305,0.762085,...,0.818108,0.83919,0.812798,0.830334,0.814201,0.665336,0.769575,0.77994,0.830763,0.865442
7,0.830102,0.787254,0.761146,0.801947,0.762268,0.803294,0.720292,0.791941,0.823037,0.762071,...,0.818123,0.839196,0.812836,0.830354,0.814219,0.665363,0.769618,0.779938,0.830755,0.865479
8,0.830108,0.78727,0.761169,0.801954,0.762282,0.803291,0.720318,0.791951,0.823032,0.762066,...,0.818129,0.839199,0.812851,0.830362,0.814227,0.665374,0.769635,0.779936,0.830752,0.865494
9,0.83011,0.787276,0.761179,0.801957,0.762287,0.80329,0.720329,0.791955,0.82303,0.762063,...,0.818131,0.8392,0.812857,0.830365,0.81423,0.665379,0.769642,0.779936,0.830751,0.8655


In [10]:
synth_data.shape

(8704, 12, 325)

In [11]:
synth.save('synth_energy.pkl')