In [1]:
import argparse
import logging
import os
import pickle
import random
import shutil
import time

# 3rd-Party Modules
import numpy as np
import torch
import joblib
from sklearn.model_selection import train_test_split

# Self-Written Modules
from data.data_preprocess import data_preprocess
from metrics.metric_utils import (
    feature_prediction, one_step_ahead_prediction, reidentify_score
)

from models.timegan import TimeGAN
from models.utils import timegan_trainer, timegan_generator

2025-01-04 10:42:39.956954: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-01-04 10:42:40.244791: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-04 10:42:40.251478: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.3/lib64:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
2025-01-04

In [6]:
class Config:
    """Attribute container for the settings this notebook passes around as ``args``.

    Stands in for an ``argparse`` namespace: every setting is a plain
    instance attribute, so later cells may read them and attach new ones.
    """

    def __init__(self):
        # (attribute name, default value) pairs, assigned in the listed order.
        defaults = (
            ('device', 'cuda'),
            ('exp', 'test'),
            ('is_train', True),
            ('seed', 0),
            ('feat_pred_no', 2),
            ('max_seq_len', 100),
            ('train_rate', 0.5),
            ('emb_epochs', 600),
            ('sup_epochs', 600),
            ('gan_epochs', 600),
            ('batch_size', 128),
            ('hidden_dim', 20),
            ('num_layers', 3),
            ('dis_thresh', 0.15),
            ('optimizer', 'adam'),
            ('learning_rate', 1e-3),
        )
        for attr_name, attr_value in defaults:
            setattr(self, attr_name, attr_value)


# Shared settings object used by every cell below.
args = Config()

def str2bool(v):
    """Convert a command-line style truthy/falsy token into a ``bool``.

    Args:
        v: Either a ``bool`` (returned unchanged) or one of the strings
            ``yes/true/t/y/1`` or ``no/false/f/n/0``. Matching ignores
            letter case and surrounding whitespace.

    Returns:
        bool: The boolean the token stands for.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is neither a ``bool`` nor a
            recognised string.
    """
    if isinstance(v, bool):
        return v
    if not isinstance(v, str):
        # Without this guard a non-string (e.g. None) would reach ``.lower()``
        # and raise AttributeError instead of the documented error type.
        raise argparse.ArgumentTypeError('Boolean value expected.')

    token = v.strip().lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')


In [None]:
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
code_dir = os.path.abspath(".")

## Data file
# Validate the CSV that is actually loaded below; checking only the parent
# "./data" folder would let a missing file slip through to data_preprocess.
data_path = "data/stock.csv"
if not os.path.isfile(data_path):
    raise ValueError(f"Data file not found at {os.path.abspath(data_path)}.")
data_dir = os.path.abspath(os.path.dirname(data_path))
data_file_name = os.path.basename(data_path)

## Output directories
args.model_path = os.path.abspath(f"./output/{args.exp}/")
out_dir = args.model_path
os.makedirs(out_dir, exist_ok=True)  # exist_ok makes a prior exists() check redundant

# TensorBoard directory
tensorboard_path = os.path.abspath("./tensorboard")
os.makedirs(tensorboard_path, exist_ok=True)

print(f"\nCode directory:\t\t\t{code_dir}")
print(f"Data directory:\t\t\t{data_dir}")
print(f"Output directory:\t\t{out_dir}")
print(f"TensorBoard directory:\t\t{tensorboard_path}\n")

# ---------------------------------------------------------------------------
# Reproducibility: seed every RNG used by this notebook
# ---------------------------------------------------------------------------
os.environ['PYTHONHASHSEED'] = str(args.seed)
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# ---------------------------------------------------------------------------
# Device selection
# ---------------------------------------------------------------------------
# This cell overwrites args.device with a torch.device, so a plain
# `args.device == "cuda"` test is False on a second run and the notebook would
# silently fall back to CPU. Comparing the device *type* text keeps the cell
# idempotent for both the initial string and the stored torch.device.
wants_cuda = str(args.device).split(":")[0] == "cuda"
if wants_cuda and torch.cuda.is_available():
    print("Using CUDA\n")
    args.device = torch.device("cuda:0")
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
else:
    print("Using CPU\n")
    args.device = torch.device("cpu")

# ---------------------------------------------------------------------------
# Load and preprocess the data
# ---------------------------------------------------------------------------
# data_preprocess hands back the sequence length and padding value that are
# stored on args here; the third return value is not used in this notebook.
X, T, _, args.max_seq_len, args.padding_value = data_preprocess(
    data_path, args.max_seq_len
)

print(f"Processed data: {X.shape} (Idx x MaxSeqLen x Features)\n")
print(f"Original data preview:\n{X[:2, :10, :2]}\n")

# Both dimensions are taken from the last axis of X.
args.feature_dim = X.shape[-1]
args.Z_dim = X.shape[-1]

# ---------------------------------------------------------------------------
# Train / test split
# ---------------------------------------------------------------------------
# `train_rate` names the training fraction, so it must be passed as
# train_size. Passing it as test_size only gives the intended split when the
# rate happens to be 0.5.
train_data, test_data, train_time, test_time = train_test_split(
    X, T, train_size=args.train_rate, random_state=args.seed
)


Code directory:			/h3cstore_ns/jcxie/zsl/timegan-pytorch-main
Data directory:			/h3cstore_ns/jcxie/zsl/timegan-pytorch-main/data
Output directory:		/h3cstore_ns/jcxie/zsl/timegan-pytorch-main/output/test
TensorBoard directory:		/h3cstore_ns/jcxie/zsl/timegan-pytorch-main/tensorboard

Using CUDA

Loading data...

Dropped 504 rows (outliers)



100%|██████████| 3676/3676 [00:06<00:00, 593.70it/s]

Processed data: (3676, 100, 6) (Idx x MaxSeqLen x Features)

Original data preview:
[[[ 0.19376718  0.19446839]
  [ 0.19232369  0.19224311]
  [ 0.19594256  0.19481357]
  [ 0.20078938  0.20019403]
  [ 0.19906535  0.20037676]
  [ 0.19672326  0.19752207]
  [ 0.19728439  0.19644191]
  [-1.         -1.        ]
  [-1.         -1.        ]
  [-1.         -1.        ]]

 [[ 0.4860957   0.49640034]
  [ 0.48522808  0.48878844]
  [ 0.48351736  0.48673669]
  [ 0.48463053  0.48547787]
  [ 0.49108043  0.4905124 ]
  [ 0.48256791  0.48940151]
  [ 0.47696925  0.48430077]
  [-1.         -1.        ]
  [-1.         -1.        ]
  [-1.         -1.        ]]]






In [8]:

# Wall-clock reference for this cell; the evaluation cell reads `start` again
# to report the total runtime, so it must stay a time.time() timestamp.
start = time.time()

model = TimeGAN(args)
# Truthiness test instead of `== True` (PEP 8: don't compare booleans with ==).
if args.is_train:
    timegan_trainer(model, train_data, train_time, args)

# Generation is driven by the training-set time information, and that same
# object is kept as the time information of the generated data.
generated_data = timegan_generator(model, train_time, args)
generated_time = train_time

end = time.time()

# Preview the last 10 steps of the first two generated sequences (2 features).
print(f"Generated data preview:\n{generated_data[:2, -10:, :2]}\n")
print(f"Model Runtime: {(end - start)/60} mins\n")


Start Embedding Network Training


Epoch: 599, Loss: 0.0011: 100%|██████████| 600/600 [02:15<00:00,  4.43it/s]



Start Training with Supervised Loss Only


Epoch: 599, Loss: 0.0080: 100%|██████████| 600/600 [01:49<00:00,  5.50it/s]



Start Joint Training


Epoch: 599, E: 0.0971, G: 1.8348, D: 1.8551: 100%|██████████| 600/600 [18:44<00:00,  1.87s/it]


Saved at path: /h3cstore_ns/jcxie/zsl/timegan-pytorch-main/output/test

Generating Data...
Generated data preview:
[[[-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]]

 [[-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]
  [-1.0003532 -1.0001502]]]

Model Runtime: 23.00885624885559 mins






In [9]:
# ---------------------------------------------------------------------------
# Persist the real splits and the generated data in the model directory
# ---------------------------------------------------------------------------
# One loop replaces six copy-pasted `with open(...)` blocks; the file names
# are unchanged.
artifacts = {
    "train_data": train_data,
    "train_time": train_time,
    "test_data": test_data,
    "test_time": test_time,
    "fake_data": generated_data,
    "fake_time": generated_time,
}
for stem, payload in artifacts.items():
    with open(f"{args.model_path}/{stem}.pickle", "wb") as fb:
        pickle.dump(payload, fb)

# ---------------------------------------------------------------------------
# Enlarged (train + test) dataset with membership labels
# ---------------------------------------------------------------------------
# Label 1 marks rows that came from train_data, label 0 rows from test_data.
# NOTE(review): nothing below in this cell consumes the enlarge_* arrays;
# they are kept because the permutation draw advances the NumPy RNG that
# feat_idx depends on.
enlarge_data = np.concatenate((train_data, test_data), axis=0)
enlarge_time = np.concatenate((train_time, test_time), axis=0)
enlarge_data_label = np.concatenate(
    (np.ones([train_data.shape[0], 1]), np.zeros([test_data.shape[0], 1])),
    axis=0,
)

# Mix the order. The same permutation must be applied to all three arrays;
# leaving enlarge_time unshuffled would pair sequences with the wrong entries.
idx = np.random.permutation(enlarge_data.shape[0])
enlarge_data = enlarge_data[idx]
enlarge_time = enlarge_time[idx]
enlarge_data_label = enlarge_data_label[idx]

# ---------------------------------------------------------------------------
# 1. Feature prediction
# ---------------------------------------------------------------------------
# Randomly choose `feat_pred_no` feature indices, then call
# feature_prediction once with the original training data and once with the
# generated data; both calls receive the same held-out test split.
feat_idx = np.random.permutation(train_data.shape[2])[:args.feat_pred_no]
print("Running feature prediction using original data...")
ori_feat_pred_perf = feature_prediction(
    (train_data, train_time),
    (test_data, test_time),
    feat_idx
)
print("Running feature prediction using generated data...")
new_feat_pred_perf = feature_prediction(
    (generated_data, generated_time),
    (test_data, test_time),
    feat_idx
)

feat_pred = [ori_feat_pred_perf, new_feat_pred_perf]

print('Feature prediction results:\n' +
        f'(1) Ori: {str(np.round(ori_feat_pred_perf, 4))}\n' +
        f'(2) New: {str(np.round(new_feat_pred_perf, 4))}\n')

# ---------------------------------------------------------------------------
# 2. One step ahead prediction
# ---------------------------------------------------------------------------
print("Running one step ahead prediction using original data...")
ori_step_ahead_pred_perf = one_step_ahead_prediction(
    (train_data, train_time),
    (test_data, test_time)
)
print("Running one step ahead prediction using generated data...")
new_step_ahead_pred_perf = one_step_ahead_prediction(
    (generated_data, generated_time),
    (test_data, test_time)
)

step_ahead_pred = [ori_step_ahead_pred_perf, new_step_ahead_pred_perf]

print('One step ahead prediction results:\n' +
        f'(1) Ori: {str(np.round(ori_step_ahead_pred_perf, 4))}\n' +
        f'(2) New: {str(np.round(new_step_ahead_pred_perf, 4))}\n')

# `start` was set in the training cell, so this covers training + evaluation.
print(f"Total Runtime: {(time.time() - start)/60} mins\n")


Running feature prediction using original data...


Epoch: 19, Loss: 0.2464: 100%|██████████| 20/20 [00:03<00:00,  6.28it/s]
Epoch: 19, Loss: 2.6676: 100%|██████████| 20/20 [00:03<00:00,  6.21it/s] 


Running feature prediction using generated data...


Epoch: 19, Loss: 0.0241: 100%|██████████| 20/20 [00:03<00:00,  6.28it/s]
Epoch: 19, Loss: 0.0206: 100%|██████████| 20/20 [00:03<00:00,  6.23it/s]


Feature prediction results:
(1) Ori: [0.2955 0.239 ]
(2) New: [0.1357 0.1448]

Running one step ahead prediction using original data...


Epoch: 19, Loss: 0.4252: 100%|██████████| 20/20 [00:03<00:00,  6.05it/s]


Running one step ahead prediction using generated data...


Epoch: 19, Loss: 0.3763: 100%|██████████| 20/20 [00:03<00:00,  6.13it/s]


One step ahead prediction results:
(1) Ori: 0.3261
(2) New: 0.3078

Total Runtime: 23.721916536490124 mins

