In [None]:
import os
import time
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler


import tensorflow as tf
# Enable on-demand GPU memory growth for every GPU TensorFlow can see.
# With no GPU present the list is empty and the loop is a no-op.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# Last expression of the cell: displays the installed TensorFlow version.
tf.__version__

'2.3.0'

In [None]:
# Mount Google Drive at /content/drive; the project path used by the
# following cells lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the kernel's working directory to the project folder on the mounted Drive.
%cd /content/drive/MyDrive/Tesis/CreacionDataGAN

/content/drive/MyDrive/Tesis/CreacionDataGAN


In [None]:
# Project root on the mounted Drive; generated CSVs are written under
# <path>/outputs_dpgan/user_<id>/ by the training cell.
path = "/content/drive/MyDrive/Tesis/CreacionDataGAN"

In [None]:
# Load the trajectory points and add "time-f": the timestamp as float epoch seconds.
# The file is resolved against `path` so the cell does not depend on the
# kernel's current working directory.
df = pd.read_csv(os.path.join(path, "selected_users_data.csv"))
# Cast explicitly to int64: plain `int` is 32-bit on some platforms (Windows),
# where the datetime -> integer cast fails or overflows.
# Assumes pandas' nanosecond datetime resolution, hence the division by 10**9.
df["time-f"] = pd.to_datetime(df["time"]).astype("int64") / 10**9
# Keep only the columns used for training, in the order the model expects.
df = df[['time-f', 'lat', 'lon', 'user']]
df.head()

Unnamed: 0,time-f,lat,lon,user
0,1266231000.0,30.174892,121.238187,132
1,1266231000.0,30.174869,121.238065,132
2,1266231000.0,30.175194,121.237918,132
3,1266231000.0,30.175112,121.237957,132
4,1266231000.0,30.175115,121.237967,132


In [None]:
class DPGAN:
    """WGAN-style generator/critic pair whose critic gradients are perturbed with noise.

    The critic (``d_net``) is trained to minimise ``mean(D(fake)) - mean(D(real))``.
    Before each critic update, Gaussian noise (std ``noise_std``, scaled by
    ``1 / batch_size``) is added to every gradient; after the update all critic
    weights are clipped to ``[-clip_value, clip_value]``. The generator
    (``g_net``) is trained to maximise the critic score of its samples.

    Parameters
    ----------
    input_dim : int
        Number of features of a real sample (critic input width).
    random_dim : int
        Width of the latent noise vector fed to the generator.
    discriminatorDims : list[int]
        Units of each critic Dense layer; the last entry is the output width.
    generatorDims : list[int]
        Units of each generator Dense layer; the last entry is the output width.
    g_optimizer, d_optimizer : tf.keras.optimizers.Optimizer
        Optimizers for the generator and the critic respectively.
    noise_std : float
        Standard deviation of the Gaussian noise added to critic gradients.
    d_iter : int
        Number of critic batches processed before each generator batch.
    epsilon : float
        Passed to the generator's BatchNormalization layers as their ``epsilon``.
        NOTE(review): the name suggests a privacy budget, but this is the only
        place the value is used -- confirm that this is the intent.
    g_out_activation : str or callable or None, optional
        Activation of the generator's output layer. Defaults to ``None``
        (linear) so the generator can emit negative values, which is required
        for zero-mean (standardised) training data. Pass ``"relu"`` to restrict
        the output to non-negative values.
    clip_value : float, optional
        Absolute bound applied to every critic weight after each critic step.
    """

    def __init__(
        self,
        input_dim,
        random_dim,
        discriminatorDims,
        generatorDims,
        g_optimizer,
        d_optimizer,
        noise_std,
        d_iter,
        epsilon,
        g_out_activation=None,
        clip_value=0.01
    ):

        self.noise_std = noise_std
        self.d_iter = d_iter
        self.epsilon = epsilon
        self.clip_value = clip_value
        self.g_out_activation = g_out_activation

        self.input_dim = input_dim
        self.random_dim = random_dim
        self.discriminatorDims = discriminatorDims
        self.generatorDims = generatorDims

        # Set by train(); defined here so the attributes always exist.
        self.batch_size = None
        self.output_examples = None

        # The builders read the attributes assigned above.
        self.g_net = self.create_generator()
        self.d_net = self.create_discriminator()

        self.g_optimizer = g_optimizer
        self.d_optimizer = d_optimizer

        # Per-generator-step loss history (wdis_store is not filled by this class).
        self.g_loss_store = []
        self.d_loss_store = []
        self.wdis_store   = []

    def create_generator(self):
        """Build the generator: Dense -> BatchNorm -> LeakyReLU stacks, then an output Dense."""
        G = tf.keras.Sequential()
        G.add(tf.keras.Input(shape=(self.random_dim,)))
        for u in self.generatorDims[:-1]:
            G.add(tf.keras.layers.Dense(units=u))
            G.add(tf.keras.layers.BatchNormalization(epsilon=self.epsilon))
            G.add(tf.keras.layers.LeakyReLU(alpha=0.2))
        # Linear by default: a ReLU here would make every generated feature >= 0.
        G.add(tf.keras.layers.Dense(self.generatorDims[-1], activation=self.g_out_activation))

        return G

    def create_discriminator(self):
        """Build the critic: Dense -> LeakyReLU stacks, then a linear output Dense."""
        D = tf.keras.Sequential()
        D.add(tf.keras.Input(shape=(self.input_dim,)))
        for u in self.discriminatorDims[:-1]:
            D.add(tf.keras.layers.Dense(units=u))
            D.add(tf.keras.layers.LeakyReLU(alpha=0.2))
        # The critic score must be unbounded for the Wasserstein objective;
        # a ReLU output clamps it at zero and blocks gradients for negative scores.
        D.add(tf.keras.layers.Dense(self.discriminatorDims[-1]))

        return D

    def random_noise(self, nrows=None):
        """Sample latent vectors uniformly from [-1, 1).

        Returns a tensor of shape ``(nrows, random_dim)``; when ``nrows`` is
        ``None`` or 0 the current ``batch_size`` is used instead.

        Raises
        ------
        ValueError
            If neither ``nrows`` nor ``batch_size`` provides a row count.
        """
        n = nrows or self.batch_size
        if not n:
            raise ValueError(
                "random_noise needs nrows, or batch_size set by train()"
            )
        return tf.random.uniform((n, self.random_dim), minval=-1, maxval=1)

    def dpnoise(self, tensor):
        """Return ``tensor`` plus Gaussian noise of std ``noise_std`` scaled by 1/batch_size."""
        shape = tensor.get_shape().as_list()
        noise = tf.random.normal(shape, mean=0.0, stddev=self.noise_std)
        return tf.add(tensor, tf.scalar_mul((1.0 / self.batch_size), noise))

    @tf.function
    def g_train_step(self, x_real):
        """Run one generator update and return the generator loss.

        ``x_real`` is accepted for signature symmetry with ``d_train_step``
        but is not used.
        """
        with tf.GradientTape() as gen_tape:
            z = self.random_noise()

            x_fake = self.g_net(z, training=True)

            y_hat_fake = self.d_net(x_fake, training=False)

            # The critic is trained to push D(fake) down (see d_train_step), so
            # the generator must push it up: minimise the negated mean score.
            g_loss = -tf.reduce_mean(y_hat_fake)

        g_grads = gen_tape.gradient(g_loss, self.g_net.trainable_variables)

        self.g_optimizer.apply_gradients(zip(g_grads, self.g_net.trainable_variables))

        return g_loss

    @tf.function
    def d_train_step(self, x_real):
        """Run one noisy critic update on ``x_real``, clip the weights, return the critic loss."""
        with tf.GradientTape() as disc_tape:
            z = self.random_noise()

            x_fake = self.g_net(z, training=False)

            y_hat_real = self.d_net(x_real, training=True)
            y_hat_fake = self.d_net(x_fake, training=True)

            d_loss = tf.reduce_mean(y_hat_fake) - tf.reduce_mean(y_hat_real)

        d_grads = disc_tape.gradient(d_loss, self.d_net.trainable_variables)

        # Perturb every gradient before it is applied.
        d_grads_new = [self.dpnoise(g) for g in d_grads]

        self.d_optimizer.apply_gradients(zip(d_grads_new, self.d_net.trainable_variables))

        # Weight clipping; the assign ops are not stored on self so no symbolic
        # tensors leak out of the traced function.
        for v in self.d_net.trainable_variables:
            v.assign(tf.clip_by_value(v, -self.clip_value, self.clip_value))

        return d_loss

    def train(self, dataset, nepochs, batch_size, output_examples):
        """Train both networks and return generated samples.

        Parameters
        ----------
        dataset : iterable of batches
            Each batch is cast to float32 and used for one training step.
        nepochs : int
            Number of passes over ``dataset``.
        batch_size : int
            Number of latent vectors drawn per step; also scales the gradient noise.
        output_examples : int
            Number of synthetic rows to generate after training.

        Returns
        -------
        numpy.ndarray
            Output of ``g_net.predict`` for ``output_examples`` latent vectors.

        Within each epoch, ``d_iter`` consecutive batches update the critic and
        the following batch triggers one generator update; the counter restarts
        at every epoch. Losses are recorded once per generator update.
        """
        self.batch_size = batch_size
        self.output_examples = output_examples

        # Fall back to the CPU when no GPU is visible instead of failing on placement.
        has_gpu = bool(tf.config.experimental.list_physical_devices('GPU'))
        device = "gpu:0" if has_gpu else "cpu:0"

        # Stays None until the first critic step (e.g. when d_iter == 0).
        rd_loss = None
        with tf.device(device):
            for epoch in range(nepochs):
                pbar = tqdm(dataset, disable=True)
                pbar.set_description("Epoch {}/{}".format(epoch+1, nepochs))
                d_iter_ = 0
                for batch in pbar:
                    batch = tf.cast(batch, tf.float32)
                    if d_iter_ < self.d_iter:
                        rd_loss = self.d_train_step(batch)
                        d_iter_ += 1
                        continue
                    rg_loss = self.g_train_step(batch)
                    d_iter_ = 0

                    self.g_loss_store.append(rg_loss.numpy())
                    # Keep both stores aligned even if no critic step has run yet.
                    self.d_loss_store.append(
                        rd_loss.numpy() if rd_loss is not None else np.nan
                    )

        z_sample = self.random_noise(self.output_examples)
        x_gene = self.g_net.predict(z_sample)
        return x_gene

In [None]:
# Search space for the experiments. Every list is one axis of the grid;
# single-element lists are axes that are currently held fixed.
input_dim = 3
random_dim = 100

noise_std = [1.5]
d_iter = [4]
batch_size = [64]
optimizer = ["Adam", "RMSProp"]
discriminatorDims = [
    [512, 256, 128, 64, 32, 16, 1]
]
generatorDims = [
    [512, 256, 128, 64, input_dim]
]
users = [1,2,3,5,7,8,9,10,13,14,15,17,21,22,28,32,33,44,49,50,56,61,64,65,66,68,70,77,79,80,90,91,92,101,105,107,115,119,123,127,131,136,137,138,150,152,160,165,169,172]
epsilon = [0.001, 0.005, 0.01, 0.1, 1]

# Cartesian product of all axes, one dict per run. The `for` clauses are in
# the same outer-to-inner order as the original nested loops (user outermost,
# epsilon innermost), so the run order is unchanged. A comprehension keeps the
# loop variables out of the notebook namespace.
hyper_pars = [
    {
        "user": user_id,
        "input_dim": input_dim,
        "random_dim": random_dim,
        "optimizer": opt_name,
        "d_iter": n_critic,
        "noise_std": noise,
        "batch_size": batch_s,
        "discriminatorDims": d_dims,
        "generatorDims": g_dims,
        "epsilon": eps,
    }
    for user_id in users
    for opt_name in optimizer
    for n_critic in d_iter
    for batch_s in batch_size
    for d_dims in discriminatorDims
    for g_dims in generatorDims
    for noise in noise_std
    for eps in epsilon
]

In [None]:
nepochs = 1000


def _output_filename(param):
    """Return the CSV file name that encodes one run's hyper-parameters.

    The arguments are passed in the same order as the labels in the template
    (optimizer, batchSize, noise_std, dIter, dDims, gDims, eps).
    """
    return "data_{}_batchSize_{}_noise_std_{}_dIter_{}_dDims_{}_gDims_{}_eps_{}.csv".format(
        param["optimizer"],
        param["batch_size"],
        param["noise_std"],
        param["d_iter"],
        "_".join(map(str, param["discriminatorDims"])),
        "_".join(map(str, param["generatorDims"])),
        param["epsilon"],
    )


def _make_optimizers(name):
    """Return a fresh (generator, critic) optimizer pair.

    "Adam" selects Adam; any other name selects RMSprop.
    """
    if name == "Adam":
        return (
            tf.keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9),
            tf.keras.optimizers.Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9),
        )
    return (
        tf.keras.optimizers.RMSprop(learning_rate=2.5e-5),
        tf.keras.optimizers.RMSprop(learning_rate=2.5e-5),
    )


for param in hyper_pars:

    dir_name = os.path.join(
        path, "outputs_dpgan", "user_{}".format(param["user"])
    )
    filename = _output_filename(param)
    out_path = os.path.join(dir_name, filename)

    # An existing output file marks the run as done, so the cell can be resumed.
    if os.path.isfile(out_path):
        print("skipped %s" % filename)
        continue

    user_points = df.loc[df.user == param["user"], ['time-f', 'lat', 'lon']]

    # With drop_remainder=True fewer rows than one batch means zero training
    # steps (and StandardScaler raises on an empty frame), so skip such users
    # rather than crash or save the output of an untrained generator.
    if len(user_points) < param["batch_size"]:
        print("skipped %s (only %d rows for user %s)" % (
            filename, len(user_points), param["user"]
        ))
        continue

    g_optimizer, d_optimizer = _make_optimizers(param["optimizer"])

    dp = DPGAN(
        param["input_dim"],
        param["random_dim"],
        param["discriminatorDims"],
        param["generatorDims"],
        g_optimizer,
        d_optimizer,
        param["noise_std"],
        param["d_iter"],
        param["epsilon"]
    )

    # Standardise per user; the fitted scaler maps generated samples back below.
    scaler = StandardScaler()
    scaled_points = scaler.fit_transform(user_points)

    dataset = (
        tf.data.Dataset.from_tensor_slices(scaled_points)
        .shuffle(50000)
        .batch(param["batch_size"], drop_remainder=True)
    )

    # Generate as many synthetic rows as the user has real rows.
    results = dp.train(dataset, nepochs, param["batch_size"], scaled_points.shape[0])

    os.makedirs(dir_name, exist_ok=True)

    # Column order follows the training features: time-f, lat, lon.
    synthetic = pd.DataFrame(
        scaler.inverse_transform(results), columns=['time', 'lat', 'lon']
    )
    synthetic['time'] = pd.to_datetime(synthetic['time'], unit='s')
    synthetic['user'] = param["user"]
    synthetic = synthetic[['user', 'time', 'lat', 'lon']]
    synthetic.to_csv(out_path, index=False, header=False)

    # Release the model and Keras' global state before the next run;
    # hundreds of models are built in this loop.
    del synthetic, results, dp, dataset
    tf.keras.backend.clear_session()