In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # Join the containing directory with the bare file name to get a usable path.
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install liac-arff

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from scipy.interpolate import CubicSpline
from scipy import signal
import json
import os
import sklearn
from arff import loads, dump
import shutil
import os.path
import re
import zipfile
from sklearn.preprocessing import RobustScaler,MinMaxScaler 
from sklearn.decomposition import PCA

In [None]:
def windowing(data,seq_len,stride):
    """Slice `data` into fixed-length windows along its first axis.

    Args:
        data: Object exposing `.shape` and slice indexing (e.g. a NumPy array).
        seq_len: Number of samples in each window.
        stride: Offset between the starts of consecutive windows.

    Returns:
        A list of slices `data[i : i + seq_len]` for i = 0, stride, 2*stride, ...
        Only complete windows are produced, so trailing samples that do not fill
        a window are dropped; the list is empty when `data` is shorter than
        `seq_len`.
    """
    # Fix: the accumulator used to be created as `date_time` but appended to and
    # returned as `data_time`, which raised NameError on a fresh kernel.
    last_start = data.shape[0] - seq_len
    return [data[start : start + seq_len] for start in range(0, last_start + 1, stride)]

## Data Preprocessing

In [None]:
# Load one recording; only its 'x' and 'y' columns are used below.
df = pd.read_csv("/kaggle/input/bridge2-10files/BBD_bridge_2.csv")
# df = pd.read_csv("/kaggle/input/bridge2-10files/VVB_bridge_2.csv")
# Quick visual check of each axis. Note that df_x / df_y hold the objects returned
# by DataFrame.plot, not data frames.
df_x = df[['x']].plot(title='x axis')
df_y = df[['y']].plot(title='y axis')
# (num_samples, 2) array with x in column 0 and y in column 1.
data = df[['x','y']].values
# The fitted scaler is kept at module level because later cells call
# min_max_scaler.inverse_transform to map generated samples back to data units.
min_max_scaler = MinMaxScaler()
norm_data = min_max_scaler.fit_transform(data)
# Split the normalized columns back into one series per axis.
x = norm_data[:,0]
y = norm_data[:,1]

In [10]:
# Cut each normalized axis into overlapping windows (stride < seq_len), give every
# window a trailing channel axis, then pair the axes channel-wise so that
# xy_time[..., 0] holds x and xy_time[..., 1] holds y.
seq_len = 500
stride = 76
x_time = np.expand_dims(np.array(windowing(x, seq_len, stride)), axis=2)
y_time = np.expand_dims(np.array(windowing(y, seq_len, stride)), axis=2)
xy_time = np.concatenate([x_time, y_time], axis=-1)
print(xy_time.shape)

(60, 500, 2)


## Train WGAN2

In [8]:
# Define path of WGAN1 generator 
# generator1 is the pre-trained first-stage generator. The WGAN class and the
# GANMonitor callback read it as a module-level name: they feed it Gaussian noise
# (training=False) and pass its output on as the input of the second-stage generator.
# NOTE(review): the "Plot Synthetic Data" section later rebinds generator1 to
# generator_0007.keras, a different checkpoint from the one loaded here - confirm.
gen_path = '/kaggle/input/generator0005/generator_0009.keras'
generator1 = keras.models.load_model(gen_path)

In [None]:
def make_discriminator2(D):
    """Build the second-stage critic: a 1-D convolutional scorer.

    Args:
        D: Base number of convolution filters; the five strided Conv1D layers
            use D, 2*D, 4*D, 8*D and 16*D filters in that order.

    Returns:
        A Keras model named "discriminator2" mapping a (500, 2, 1) input to a
        single unbounded score (the final Dense layer has no activation).
    """
    critic_input = layers.Input(shape=(500,2,1))

    # Drop the trailing singleton axis so Conv1D sees (time steps, channels).
    h = layers.Reshape((500,2))(critic_input)

    # Each stage halves the temporal length (stride 2) while widening the channels.
    for multiplier in (1, 2, 4, 8, 16):
        h = layers.Conv1D(D * multiplier, 5, strides=2, padding='same')(h)
        h = layers.LeakyReLU(0.2)(h)

    h = layers.Flatten()(h)
    score = layers.Dense(1)(h)

    return keras.models.Model(critic_input, score, name="discriminator2")
# Base filter count for the critic; make_discriminator2 scales it up to 16*D.
D = 64
discriminator2 = make_discriminator2(D)
# Print the layer-by-layer structure and parameter counts.
discriminator2.summary()

In [None]:
def make_generator2(G):
    """Build the second-stage generator: a 1-D convolutional encoder/decoder.

    The network takes a (500, 2, 1) tensor rather than a flat noise vector.
    It compresses the sequence with four stride-2 convolutions down to a
    single-channel bottleneck, expands it again with four upsample+conv stages,
    and projects to two tanh-activated channels.

    Args:
        G: Filter multiplier; layer widths are small integer multiples of G.

    Returns:
        A Keras model named "generator2" mapping (500, 2, 1) -> (500, 2, 1).
    """
    def conv_bn_act(tensor, filters, strides=1, activation="relu"):
        # One Conv1D (kernel 10, 'same' padding) -> BatchNorm -> activation unit.
        tensor = layers.Conv1D(filters, 10, strides=strides, padding='same')(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Activation(activation)(tensor)

    noise = layers.Input(shape=(500,2,1))

    # Drop the trailing singleton axis so Conv1D sees (time steps, channels).
    h = layers.Reshape((500,2))(noise)

    # Encoder: temporal length 500 -> 250 -> 125 -> 63 -> 32.
    for filters in (16 * G, 8 * G, 4 * G, 2 * G):
        h = conv_bn_act(h, filters, strides=2)

    # Single-channel bottleneck at length 32.
    h = conv_bn_act(h, 1, strides=1)

    # Decoder: each stage doubles the length, 32 -> 64 -> 128 -> 256 -> 512.
    for filters in (2 * G, 4 * G, 4 * G, 8 * G):
        h = layers.UpSampling1D(size=2)(h)
        h = conv_bn_act(h, filters)

    # Project to the two output channels; tanh bounds values to [-1, 1].
    h = conv_bn_act(h, 2, activation="tanh")

    # 512 steps come out of the decoder; trimming 6 from each end restores 500.
    h = layers.Cropping1D(6)(h)
    h = layers.Reshape((500,2,1))(h)

    return keras.models.Model(noise, h, name="generator2")
# Filter multiplier for generator2's Conv1D layers (widths are multiples of G).
G = 32
generator2 = make_generator2(G)
# Print the layer-by-layer structure and parameter counts.
generator2.summary()

In [12]:
class WGAN(keras.Model):
    """WGAN with gradient penalty (WGAN-GP) wrapping a critic and a generator.

    The generator is not fed raw noise. On every step, Gaussian noise of width
    `latent_dim` is first passed through the module-level `generator1` (called
    with training=False, and never updated here); that output is the input of
    `generator`. `generator1` must therefore exist in the notebook namespace
    before `fit` is called.
    """

    def __init__(
        self,
        discriminator,
        generator,
        latent_dim,
        discriminator_extra_steps=5,
        gp_weight=10.0,
    ):
        """Store the sub-models and training hyperparameters.

        Args:
            discriminator: Critic model producing one score per sample.
            generator: Model trained to turn `generator1` outputs into samples.
            latent_dim: Width of the Gaussian noise vector given to `generator1`.
            discriminator_extra_steps: Critic updates per generator update.
            gp_weight: Multiplier applied to the gradient penalty term.

        Raises:
            ValueError: If `discriminator_extra_steps` is smaller than 1.
        """
        super(WGAN, self).__init__()
        # With zero critic steps `train_step` would reach its return statement
        # without ever computing a critic loss; fail early with a clear message.
        if discriminator_extra_steps < 1:
            raise ValueError(
                "discriminator_extra_steps must be >= 1, got %r"
                % (discriminator_extra_steps,)
            )
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        self.d_steps = discriminator_extra_steps
        self.gp_weight = gp_weight

    def compile(self, d_optimizer, g_optimizer, d_loss_fn, g_loss_fn):
        """Attach the optimizers and loss callables used by `train_step`.

        Args:
            d_optimizer: Optimizer updating the critic's variables.
            g_optimizer: Optimizer updating the generator's variables.
            d_loss_fn: Callable invoked as `d_loss_fn(real_img=..., fake_img=...)`
                with the critic scores of real and fake batches.
            g_loss_fn: Callable invoked with the critic scores of generated data.
        """
        super(WGAN, self).compile()
        self.d_optimizer = d_optimizer
        self.g_optimizer = g_optimizer
        self.d_loss_fn = d_loss_fn
        self.g_loss_fn = g_loss_fn

    def _sample_generator_input(self, batch_size):
        """Draw Gaussian noise and map it through the frozen `generator1`."""
        noise = tf.random.normal(shape=(batch_size, self.latent_dim))
        return generator1(noise, training=False)

    def gradient_penalty(self, batch_size, real_images, fake_images):
        """Compute the WGAN-GP gradient penalty on random real/fake mixtures.

        Args:
            batch_size: Number of samples in the batch.
            real_images: Real batch; rank 4 with the batch on axis 0.
            fake_images: Generated batch with the same shape as `real_images`.

        Returns:
            Scalar tensor: mean over the batch of (||grad|| - 1)^2, where the
            gradient is that of the critic output w.r.t. the mixed sample.
        """
        # Fix: the mixing coefficient must lie in [0, 1] so that each mixed sample
        # sits on the segment between a real and a fake sample. It was previously
        # drawn from a standard normal, which yields negative and >1 coefficients.
        alpha = tf.random.uniform([batch_size, 1, 1, 1], 0.0, 1.0)
        diff = fake_images - real_images
        interpolated = real_images + alpha * diff

        with tf.GradientTape() as gp_tape:
            # `interpolated` is a plain tensor, so it has to be watched explicitly.
            gp_tape.watch(interpolated)
            # 1. Critic output for the mixed samples.
            pred = self.discriminator(interpolated, training=True)

        # 2. Gradient of the critic output w.r.t. the mixed samples.
        grads = gp_tape.gradient(pred, [interpolated])[0]
        # 3. Per-sample L2 norm over all non-batch axes.
        norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        gp = tf.reduce_mean((norm - 1.0) ** 2)
        return gp

    def train_step(self, real_images):
        """Run `d_steps` critic updates followed by one generator update.

        Args:
            real_images: A batch of real samples, or a tuple whose first element
                is that batch.

        Returns:
            Dict with "d_loss" (critic loss including the weighted gradient
            penalty, from the last critic step) and "g_loss".
        """
        if isinstance(real_images, tuple):
            real_images = real_images[0]

        # Get the batch size
        batch_size = tf.shape(real_images)[0]

        # Per batch:
        # 1. Update the critic `self.d_steps` times; its loss is the Wasserstein
        #    term from `d_loss_fn` plus `gp_weight` times the gradient penalty.
        # 2. Update the generator once using `g_loss_fn`.
        # 3. Return both losses as a dictionary.
        for i in range(self.d_steps):
            # Fresh generator input for every critic step
            random_latent_vectors = self._sample_generator_input(batch_size)
            with tf.GradientTape() as tape:
                # Generate fake samples from the first-stage output
                fake_images = self.generator(random_latent_vectors, training=True)
                # Critic scores for the fake samples
                fake_logits = self.discriminator(fake_images, training=True)
                # Critic scores for the real samples
                real_logits = self.discriminator(real_images, training=True)

                # Wasserstein critic loss from the fake and real scores
                d_cost = self.d_loss_fn(real_img=real_logits, fake_img=fake_logits)
                # Gradient penalty on real/fake mixtures
                gp = self.gradient_penalty(batch_size, real_images, fake_images)
                # Total critic loss
                d_loss = d_cost + gp * self.gp_weight

            # Only the critic's variables are updated here; the generator's are not.
            d_gradient = tape.gradient(d_loss, self.discriminator.trainable_variables)
            self.d_optimizer.apply_gradients(
                zip(d_gradient, self.discriminator.trainable_variables)
            )

        # Train the generator
        random_latent_vectors = self._sample_generator_input(batch_size)
        with tf.GradientTape() as tape:
            # Generate fake samples using the generator
            generated_images = self.generator(random_latent_vectors, training=True)
            # Critic scores for the fake samples
            gen_img_logits = self.discriminator(generated_images, training=True)
            # Generator loss
            g_loss = self.g_loss_fn(gen_img_logits)

        # Only the generator's variables are updated here.
        gen_gradient = tape.gradient(g_loss, self.generator.trainable_variables)
        self.g_optimizer.apply_gradients(
            zip(gen_gradient, self.generator.trainable_variables)
        )
        return {"d_loss": d_loss, "g_loss": g_loss}

In [13]:
class GANMonitor(keras.callbacks.Callback):
    """Training callback that records losses and periodically writes artifacts.

    Every `plot_interval` epochs it saves a loss curve, both model checkpoints
    and a figure of generated samples. Samples are produced by feeding Gaussian
    noise through the module-level `generator1` and then through the generator
    of the model being trained. The output folders under /kaggle/working must
    already exist.
    """

    def __init__(self, tries, num_img=6, latent_dim=None,plot_interval=500,plot_interval_img=10):
        """
        Args:
            tries: Run identifier embedded in every output file name.
            num_img: Number of samples generated per figure.
            latent_dim: Width of the noise vector fed to `generator1`. When
                omitted, the module-level `noise_dim` is read at construction.
            plot_interval: Epoch period for loss plots, checkpoints and saved
                sample figures.
            plot_interval_img: Epoch period for the unsaved sample figure.
        """
        super().__init__()
        # Fix: the default used to be `latent_dim=noise_dim`, which Python
        # evaluates when the class is defined - before `noise_dim` is assigned in
        # the training cell - so defining the class failed on a fresh kernel.
        if latent_dim is None:
            latent_dim = noise_dim
        self.num_img = num_img
        self.latent_dim = latent_dim
        self.plot_interval = plot_interval
        self.plot_interval_img = plot_interval_img
        self.d_losses = []
        self.g_losses = []
        self.tries = tries

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's losses and emit artifacts on the configured periods."""
        self.d_losses.append(logs["d_loss"])
        self.g_losses.append(logs["g_loss"])
        # Epoch 0 satisfies both modulo tests, so everything is emitted once
        # right after the first epoch.
        if epoch % self.plot_interval == 0:
            self.plot_losses(epoch)
            self.save_model(epoch)
            self.plot_synth(epoch)
        if epoch % self.plot_interval_img == 0:
            self.plot_synth_img(epoch)

    def _plot_samples(self, save_path=None):
        """Generate `num_img` samples and plot channel 0 of each as a curve.

        The figure is written to `save_path` when one is given, and is always
        closed afterwards so figures do not accumulate over a long run.
        """
        random_latent_vectors = generator1(tf.random.normal(
            shape=(self.num_img, self.latent_dim)), training=False)
        generated_images = self.model.generator(random_latent_vectors, training=False)

        plt.figure()
        for i in range(self.num_img):
            img = generated_images[i].numpy()
            # Channel 0 over time (the x channel of the training tensor).
            plt.plot(img[:,0,0])
        if save_path is not None:
            plt.savefig(save_path)
        plt.close()

    def plot_synth_img(self, epoch):
        """Render a sample figure without saving or showing it.

        NOTE(review): both `plt.show()` and `plt.savefig(...)` were commented
        out in this method, so the figure is discarded. That behavior is kept;
        re-enable one of them or drop the call if the preview is not needed.
        """
        self._plot_samples()

    def plot_synth(self, epoch):
        """Save a figure of generated samples for this epoch."""
        self._plot_samples('/kaggle/working/synth_img/synth_%d_%04d.png' % (self.tries, epoch))

    def plot_losses(self, epoch):
        """Save the generator/critic loss history recorded so far."""
        plt.figure(figsize=(10, 5))
        plt.plot(self.g_losses, label="Generator Loss")
        plt.plot(self.d_losses, label="Discriminator Loss")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.title("GAN Losses")
        plt.legend()
        plt.savefig('/kaggle/working/loss_graph/time_%d_loss_%04d.png' % (self.tries, epoch))
        plt.close()

    def save_model(self, epoch):
        """Checkpoint the generator and critic of the model being trained."""
        # Fix: save the sub-models of the attached model instead of the notebook
        # globals `generator2` / `discriminator2`, which may be rebound to other
        # objects (a later cell reloads `generator2` from disk).
        self.model.generator.save('/kaggle/working/time_gen/gen_%d_time_%04d.keras' % (self.tries, epoch))
        self.model.discriminator.save('/kaggle/working/time_disc/disc_%d_time_%04d.keras' % (self.tries, epoch))


In [None]:
# Output folders written to by GANMonitor and by the final model saves.
# exist_ok=True keeps this cell re-runnable: os.mkdir raised FileExistsError
# whenever the cell was executed a second time in the same session.
for out_dir in (
    '/kaggle/working/synth_img',
    '/kaggle/working/time_gen',
    '/kaggle/working/time_disc',
    '/kaggle/working/loss_graph',
):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# define optimizers and loss function 
# The generator and the critic each get their own Adam instance, configured
# with the same learning rate and beta values.
generator_optimizer = keras.optimizers.Adam(
    learning_rate=0.0002, beta_1=0.5, beta_2=0.9
)
discriminator_optimizer = keras.optimizers.Adam(
    learning_rate=0.0002, beta_1=0.5, beta_2=0.9
)

def discriminator_loss(real_img, fake_img):
    """Wasserstein critic loss: mean fake score minus mean real score.

    Args:
        real_img: Critic outputs for a batch of real samples.
        fake_img: Critic outputs for a batch of generated samples.

    Returns:
        Scalar tensor that decreases as real samples are scored higher than
        fake ones.
    """
    return tf.reduce_mean(fake_img) - tf.reduce_mean(real_img)

def generator_loss(fake_img):
    """Generator loss: the negated mean critic score of generated samples.

    Args:
        fake_img: Critic outputs for a batch of generated samples.

    Returns:
        Scalar tensor; minimizing it raises the critic's score on fake data.
    """
    mean_fake_score = tf.reduce_mean(fake_img)
    return -mean_fake_score
# Sign convention: Keras optimizers minimize, so every objective is written as a loss.
# The critic should score real samples high and fake samples low, which is the same
# as minimizing mean(fake scores) - mean(real scores).
# The generator wants its samples to receive high critic scores, so it minimizes
# -mean(fake scores); negating the score turns that maximization into a minimization.
## In short: the critic pushes the fake scores down to tell fake from real samples,
## while the generator pushes them up to make fake data indistinguishable from real samples.

# Set hyperparameters
epochs = 25000
tries = 9  # run identifier; appears in the file names of saved models and figures
batch_size = 10
noise_dim = 15  # width of the Gaussian noise vector that is fed to generator1
# Add a trailing singleton axis so each sample matches the (500, 2, 1) model input.
train_data = np.expand_dims(xy_time, axis=3)
# Instantiate the custom `GANMonitor` Keras callback.

# latent_dim is passed explicitly so the callback samples noise of the same width
# as the WGAN model below.
cbk = GANMonitor(tries, num_img=5, latent_dim=noise_dim,plot_interval=1000,plot_interval_img=30)

# Instantiate the WGAN model.
wgan2 = WGAN(
    discriminator=discriminator2,
    generator=generator2,
    latent_dim=noise_dim,
    discriminator_extra_steps=5,
)

# Compile the WGAN model.
wgan2.compile(
    d_optimizer=discriminator_optimizer,
    g_optimizer=generator_optimizer,
    g_loss_fn=generator_loss,
    d_loss_fn=discriminator_loss,
)

# Start training the model.
history = wgan2.fit(train_data, batch_size=batch_size, epochs=epochs, callbacks=[cbk])

# Save the final weights; the target folders must already exist.
generator2.save('/kaggle/working/time_gen/gen_%04d.keras' % (tries))
discriminator2.save('/kaggle/working/time_disc/disc_%04d.keras' % (tries))

## Plot Synthetic Data

In [13]:
# Define path to WGAN1 and WGAN2 generator 
# NOTE: this cell rebinds the module-level names `generator1` and `generator2` to
# checkpoints loaded from disk, replacing whatever models those names referred to
# before (including a `generator2` trained earlier in the same session).
gen_path = '/kaggle/input/generator0005/generator_0007.keras'
generator1 = keras.models.load_model(gen_path)
gen_path2 = '/kaggle/input/generator0005/gen_7_time_18000.keras'
generator2 = keras.models.load_model(gen_path2)

In [None]:
# Generate n synthetic sequences through generator1 -> generator2, map them back
# to data units with the fitted scaler, and overlay them per axis.
synth_data_x = []
synth_data_y = []
n = 100
for i in range(n):
    # NOTE(review): the noise here is uniform in [-3, 3], while training and the
    # other sampling cells draw from tf.random.normal - confirm this is intended.
    noise = tf.random.uniform(shape=(1,noise_dim), minval=-3, maxval=3)
    sample = generator2(generator1(noise, training=False),training=False).numpy()
    # Fix: intermediates used to be stored in `x`, overwriting the normalized x
    # series that the windowing cell reads; a dedicated name avoids that.
    # sample[0, :, :, 0] is the (time steps, 2) matrix of the single generated item.
    sample = min_max_scaler.inverse_transform(sample[0,:,:,0])
    synth_data_x.append(sample[:,0])
    synth_data_y.append(sample[:,1])

# One figure per axis, each with all n curves overlaid.
for axis_label, curves in (('X', synth_data_x), ('Y', synth_data_y)):
    plt.figure()
    for curve in curves:
        plt.plot(curve)
    plt.xlabel('Time Step')
    plt.ylabel('Value')
    plt.title('Synthetic %s Data with Overlapping Curves' % axis_label)

In [None]:
# create scatter plot
# Both point clouds are in the scaler's normalized space: generator output is used
# as-is and xy_time holds the normalized training windows.
# Fix: the noise width now comes from `noise_dim` instead of a hard-coded 15.
sample = generator2(generator1(tf.random.normal(shape=(n, noise_dim)), training=False),training=False)
sample = sample.numpy()
# Flatten to one (x, y) row per time step.
synth_data = sample.reshape(-1, 2)
# Fix: use a dedicated name rather than rebinding `train_data`, which holds the
# 4-D tensor passed to `fit` in the training cell.
real_points = xy_time.reshape(-1, 2)
plt.scatter(synth_data[:,0], synth_data[:,1], alpha = 0.2, label = "Synthetic")
plt.scatter(real_points[:,0], real_points[:,1], alpha = 0.2, label = "Original")
plt.legend()
plt.show()

## MAKE ARFF FILES

In [21]:
# Folder for the generated ARFF file. exist_ok=True makes the cell re-runnable;
# os.mkdir raised FileExistsError on a second execution.
os.makedirs('/kaggle/working/fid', exist_ok=True)

In [24]:
# Settings for the ARFF export loop in the next cell.
n = 0  # placeholder only: the export loop assigns its own value to n
n_files = 1  # number of iterations of the export loop
batch_num = 10  # not read by any active code in the visible cells
noise_dim = 15  # noise width; presumably must match generator1's input - confirm

In [25]:
# Column schema of the exported ARFF relation.
attributes = [
    ('time', 'INTEGER'),
    ('x', 'NUMERIC'),
    ('y', 'NUMERIC'),
    ('confidence', 'NUMERIC')
]

# Header text written into the ARFF file.
# NOTE(review): this text describes hand-labelling columns that the exported
# relation does not contain; it is kept verbatim - confirm whether consumers
# rely on it (e.g. the @METADATA lines).
description = (
    ' Handlabelling data is arranged in 3 columns: handlabeller1 and handlabeller2 indicate the first two experts,'
    ' handlabeller_final column contains the final, tie-breaking labels.'
    '\n Labels in these columns are to be interpreted as follows:'
    '\n   - 0 is UNKNOWN'
    '\n   - 1 is FIX (fixation)'
    '\n   - 2 is SACCADE'
    '\n   - 3 is SP (smooth pursuit)'
    '\n   - 4 is NOISE'
    '\n'
    '\n@METADATA width_px 1280.0'
    '\n@METADATA height_px 720.0'
    '\n@METADATA width_mm 400.0'
    '\n@METADATA height_mm 225.0'
    '\n@METADATA distance_mm 450.0'
)

for i in range(n_files):
    n = 502  # number of synthetic windows generated per file
    # Fix: noise width comes from `noise_dim` instead of a hard-coded 15.
    noise = tf.random.normal(shape=(n, noise_dim))
    xy_hat = generator2(generator1(noise, training=False),training=False)
    xy_hat = xy_hat.numpy()
    # Drop the trailing singleton axis and lay the windows end to end:
    # (n, steps, 2, 1) -> (n * steps, 2), x in column 0 and y in column 1.
    xy_hat = xy_hat[:, :, :, 0].reshape(-1, 2)
    # Fix: the previous manual rescale read min_x / max_x / min_y / max_y, which
    # are never defined in this notebook, and assumed a [-1, 1] normalization.
    # The fitted scaler is the inverse of the normalization actually applied to
    # the training data and matches what the plotting cell does.
    # NOTE(review): if the loaded checkpoints were trained on [-1, 1]-scaled data,
    # restore the affine formula with explicitly computed min/max values.
    xy_hat = min_max_scaler.inverse_transform(xy_hat)
    x_hat = xy_hat[:, 0:1]
    y_hat = xy_hat[:, 1:2]

    num_rows = xy_hat.shape[0]
    # Running sample index used as the 'time' column. The name `time_col` avoids
    # shadowing the standard-library module name `time`.
    time_col = np.arange(num_rows).reshape(-1, 1)
    # Constant confidence of 1 for every synthetic sample.
    confidence = np.ones((num_rows, 1), dtype=int)
    synth_dataset = np.hstack((time_col, x_hat, y_hat, confidence))

    # Create ARFF dataset. The array is passed directly rather than through a
    # variable named `data`, which would overwrite the raw data loaded earlier.
    dataset = {
        'data': synth_dataset,
        'attributes': attributes,
        'relation': 'synth_gaze',
        'description': description,
    }

    # Save ARFF file
    # NOTE(review): the path does not depend on `i`, so with n_files > 1 every
    # iteration overwrites the previous file.
    arff_file_path = '/kaggle/working/fid/VVB_uniform_fid.arff' 
    with open(arff_file_path, 'w') as f:
        dump(dataset, f)