# MTS Generation with COSCI-GAN.


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import time
from configs.Metric import Metric
from configs.SimulatedData import Proposed
from utils.metric import signature_on_batch, signature_metric
import mlflow
from sklearn.decomposition import PCA
from datetime import datetime
import io
from tensorflow.keras import layers

# Physical GPUs visible to TensorFlow (empty list on CPU-only machines).
gpus = tf.config.list_physical_devices('GPU')

# Ask TensorFlow to allocate GPU memory on demand instead of all at once.
if gpus:
    try:
        # The memory-growth setting has to be identical on every GPU.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Raised when the GPUs were already initialized before this cell ran.
        print(e)

# Load the Data.

In [None]:
# Experiment configuration object (project-defined class from configs.SimulatedData).
config= Proposed()

# Module-level shortcuts to the configuration values used by the cells below.
SEQUENCE_LENGTH = config.sequence_lenght_in_sample
GRANUARITY = config.granularity
OVERLAP = config.overlap
BS = config.batch_size
EPOCHS = config.epochs
NUM_SEQUENCE_TO_GENERATE = config.met_params.sequence_to_generate
# Maximum number of batches kept from the dataset (passed to `Dataset.take`).
STEP_LIMITATION = 1000
# The discriminators are updated on steps where `step % UPDATE_DISCRIMINATOR == 0`.
UPDATE_DISCRIMINATOR = 20

SIMULATED_DATA_PATH = "data/simulated_dataset/01 - Source Domain.h5"
# Noise length is a quarter of the sequence length: the generator's transposed
# convolutions use strides 2, 1 and 2, i.e. an overall x4 upsampling.
N_SAMPLE_WIENER = SEQUENCE_LENGTH//4
FEAT_WIENER = 2
NOISE_DIM= (N_SAMPLE_WIENER, FEAT_WIENER)

# Weights of the global / local adversarial terms in the generator loss.
LAMBDA_GLOBAL = 0.001
LAMBDA_LOCAL = 0.001


In [None]:
def convert_dataframe_to_tensorflow_sequences(df:pd.DataFrame, sequence_lenght_in_sample, granularity, shift_between_sequences, batch_size, shuffle=True):
    """Turn a DataFrame into a ``tf.data.Dataset`` of batched fixed-length sequences.

    The rows of ``df`` are windowed with ``Dataset.window``: every window holds
    ``sequence_lenght_in_sample * granularity`` rows taken every ``granularity``
    rows, and consecutive windows start ``shift_between_sequences`` rows apart.
    Each window is then cut into chunks of ``sequence_lenght_in_sample`` rows
    (incomplete chunks are dropped).

    Args:
        df: Source table; its ``.values`` array is sliced row by row.
        sequence_lenght_in_sample: Number of rows in every emitted sequence.
        granularity: Stride between the rows picked inside a window.
        shift_between_sequences: Offset, in rows, between two window starts.
        batch_size: Number of sequences per batch (incomplete batches are dropped).
        shuffle: When true, shuffle the sequences with a 256-element buffer.

    Returns:
        A dataset yielding tensors of shape
        ``(batch_size, sequence_lenght_in_sample, n_columns)``.
    """
    sequence_lenght = int(sequence_lenght_in_sample*granularity)

    dset = tf.data.Dataset.from_tensor_slices(df.values)
    dset = dset.window(sequence_lenght, shift=shift_between_sequences, stride=granularity)
    dset = dset.flat_map(lambda window: window.batch(sequence_lenght_in_sample, drop_remainder=True))

    # Cache the deterministic windows BEFORE shuffling. Caching after the
    # shuffle would store the first epoch's order and replay it unchanged on
    # every following epoch, defeating the per-epoch reshuffle.
    dset = dset.cache()

    if shuffle:
        dset = dset.shuffle(256)

    dset = dset.batch(batch_size, drop_remainder=True)

    return dset.prefetch(10)

In [None]:
# Load the simulated table and drop its 'labels' column.
df_simulated = pd.read_hdf(SIMULATED_DATA_PATH).drop(columns='labels')

# Window the table into batches and keep at most STEP_LIMITATION of them.
dset_simulated = convert_dataframe_to_tensorflow_sequences(
    df=df_simulated,
    sequence_lenght_in_sample=SEQUENCE_LENGTH,
    granularity=GRANUARITY,
    shift_between_sequences=int(OVERLAP* SEQUENCE_LENGTH),
    batch_size=BS,
).take(STEP_LIMITATION)

## Verify if it is correct

In [None]:
# First sequence of the first batch, used as a visual sanity check.
sequence = next(iter(dset_simulated))[0]
print(sequence.shape)

plt.figure(figsize=(18, 5))
plt.title("Simulated Sequence.")
# One curve per feature; labels are 1-based.
for feat_number in range(1, sequence.shape[1] + 1):
    plt.plot(sequence[:, feat_number - 1], label=f'feat {feat_number}')
plt.grid()
plt.legend()
plt.show()

## Make some Wiener Noise.

In [None]:
def wiener_process(batch:int, n_sample_wiener:int, n_feat_wiener:int):
    """Sample a batch of discrete Wiener paths.

    Standard-normal increments are drawn for every step and accumulated along
    the time axis (axis 1).

    Returns:
        A tensor of shape ``(batch, n_sample_wiener, n_feat_wiener)``.
    """
    increments = tf.random.normal(shape=(batch, n_sample_wiener, n_feat_wiener))
    return tf.math.cumsum(increments, axis=1)


# Fixed noise batch, reused by the plotting and evaluation cells below so that
# generations can be compared across epochs.
seed = wiener_process(NUM_SEQUENCE_TO_GENERATE, N_SAMPLE_WIENER, FEAT_WIENER)


In [None]:
def draw_arrow(A, B, color="b"):
    """Draw an arrow from point ``A`` to point ``B`` on the current axes.

    ``A`` and ``B`` are indexable as ``(x, y)``; the arrow head is included in
    the arrow length.
    """
    x_start, y_start = A[0], A[1]
    delta_x = B[0] - x_start
    delta_y = B[1] - y_start
    plt.arrow(x_start, y_start, delta_x, delta_y,
              length_includes_head=True, color=color)
    
def draw_arrows(xs, ys, color="b"):
    """Connect consecutive points ``(xs[k], ys[k])`` with arrows.

    One arrow is drawn per pair of neighbouring points, i.e.
    ``xs.shape[0] - 1`` arrows in total.
    """
    n_segments = xs.shape[0] - 1
    for start in range(n_segments):
        end = start + 1
        draw_arrow([xs[start], ys[start]], [xs[end], ys[end]], color=color)

# Show the first seed sequence as a 2-D trajectory (feature 0 against feature 1).
path_x = seed[0, :, 0]
path_y = seed[0, :, 1]

plt.figure(figsize=(18, 5))
plt.title("Example of the wiener process.")

draw_arrows(path_x, path_y, color="tab:blue")
plt.scatter(path_x, path_y, label='Wiener Process.', color='tab:blue')

plt.grid()
plt.legend()

## Make Model Architectures.

In [None]:


def make_generator(n_sample_wiener:int, feat_wiener:int):
    """Build a single-channel generator network.

    The noise of shape ``(n_sample_wiener, feat_wiener)`` goes through a dense
    projection that keeps its shape, then through three transposed 1-D
    convolutions (kernel size 5, strides 2, 1 and 2), each followed by a
    LeakyReLU. With 'same' padding the output therefore has
    ``4 * n_sample_wiener`` time steps and one channel.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping noise to one signal channel.
    """
    keras_layers = tf.keras.layers
    init = tf.keras.initializers.RandomNormal()

    # (filters, stride) of each transposed convolution, in order.
    upsampling_stack = ((256, 2), (256, 1), (1, 2))

    noise_input = tf.keras.Input((n_sample_wiener, feat_wiener))

    # Small projection: flatten, dense layer of the same size, restore the shape.
    hidden = keras_layers.Flatten()(noise_input)
    hidden = keras_layers.Dense(n_sample_wiener* feat_wiener, name='1', kernel_initializer=init)(hidden)
    hidden = keras_layers.Reshape((n_sample_wiener, feat_wiener))(hidden)

    for filters, stride in upsampling_stack:
        hidden = keras_layers.Conv1DTranspose(filters, 5, stride, padding='same', kernel_initializer=init)(hidden)
        hidden = keras_layers.LeakyReLU()(hidden)

    return tf.keras.Model(noise_input, hidden)


# Standalone demo generator, built only to inspect the architecture. It uses the
# configured noise shape (rather than a hard-coded (16, 2)) so that it accepts
# `seed` whatever SEQUENCE_LENGTH is. This model is NOT one of the trained
# `generators`.
generator = make_generator(N_SAMPLE_WIENER, FEAT_WIENER)
generator.summary()
NOISE_DIM

In [None]:
def make_global_discriminator(seq_length:int, n_feat:int):
    """Build the discriminator that sees all channels at once.

    Architecture: Conv1D(8, kernel 5, stride 2) -> LeakyReLU -> Flatten ->
    Dropout(0.25) -> Dense(1, sigmoid).

    Returns:
        A pair ``(model, early_predictor)``. ``model`` outputs the sigmoid
        decision; ``early_predictor`` shares the same layers and outputs the
        flattened features fed to the final dense layer.
    """
    sequence_input = tf.keras.Input((seq_length, n_feat))

    features = layers.Conv1D(8, 5, 2, padding='same')(sequence_input)
    features = layers.LeakyReLU()(features)
    features = layers.Flatten()(features)
    features = layers.Dropout(0.25)(features)

    decision = layers.Dense(1, activation="sigmoid")(features)

    discriminator_model = tf.keras.Model(sequence_input, decision)
    feature_extractor = tf.keras.Model(sequence_input, features, name="Local Discriminator")

    return discriminator_model, feature_extractor


def local_discriminator(seq_length:int):
    """Build the discriminator for one single-channel sequence.

    Architecture: two blocks of Conv1D(32, kernel 5, stride 2) + LeakyReLU,
    then Flatten and Dense(1, sigmoid).

    Returns:
        A ``tf.keras.Model`` mapping ``(seq_length, 1)`` inputs to a sigmoid
        decision.
    """
    channel_input = tf.keras.Input((seq_length, 1))

    hidden = channel_input
    for _ in range(2):
        hidden = layers.Conv1D(32, 5, 2, padding='same')(hidden)
        hidden = layers.LeakyReLU()(hidden)

    hidden = layers.Flatten()(hidden)
    decision = layers.Dense(1, activation="sigmoid")(hidden)

    return tf.keras.Model(channel_input, decision, name="local discriminator")

In [None]:
# One generator and one local discriminator per column of the dataset.
n_channels = df_simulated.shape[1]

# The generators are built from the configured noise shape (not a hard-coded
# (16, 2)) so they always match the noise produced by `wiener_process`.
generators = [make_generator(N_SAMPLE_WIENER, FEAT_WIENER) for _ in range(n_channels)]
local_discriminators = [local_discriminator(SEQUENCE_LENGTH) for _ in range(n_channels)]

# A single discriminator looks at all channels jointly.
global_discriminator, early_predictor = make_global_discriminator(SEQUENCE_LENGTH, n_channels)

global_discriminator.summary()

### Plot a Sequence.

In [None]:
def generate(content_wp, training=True):
    """Generate multichannel sequences from a batch of noise.

    Every model in ``generators`` receives the same noise and produces one
    channel of shape ``(batch, length, 1)``; the channels are joined on the
    last axis.

    Args:
        content_wp: Noise batch fed to every generator.
        training: Forwarded to each generator call.

    Returns:
        A tensor of shape ``(batch, length, len(generators))``.
    """
    per_channel = [g(content_wp, training=training) for g in generators]
    # Concatenating on the last axis yields the same layout as the former
    # np.array -> transpose -> reshape chain, but stays inside TensorFlow
    # (no host round-trip, and it also works on symbolic tensors).
    return tf.concat(per_channel, axis=-1)

def local_discrimination(sequences, training=True):
    """Run every local discriminator on its own channel of ``sequences``.

    Channel ``i`` (``sequences[:, :, i]``) is scored by
    ``local_discriminators[i]``.

    Returns:
        A NumPy array stacking the decisions discriminator-first; no transpose
        to a batch-first layout is applied.
    """
    verdicts = []
    for channel, critic in enumerate(local_discriminators):
        verdicts.append(critic(sequences[:, :, channel], training=training))
    return np.array(verdicts)

In [None]:
# Sequences generated from the fixed seed; at this point in the notebook the
# training cell has not run yet.
generated = generate(seed)

# after_training_generations
def plot_several_generations(generations:np.ndarray, nvertical:int=3, nhoriz:int=3):
    """Plot the first generated sequences on a grid of subplots.

    Args:
        generations: Batch indexed as ``generations[i]`` with the features on
            the last axis.
        nvertical: Number of subplot rows.
        nhoriz: Number of subplot columns.

    At most ``nvertical * nhoriz`` sequences are drawn; when the batch holds
    fewer sequences the remaining grid cells are left empty.
    """
    legend = [f"feat {j}" for j in range(generations.shape[-1])]

    # Never index past the batch: small batches used to raise an index error.
    n_panels = min(nvertical * nhoriz, generations.shape[0])

    plt.figure(figsize=(18, 10))
    plt.suptitle("Several Generations")

    for i in range(n_panels):
        ax = plt.subplot(nvertical, nhoriz, i + 1)
        ax.set_title(f"sequence {i+1}")

        ax.plot(generations[i])
        ax.grid(True)
        ax.legend(legend)

    plt.tight_layout()
    plt.show()

# Visual check of the sequences generated from the fixed seed.
plot_several_generations(generated) 

## Define losses

In [None]:
# Both discriminators defined in this notebook end with a sigmoid, so their
# outputs are probabilities: the loss must not apply a second sigmoid.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)

def discriminator_loss(real_output, fake_output):
    """Return the discriminator loss: real samples vs. ones plus fake samples vs. zeros.

    Both terms are computed with the module-level ``cross_entropy`` loss.
    """
    real_targets = tf.ones_like(real_output)
    fake_targets = tf.zeros_like(fake_output)
    return cross_entropy(real_targets, real_output) + cross_entropy(fake_targets, fake_output)

def generator_loss(fake_output):
    """Return the generator loss: fake decisions scored against all-ones targets."""
    wanted_decision = tf.ones_like(fake_output)
    return cross_entropy(wanted_decision, fake_output)

def similarity_loss(extracted_features:np.ndarray):
    """Penalize a batch whose feature vectors are all close to the first one.

    Computes ``exp(-d)`` where ``d`` is the norm of the differences between
    every feature vector after the first and the first one (the anchor). The
    value tends to 1 when the batch is uniform and to 0 when it is spread out.
    """
    anchor, others = extracted_features[0], extracted_features[1:]
    distance = tf.norm(others - anchor)
    return tf.exp(-distance)

In [None]:
## Generate the reference signature.

# First batch of real sequences, used as the reference for the signature metric.
real_sequence_batch = next(iter(dset_simulated))

# Both signatures are computed with the same settings taken from the config.
signature_args = (config.met_params.ins, config.met_params.outs, config.met_params.signature_length)

real_batch_signature = signature_on_batch(real_sequence_batch, *signature_args)
generated_batch_signature = signature_on_batch(generated, *signature_args)

signature_metric(real_batch_signature, generated_batch_signature)

In [None]:
# One RMSprop optimizer per network; the generators use a larger learning rate
# than the discriminators.
generator_opt = [tf.keras.optimizers.RMSprop(learning_rate=1e-2) for _ in generators]
discriminator_opt = [tf.keras.optimizers.RMSprop(learning_rate=2e-3) for _ in local_discriminators]
global_discriminator_opt = tf.keras.optimizers.RMSprop(learning_rate=2e-3)

# Earlier setting kept for reference: Adam(1e-4) for the generator and
# Adam(1e-6) for the discriminator.

## Define metrics 

In [None]:
# Running means shown on the training progress line and written to TensorBoard.
(
    generator_metric,
    local_discriminator_metric,
    global_discriminator_metric,
    similarity_metric,
    correlation_metric,
) = (tf.keras.metrics.Mean() for _ in range(5))

## Tensorboard logs.

In [None]:
# Timestamp that makes every run write to its own log directory.
date_str = datetime.now().strftime('%Y-%m-%d_%H_%M_%S')

# Directory layout of the run: scalar/image summaries go under "fit".
BASE_DIR = f"log - COSCI-GAN/{date_str} - COSCI-GAN"
TRAIN_LOGS_DIR_PATH = f"{BASE_DIR}/fit"
GENERATION_LOG = f"{BASE_DIR}/Generations"

# Summary writer used by `log_losses`.
TRAIN_SUMMARY_WRITER = tf.summary.create_file_writer(TRAIN_LOGS_DIR_PATH)


def plot_to_buff(generations:np.ndarray, nvertical:int=3, nhoriz:int=3):
    """Render a grid of generated sequences into an in-memory PNG.

    Args:
        generations: Batch indexed as ``generations[i]`` with the features on
            the last axis.
        nvertical: Number of subplot rows.
        nhoriz: Number of subplot columns.

    Returns:
        An ``io.BytesIO`` positioned at its start and holding the PNG bytes.

    At most ``nvertical * nhoriz`` sequences are drawn; when the batch holds
    fewer sequences the remaining grid cells are left empty.
    """
    legend = [f"feat {j}" for j in range(generations.shape[-1])]

    # Never index past the batch: small batches used to raise an index error.
    n_panels = min(nvertical * nhoriz, generations.shape[0])

    fig = plt.figure(figsize=(18, 10))
    try:
        plt.suptitle("Generations After GAN Training.")

        for i in range(n_panels):
            ax = plt.subplot(nvertical, nhoriz, i + 1)
            ax.set_title(f"sequence {i+1}")

            ax.plot(generations[i])
            ax.grid(True)
            ax.legend(legend)

        plt.tight_layout()

        buf = io.BytesIO()
        plt.savefig(buf, format='png')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    buf.seek(0)
    return buf


def log_losses(epoch, plot_buf):
    """Write the current metric values and a generation plot to TensorBoard.

    Args:
        epoch: Step index attached to every summary.
        plot_buf: Buffer holding a PNG image (its ``getvalue()`` bytes are decoded).
    """
    decoded = tf.image.decode_png(plot_buf.getvalue(), channels=4)
    # tf.summary.image expects a batch of images, hence the leading axis.
    image_batch = tf.expand_dims(decoded, 0)

    # (TensorBoard tag, metric) pairs, written in this order.
    scalars = (
        ("Generator Loss", generator_metric),
        ("Local D loss", local_discriminator_metric),
        ("Gobal D loss", global_discriminator_metric),
        ("Mode Colapsing ?", similarity_metric),
        ("Correlation Metric", correlation_metric),
    )

    with TRAIN_SUMMARY_WRITER.as_default():
        for tag, metric in scalars:
            tf.summary.scalar(tag, metric.result(), step=epoch)

        tf.summary.image("Training Generations", image_batch, step=epoch)


# Training Functions

In [None]:
def generate_plots(noise, save_to):
    """Generate sequences from ``noise`` and save a plot of the first one.

    Args:
        noise: Noise batch passed to ``generate``.
        save_to: Destination accepted by ``plt.savefig`` (e.g. a file path).
    """
    # Use the noise that was passed in; the previous version ignored this
    # argument and always read the global `seed`.
    generated = generate(noise)

    fig = plt.figure(figsize=(18, 5))
    try:
        plt.title("Generation of the GAN during Training.")
        for i in range(generated.shape[-1]):
            plt.plot(generated[0, :, i], label=f'feat {i+1}')
        plt.grid()
        plt.legend()

        plt.savefig(save_to)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

In [None]:
# Binary cross-entropy with Keras defaults (inputs are treated as
# probabilities), shared by the local/global loss helpers of this cell.
bc = tf.keras.losses.BinaryCrossentropy()

def local_discriminator_loss(crits_on_real, crits_on_fake):
    """Return one loss per local discriminator.

    Both arguments are indexed discriminator-first. For each discriminator the
    loss is the mean of the cross-entropy of its fake decisions against zeros
    and of its real decisions against ones.

    Returns:
        A Python list with one scalar loss per discriminator.
    """
    target_shape = crits_on_real.shape[1:]
    n_discriminators = crits_on_real.shape[0]

    return [
        (bc(tf.zeros(target_shape), crits_on_fake[k]) + bc(tf.ones(target_shape), crits_on_real[k])) / 2
        for k in range(n_discriminators)
    ]


def local_generator_loss(crit_on_fake):
    """Return one generator loss per local discriminator.

    ``crit_on_fake`` is indexed discriminator-first; each entry is scored
    against all-ones targets.

    Returns:
        A Python list with one scalar loss per discriminator.
    """
    target_shape = crit_on_fake.shape[1:]
    return [bc(tf.ones(target_shape), crit_on_fake[k]) for k in range(crit_on_fake.shape[0])]

def global_discriminator_loss(crit_on_real, crit_on_fake):
    """Return the global discriminator loss.

    It is the mean of the cross-entropy of the fake decisions against zeros
    and of the real decisions against ones.
    """
    loss_on_fake = bc(tf.zeros_like(crit_on_fake), crit_on_fake)
    loss_on_real = bc(tf.ones_like(crit_on_real), crit_on_real)
    return (loss_on_fake + loss_on_real) / 2


def global_generator_loss(crit_on_fake):
    """Return the generator loss w.r.t. the global discriminator (targets are all ones)."""
    wanted_decision = tf.ones_like(crit_on_fake)
    return bc(wanted_decision, crit_on_fake)

In [None]:
def train_step(real_ts, update_discr:bool=False):
    """Run one optimization step on the generators and, optionally, the discriminators.

    Args:
        real_ts: Batch of real sequences, indexed ``[batch, time, channel]``.
        update_discr: When true, the local and global discriminators are
            updated as well; the generators are updated on every call.

    Side effects:
        Applies gradients through the module-level optimizers and updates the
        module-level metrics (the discriminator metrics only when
        ``update_discr`` is true).
    """
    alpha = 1  # weight of the similarity term in the generator loss
    noise = wiener_process(BS, N_SAMPLE_WIENER, FEAT_WIENER)

    if update_discr:
        with tf.GradientTape(persistent=True) as d_tape:
            generated_ts = [g(noise, training=False) for g in generators]

            # Each local discriminator scores its own channel.
            local_crit_gen = tf.convert_to_tensor(
                [critic(fake, training=True) for critic, fake in zip(local_discriminators, generated_ts)]
            )
            local_crit_real = tf.convert_to_tensor(
                [critic(real_ts[:, :, i], training=True) for i, critic in enumerate(local_discriminators)]
            )

            local_d_losses = local_discriminator_loss(local_crit_real, local_crit_gen)

            # Join the single-channel outputs on the channel axis. The previous
            # code reshaped the channel-first list directly, which mixed up
            # batch, time and channel positions.
            generated_reshaped = tf.concat(generated_ts, axis=-1)

            g_crit_gen = global_discriminator(generated_reshaped, training=True)
            g_crit_real = global_discriminator(real_ts, training=True)

            global_d_loss = global_discriminator_loss(g_crit_real, g_crit_gen)

    with tf.GradientTape(persistent=True) as g_tape:
        generated_ts = [g(noise, training=True) for g in generators]

        local_crit_gen = tf.convert_to_tensor(
            [critic(fake, training=False) for critic, fake in zip(local_discriminators, generated_ts)]
        )

        # Same channel-axis join as above, for the global discriminator.
        generated_reshaped = tf.concat(generated_ts, axis=-1)

        g_crit_gen = global_discriminator(generated_reshaped, training=False)

        extracted_features = early_predictor(generated_reshaped, training=False)

        local_g_loss = local_generator_loss(local_crit_gen)
        global_g_loss = global_generator_loss(g_crit_gen)
        s_loss = similarity_loss(extracted_features)

        # One total loss per generator: its local term plus the shared
        # global and similarity terms.
        g_loss = [LAMBDA_LOCAL* l_loss + LAMBDA_GLOBAL* global_g_loss+ alpha* s_loss for l_loss in local_g_loss]

    if update_discr:
        # Global discriminator update.
        global_discr_gradient = d_tape.gradient(global_d_loss, global_discriminator.trainable_variables)
        global_discriminator_opt.apply_gradients(zip(global_discr_gradient, global_discriminator.trainable_variables))

        # Local discriminator updates.
        for i, critic in enumerate(local_discriminators):
            grads = d_tape.gradient(local_d_losses[i], critic.trainable_variables)
            discriminator_opt[i].apply_gradients(zip(grads, critic.trainable_variables))

        # A persistent tape holds its resources until it is dropped.
        del d_tape

    # Generator updates.
    for i, gen in enumerate(generators):
        grads = g_tape.gradient(g_loss[i], gen.trainable_variables)
        generator_opt[i].apply_gradients(zip(grads, gen.trainable_variables))

    del g_tape

    # Save metrics for display.
    if update_discr:
        local_discriminator_metric(tf.reduce_mean(local_d_losses))
        global_discriminator_metric(global_d_loss)

    generator_metric(tf.reduce_mean(local_g_loss))
    similarity_metric(s_loss)

In [None]:
def train(dataset, epochs):
    """Train the GAN and log the progress of every epoch.

    For each epoch: reset the running metrics, run ``train_step`` on every
    batch (the discriminators are updated on steps where
    ``step % UPDATE_DISCRIMINATOR == 0``), save a plot of the seed generation,
    compute the signature metric against ``real_batch_signature`` and write
    everything to TensorBoard.

    Args:
        dataset: Iterable of real batches passed to ``train_step``.
        epochs: Number of passes over ``dataset``.

    Returns:
        An array with one row per epoch holding the generator, global
        discriminator and local discriminator mean losses.
    """
    import os

    # plt.savefig does not create missing directories.
    plot_dir = "imgs/GAN_generations"
    os.makedirs(plot_dir, exist_ok=True)

    # All running means start from scratch at every epoch so that each logged
    # value describes that epoch only.
    epoch_metrics = (
        generator_metric,
        global_discriminator_metric,
        local_discriminator_metric,
        similarity_metric,
        correlation_metric,
    )

    losses = []
    total_steps = "?"
    for epoch in range(epochs):
        start = time.time()

        for metric in epoch_metrics:
            metric.reset_states()

        steps_done = 0
        for s, image_batch in enumerate(dataset):
            update_discriminator = s % UPDATE_DISCRIMINATOR == 0
            train_step(image_batch, update_discr=update_discriminator)
            steps_done = s + 1

            print(f"\r e {epoch}/{epochs}, s {s}/{total_steps}: Gen {generator_metric.result():0.4f}; Global discriminator: {global_discriminator_metric.result():0.4f}; Local discriminator: {local_discriminator_metric.result():0.4f}; Sim loss: {similarity_metric.result():0.4f}", end="")

        if epoch == 0:
            # Number of steps (not the last index), known after the first pass.
            total_steps = steps_done

        stop = time.time()
        elapsed = stop - start
        # An empty dataset must not crash the report with a division by zero.
        seconds_per_step = elapsed / steps_done if steps_done else float("nan")
        print()
        print(f"\r[+] Epoch {epoch}/{epochs} in {elapsed:0.4f} seconds. ({seconds_per_step:0.4f} s/step)")

        generate_plots(seed, f"{plot_dir}/{epoch}.png")
        # Make generations on seed
        seed_generation = generate(seed, training=False)
        buff = plot_to_buff(seed_generation)

        # Same signature settings as the ones used for `real_batch_signature`,
        # otherwise the two signatures are not comparable.
        batch_signature = signature_on_batch(seed_generation, config.met_params.ins, config.met_params.outs, config.met_params.signature_length)
        signature_difference = signature_metric(real_batch_signature, batch_signature)
        correlation_metric(signature_difference)

        l = [generator_metric.result(), global_discriminator_metric.result(), local_discriminator_metric.result()]
        losses.append(l)
        log_losses(epoch, buff)

    return np.array(losses)

In [None]:
# Run the training; one row of mean losses is returned per epoch.
training_losses = train(dset_simulated, EPOCHS)

## Plot Losses.

In [None]:
plt.figure(figsize=(18, 5))
plt.title("Training Losses.")
# Column 0 holds the generator loss, column 1 the global discriminator loss.
for column, curve_label in enumerate(("Generator Loss", "Discriminator Loss")):
    plt.plot(training_losses[:, column], ".-", label=curve_label)
plt.grid()
plt.legend()
plt.show()

# Let's Test it!

In [None]:
# Generate with the trained per-channel `generators`. The standalone
# `generator` model is only a demo instance and is never trained.
after_training_generations = generate(seed, training=False)

plt.figure(figsize=(18, 5))
plt.title("Generation of the GAN after Training.")
for i in range(after_training_generations.shape[-1]):
    plt.plot(after_training_generations[0, :, i], label=f'feat {i+1}')
plt.grid()
plt.legend()
plt.show()

In [None]:
# No `discriminator` name exists in this notebook; score the multichannel
# generations of the trained generators with the global discriminator.
after_training_decision = global_discriminator(generate(seed, training=False), training=False)
after_training_decision[0]

## Test for Mode Collapse

In [None]:
# after_training_generations
def plot_several_generations(generations:np.ndarray, nvertical:int=3, nhoriz:int=3):
    """Plot the first generated sequences on a grid of subplots.

    Args:
        generations: Batch indexed as ``generations[i]`` with the features on
            the last axis.
        nvertical: Number of subplot rows.
        nhoriz: Number of subplot columns.

    At most ``nvertical * nhoriz`` sequences are drawn; when the batch holds
    fewer sequences the remaining grid cells are left empty.
    """
    legend = [f"feat {j}" for j in range(generations.shape[-1])]

    # Never index past the batch: small batches used to raise an index error.
    n_panels = min(nvertical * nhoriz, generations.shape[0])

    plt.figure(figsize=(18, 10))
    plt.suptitle("Generations After GAN Training.")

    for i in range(n_panels):
        ax = plt.subplot(nvertical, nhoriz, i + 1)
        ax.set_title(f"sequence {i+1}")

        ax.plot(generations[i])
        ax.grid(True)
        ax.legend(legend)

    plt.tight_layout()
    plt.show()

# Several sequences side by side make near-identical outputs easy to spot.
plot_several_generations(after_training_generations)