In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


import tensorflow as tf
# Initialize Tensorflow 
tf.keras.layers.Dense(100)
from src.utils.gpu_memory_grow import gpu_memory_grow

from src.utils import utils

gpus = tf.config.list_physical_devices('GPU')
gpu_memory_grow(gpus)

In [26]:
GENERATED_SEQUENCES_FOLDER = "data/generated"
CONTENT_DATA_PATH = "data/simulated_dataset/01 - Source Domain.h5"

SAVE_FOLDER = "results/amplitudes"

STYLE_DATASETS = [
        "data/simulated_dataset/amplitude_shift/1.0_1.0.h5", 
        "data/simulated_dataset/amplitude_shift/2.0_2.0.h5", 
        "data/simulated_dataset/amplitude_shift/3.0_3.0.h5", 
        "data/simulated_dataset/amplitude_shift/4.0_4.0.h5", 
        "data/simulated_dataset/amplitude_shift/5.0_5.0.h5", 
        "data/simulated_dataset/amplitude_shift/6.0_6.0.h5", 
        "data/simulated_dataset/amplitude_shift/7.0_7.0.h5" , 
        "data/simulated_dataset/amplitude_shift/8.0_8.0.h5" , 
        "data/simulated_dataset/amplitude_shift/9.0_9.0.h5" , 
        "data/simulated_dataset/amplitude_shift/10.0_10.0.h5"
    ]

STYLE_NAMES = [utils.get_name(f) for f in STYLE_DATASETS]

SEQ_SHAPE = (64, 7)

# Get Generated Sequences

In [27]:
def get_generated_dataset(real_style_path:list, gen_folder:str=GENERATED_SEQUENCES_FOLDER):
    filename = utils.get_name(real_style_path)
    
    train_gen_filepath = f"{gen_folder}/{filename}_train.tfrecords"
    valid_gen_filepath = f"{gen_folder}/{filename}_valid.tfrecords"
    
    dset_train = tf.data.Dataset.load(train_gen_filepath)
    dset_valid = tf.data.Dataset.load(valid_gen_filepath)
    
    dset_train = dset_train.unbatch().batch(64)
    dset_valid = dset_valid.unbatch().batch(64)
    
    return dset_train, dset_valid


In [28]:
def get_real_fake_datasets(style_datasets:list):
    real_style_dataset = {}
    fake_style_dataset = {}
    bs = 64 # args().simulated_arguments.batch_size

    for style_path in style_datasets:
        sty_name = utils.get_name(style_path)

        dset_style_train, dset_style_valid = utils.load_dset(style_path, drop_labels=False, bs=bs)
        dset_style_train = utils.extract_labels(dset_style_train)
        dset_style_valid = utils.extract_labels(dset_style_valid)

        stylized_train, stylized_valid = get_generated_dataset(style_path, GENERATED_SEQUENCES_FOLDER)

        fake_style_dataset[f"{sty_name}_train"] = stylized_train
        fake_style_dataset[f"{sty_name}_valid"] = stylized_valid
        
        real_style_dataset[f"{sty_name}_train"] = dset_style_train
        real_style_dataset[f"{sty_name}_valid"] = dset_style_valid
        
    return real_style_dataset, fake_style_dataset

In [29]:
dsets_real, dsets_fake = get_real_fake_datasets(STYLE_DATASETS)

# Generate Some Sequences.

In [30]:
def viz_comparison(real_dataset:dict, fake_dataset: dict, save_folder:str):

    fig = plt.figure(figsize=(10, 20))

    for i, style in enumerate(STYLE_DATASETS):
        style_name = utils.get_name(style)
        
        real_sequence = next(iter(real_dataset[f'{style_name}_valid']))[0][0]
        fake_sequence = next(iter(fake_dataset[f'{style_name}_valid']))[0][0]
        
        ax = plt.subplot(len(STYLE_DATASETS), 2, 2*i+1)
        ax.set_title(f'Real Sequence Style {i+ 1}.')
        ax.plot(real_sequence)
        ax.set_ylim(-0.1, 20.1)
        ax.grid()
        
        ax = plt.subplot(len(STYLE_DATASETS), 2, 2*i+2)
        ax.set_title(f'Fake Sequence Style {i+ 1}.')
        ax.plot(fake_sequence)
        ax.set_ylim(-0.1, 20.1)
        
        ax.grid()
        
    plt.tight_layout()
    plt.savefig(f"{save_folder}/generation_comparison.png")
    plt.close(fig)


In [31]:
viz_comparison(dsets_real, dsets_fake, SAVE_FOLDER)

# Compute our simple Metric

In [32]:
from src.eval import simple_metric

def get_batches(dset, n_batches):
    _arr = np.array([c[0] for c in dset.take(n_batches)])
    return _arr.reshape((-1, _arr.shape[-2], _arr.shape[-1]))


def compute_metrics(dset_real, dset_fake, style_names, model_folder: str):
    def time_shift_evaluation(big_batch):
        return [simple_metric.estimate_time_shift(big_batch, 0, i) for i in range(big_batch.shape[-1])]
    
    real_noise_metric, gen_noise_metric = [], []
    real_ampl_metric, gen_ampl_metric = [], []
    real_ts_metric, gen_ts_metric = [], []

    for style_name in style_names:
        print(f"[+] Compute metric for {style_name}")
        real_batch = get_batches(dset_real[f"{style_name}_valid"], 10)
        fake_batch = get_batches(dset_fake[f"{style_name}_valid"], 10)
        
        real_noise_metric.append(simple_metric.simple_metric_on_noise(real_batch)[-1])
        gen_noise_metric.append(simple_metric.simple_metric_on_noise(fake_batch)[-1])
        
        real_ampl_metric.append(simple_metric.extract_amplitude_from_signals(real_batch))
        gen_ampl_metric.append(simple_metric.extract_amplitude_from_signals(fake_batch))
        
        real_ts_metric.append(time_shift_evaluation(real_batch))
        gen_ts_metric.append(time_shift_evaluation(fake_batch))
        
    real_mean_noises = np.mean(real_noise_metric, axis=-1).reshape((-1, 1))
    fake_mean_noises = np.mean(gen_noise_metric, axis=-1).reshape((-1, 1))
    mean_noises = np.concatenate((real_mean_noises, fake_mean_noises), axis=-1)
    
    real_mean_ampl = np.mean(real_ampl_metric, axis=-1).reshape((-1, 1))
    fake_mean_ampl = np.mean(gen_ampl_metric, axis=-1).reshape((-1, 1))
    mean_ampl= np.concatenate((real_mean_ampl, fake_mean_ampl), axis=-1)
    
    real_mean_time_shift = np.mean(real_ts_metric, axis=-1).reshape((-1, 1))
    fake_mean_time_shift = np.mean(gen_ts_metric, axis=-1).reshape((-1, 1))
    mean_time_shift= np.concatenate((real_mean_time_shift, fake_mean_time_shift), axis=-1)
    
    df_noises = pd.DataFrame(data=mean_noises, index=style_names, columns=['Real', 'Fake'])
    df_ampl = pd.DataFrame(data=mean_ampl, index=style_names, columns=['Real', 'Fake'])
    df_time_shift = pd.DataFrame(data=mean_time_shift, index=style_names, columns=['Real', 'Fake'])
    
    df_noises.to_hdf(f'{model_folder}/noise_metric.h5', key="data")
    df_ampl.to_hdf(f'{model_folder}/ampl_metric.h5', key="data")
    df_time_shift.to_hdf(f'{model_folder}/time_shift_metric.h5', key="data")
    
    return df_noises, df_ampl, df_time_shift

def plot_metric(df_metric:pd.DataFrame, title, save_to):
    fig = plt.figure(figsize=(18, 10))
    ax = plt.subplot(111)
    
    df_metric["Real"].plot(ax=ax, style='.-')
    df_metric["Fake"].plot(ax=ax, style='.-')
    
    ax.grid(True)
    ax.set_title(title)
    ax.legend()
    
    plt.savefig(save_to)
    plt.close(fig)

In [33]:
df_metric_noise, df_metric_amplitude, df_metric_time_shift = compute_metrics(dsets_real, dsets_fake, STYLE_NAMES, SAVE_FOLDER)

[+] Compute metric for 1.0_1.0
[+] Compute metric for 2.0_2.0
[+] Compute metric for 3.0_3.0
[+] Compute metric for 4.0_4.0
[+] Compute metric for 5.0_5.0
[+] Compute metric for 6.0_6.0
[+] Compute metric for 7.0_7.0
[+] Compute metric for 8.0_8.0
[+] Compute metric for 9.0_9.0
[+] Compute metric for 10.0_10.0


In [34]:
plot_metric(df_metric_amplitude, "[Style Time Bench]: Metric on Amplitude", f"{SAVE_FOLDER}/STB_")
plot_metric(df_metric_noise, "[Style Time Bench]: Metric on Noise", SAVE_FOLDER)
plot_metric(df_metric_time_shift, "[Style Time Bench]: Metric on Time Shift", SAVE_FOLDER)

# TSTR

In [35]:
from src.eval import tstr as tstr_utils

def tstr(
    dset_train_real,
    dset_valid_real,
    dset_train_fake, 
    dset_valid_fake, 
    save_to:str):

    print('[+] Train Real, Test Real.')
    real_performances, hist_real = tstr_utils.train_naive_discriminator(dset_train_real, dset_valid_real, SEQ_SHAPE, epochs=5, n_classes=5)

    print("[+] Train Synthetic, Test Synthetic")
    gen_perf1, hist_fake1 = tstr_utils.train_naive_discriminator(dset_train_fake, dset_valid_fake, SEQ_SHAPE, epochs=5, n_classes=5)
    
    print("[+] Train Synthetic, Test Real")
    gen_perf2, hist_fake2 = tstr_utils.train_naive_discriminator(dset_train_fake, dset_valid_real, SEQ_SHAPE, epochs=5, n_classes=5)
    
    
    fig = plt.figure(figsize=(18, 10))
    
    ax = plt.subplot(211)
    
    plt.plot(hist_real.history["loss"], ".-", label='Train Real Test Real (Train)')
    plt.plot(hist_real.history["val_loss"], ".-", label='Train Real Test Real (Valid)')
    
    plt.plot(hist_fake1.history["loss"], ".-", label='Train Synthetic, Test Synthetic (Train)')
    plt.plot(hist_fake1.history["val_loss"], ".-", label='Train Synthetic, Test Synthetic (Valid)')
    
    plt.plot(hist_fake2.history["loss"], ".-", label='Train Real, Test Synthetic (Train)')
    plt.plot(hist_fake2.history["val_loss"], ".-", label='Train Real, Test Synthetic (Valid)')
    
    ax.legend()
    ax.grid()
    ax = plt.subplot(212)
    
    plt.plot(hist_real.history["sparse_categorical_accuracy"], ".-", label='Classification Acc on Real (Train)')
    plt.plot(hist_real.history["val_sparse_categorical_accuracy"], ".-", label='Classification Acc on Real (Valid)')
    
    plt.plot(hist_fake1.history["sparse_categorical_accuracy"], ".-", label='Train Synthetic, Test Synthetic (Train)')
    plt.plot(hist_fake1.history["val_sparse_categorical_accuracy"], ".-", label='Train Synthetic, Test Synthetic (Valid)')
    
    plt.plot(hist_fake2.history["sparse_categorical_accuracy"], ".-", label='Train Real, Test Synthetic (Train)')
    plt.plot(hist_fake2.history["val_sparse_categorical_accuracy"], ".-", label='Train Real, Test Synthetic (Valid)')
    
    ax.grid()
    ax.legend()
    
    plt.savefig(save_to)
    
    plt.close(fig)
    
    return real_performances, gen_perf2


def tstr_on_styles(real_dataset, fake_dataset, style_names, model_folder):
    tstr_stats = {}

    for style_ in style_names:
        print(f'[+] Training on dataset {style_}.')
        
        perf_on_real, perf_on_fake = tstr(
            real_dataset[f"{style_}_train"],
            real_dataset[f"{style_}_valid"],
            fake_dataset[f"{style_}_train"],
            fake_dataset[f"{style_}_valid"], 
            f'{model_folder}/tstr_{style_}.png'
        )
        
        tstr_stats[f"{style_}_real"] = [perf_on_real]
        tstr_stats[f"{style_}_gen"] = [perf_on_fake]
        
    tstr_stats = pd.DataFrame.from_dict(tstr_stats)

    tstr_stats.to_hdf(f"{model_folder}/tstr.h5", key="data")
    
    return tstr_stats

In [36]:
tstr_on_styles(dsets_real, dsets_fake, STYLE_NAMES, SAVE_FOLDER)

[+] Training on dataset 1.0_1.0.
[+] Train Real, Test Real.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Train Synthetic, Test Synthetic
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Train Synthetic, Test Real
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Training on dataset 2.0_2.0.
[+] Train Real, Test Real.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Train Synthetic, Test Synthetic
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Train Synthetic, Test Real
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Training on dataset 3.0_3.0.
[+] Train Real, Test Real.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Train Synthetic, Test Synthetic
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Train Synthetic, Test Real
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Training on dataset 4.0_4.0.
[+] Train Real, Test Real.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[+] Train Synthetic, Test Synthetic
Epoch 1/5
Epoch 2/5
Epo

Unnamed: 0,1.0_1.0_real,1.0_1.0_gen,2.0_2.0_real,2.0_2.0_gen,3.0_3.0_real,3.0_3.0_gen,4.0_4.0_real,4.0_4.0_gen,5.0_5.0_real,5.0_5.0_gen,6.0_6.0_real,6.0_6.0_gen,7.0_7.0_real,7.0_7.0_gen,8.0_8.0_real,8.0_8.0_gen,9.0_9.0_real,9.0_9.0_gen,10.0_10.0_real,10.0_10.0_gen
0,0.995287,0.295179,0.998739,0.564406,0.999505,0.295179,1.0,0.455512,1.0,0.64777,1.0,0.356133,1.0,0.5394,1.0,0.262023,1.0,0.212071,1.0,0.36757


# UMAP

In [22]:
import umap
from sklearn.manifold import TSNE

def multi_umap_plot(real_styles, gen_styles):
    (_, _, seq_len, n_sigs) = real_styles.shape
    
    print(real_styles.shape)
    print(fake_batches.shape)
    
    concatenated = tf.concat((real_styles, gen_styles), 0)

    concatenated = tf.reshape(concatenated, (-1, seq_len, n_sigs))
    concatenated = tf.transpose(concatenated, (0, 2, 1))
    
    concatenated = tf.reshape(concatenated, (concatenated.shape[0], -1))

    # # # Normalize all sequences for the reducer.
    _mean, _std = tf.math.reduce_mean(concatenated), tf.math.reduce_std(concatenated)
    concatenated = (concatenated - _mean)/_std

    reducer = umap.UMAP(n_neighbors=300, min_dist=1., random_state=42, metric="euclidean") 
    reduced = reducer.fit_transform(concatenated)
    return reduced


def multi_tsne_plot(real_styles, gen_styles):
    (_, _, seq_len, n_sigs) = real_styles.shape
    
    concatenated = tf.concat((real_styles, gen_styles), 0)

    concatenated = tf.reshape(concatenated, (-1, seq_len, n_sigs))
    concatenated = tf.transpose(concatenated, (0, 2, 1))
    
    concatenated = tf.reshape(concatenated, (concatenated.shape[0], -1))

    # # # Normalize all sequences for the reducer.
    _mean, _std = tf.math.reduce_mean(concatenated), tf.math.reduce_std(concatenated)
    concatenated = (concatenated - _mean)/_std

    reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=150, random_state=42)
    reduced = reducer.fit_transform(concatenated)
    return reduced


def generate_per_style_batch(dset_real, dset_fake, style_names):
    real_batches = []
    fake_batches = []

    for _, style_ in enumerate(style_names):
        real_style_batch = get_batches(dset_real[f"{style_}_valid"], 10)
        fake_style_batch = get_batches(dset_fake[f"{style_}_valid"], 10)
        
        real_batches.append(real_style_batch)
        fake_batches.append(fake_style_batch)
        
    return np.array(real_batches), np.array(fake_batches) 

def dimentionality_reduction_plot(real_batches, fake_batches, style_names, model_folder, type="umap"):
    
    if type == 'umap':
        reduced_points = multi_umap_plot(real_batches, fake_batches)
    elif type == "tsne":
        reduced_points = multi_tsne_plot(real_batches, fake_batches)
    else: 
        raise Exception("No Dimentionality reduction algorthm selected.")
        
    n_styles = len(style_names)
    (n_styles, bs, _, _) = real_batches.shape

    cmap = plt.get_cmap("tab20")
    colors = cmap(np.linspace(0, 1, n_styles*2))

    plt.figure(figsize=(18, 10))
    for i in range(n_styles):
        ri, ro = i*bs, (i+1)*bs
        fi, fo =  (i+ n_styles) * bs, (i+ n_styles+ 1) * bs
        
        plt.scatter(reduced_points[ri:ro, 0], reduced_points[ri:ro, 1], label=f"Real Style {i+ 1}", alpha=0.5, color=colors[2*i], s=4)
        plt.scatter(reduced_points[fi:fo, 0], reduced_points[fi:fo, 1], label=f"Generated Style {i+ 1}", alpha=0.5, color=colors[2*i+1 ], s=4)
        
    plt.grid()
    plt.title(f"{type} Reduction of Time Series", fontsize=15)
    plt.ylabel(f"y_{type}", fontsize=15)
    plt.xlabel(f"x_{type}", fontsize=15)
    plt.legend()
    plt.savefig(f"{model_folder}/{type}.png")
    plt.show()

In [23]:
real_batches, fake_batches = generate_per_style_batch(dsets_real, dsets_fake, STYLE_NAMES)

In [24]:
dimentionality_reduction_plot(real_batches, fake_batches, STYLE_NAMES, SAVE_FOLDER, "umap")
dimentionality_reduction_plot(real_batches, fake_batches, STYLE_NAMES, SAVE_FOLDER, "tsne")

(10, 640, 64, 7)
(10, 30, 64, 7)


InvalidArgumentError: {{function_node __wrapped__ConcatV2_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} ConcatOp : Dimension 1 in both shapes must be equal: shape[0] = [10,640,64,7] vs. shape[1] = [10,30,64,7] [Op:ConcatV2] name: concat