In [1]:
import matplotlib.pyplot as plt
from torch.utils import data
import numpy as np
from tqdm import tqdm
import random
from visualisationMetrics import *
from dataLoader import *
from utils.utils import to_pkl, from_pkl, js_divergence, fid
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from collections import defaultdict




In [2]:
NSETS = 20

# Load results

### Load real data

In [3]:
real_data = load_dataset(data_mode='Train')
real_dataloader = data.DataLoader(real_data, batch_size=1, num_workers=1, shuffle=True)

real_paths = []
real_conds = []

for i, (cond, sim) in enumerate(real_dataloader):
    sim = sim.cpu().detach().numpy()
    sim = sim.reshape(sim.shape[1], sim.shape[3])
    real_paths.append(sim)
    cond = cond.cpu().detach().numpy()
    cond = cond.reshape(cond.shape[1], cond.shape[3])
    real_conds.append(cond)

real_paths = np.array(real_paths)
real_conds = np.array(real_conds)
print(real_paths.shape)
print(real_conds.shape)

real_vals = np.transpose(real_paths, (0,2,1))
print(real_vals.shape)
to_pkl('results/real_vals.pkl', real_vals)

[*********************100%%**********************]  1 of 1 completed


X_train's shape is (4328, 1, 1, 2), X_test's shape is (481, 1, 1, 2)
y_train's label shape is (4328, 1, 1, 43), y_test's label shape is (481, 1, 1, 43)
(481, 1, 43)
(481, 1, 2)
(481, 43, 1)


### Load synthetic data

In [4]:
from LoadSynthetic import *

In [9]:
def get_syn_vals(model_path='./logs/latest/Model/checkpoint', n=10 **kwargs):
    data = Synthetic_Dataset(model_path=model_path, n=n, dataset=real_data, seq_len=real_data.output_size, conditions_dim=real_data.X_train.shape[-1], **kwargs)
    dataloader = data.DataLoader(syn_data, batch_size=1, num_workers=1, shuffle=True, **kwargs)

    paths = []
    conds = []

    for i, (cond, sim) in enumerate(dataloader):
        sim = sim.cpu().detach().numpy()
        sim = sim.reshape(sim.shape[1], sim.shape[3])
        paths.append(sim)
        cond = cond.cpu().detach().numpy()
        cond = cond.reshape(cond.shape[1], cond.shape[3])
        conds.append(cond)
        
    paths = np.array(paths)
    conds = np.array(conds)
    vals = np.transpose(paths, (0,2,1))
    np.random.shuffle(vals)
    return vals

def get_syn_val_sets(nsets, **kwargs):
    val_sets = []
    for i in range(nsets):
        val_sets.append(get_syn_vals(**kwargs))
    return val_sets

In [10]:
syn_val_sets = get_syn_val_sets(nsets=NSETS)
to_pkl('results/syn_val_sets.pkl', syn_val_sets)

### Load benchmark data

In [11]:
from utils.utils import GBM_Simulator

In [12]:
def get_gbm_vals(n=10, **kwargs):
    simulator = GBM_Simulator(dataset=real_data, nsamples=n)
    paths = simulator.run()
    paths = np.reshape(paths, (-1,1,paths.shape[-1]))

    vals = np.transpose(paths, (0, 2, 1))
    np.random.shuffle(vals)
    return vals

def get_gbm_val_sets(nsets, **kwargs):
    val_sets = []
    for i in range(nsets):
        val_sets.append(get_syn_vals(**kwargs))
    return val_sets

In [13]:
gbm_val_sets = get_gbm_val_sets(nsets=NSETS)
to_pkl('results/gbm_val_sets.pkl', gbm_val_sets)

# Run evaluations

In [3]:
real_vals = from_pkl('results/real_vals.pkl')
syn_val_sets = from_pkl('results/syn_val_sets.pkl')
gbm_val_sets = from_pkl('results/gbm_val_sets.pkl')

In [6]:
eval_scores = {
    'js_pca': {
        'syn': [],
        'gbm': []
    },
    'js_tsne': {
        'syn': [],
        'gbm': []
    },
    'fid': {
        'syn': [],
        'gbm': []
    }
}

In [7]:
def js_div(real_vals, syn_vals, gbm_vals, mode, n_components=10, **kwargs):
    real, syn, gbm = dim_reduction([real_vals, syn_vals, gbm_vals], n_components=n_components, mode=mode, **kwargs)
    syn_score = js_divergence(real, syn, verbose=False)
    gbm_score = js_divergence(real, gbm, verbose=False)
    return syn_score, gbm_score

In [21]:
for syn_vals, gbm_vals in tqdm(zip(syn_val_sets, gbm_val_sets)):
    # for iter in range(3):
    #     # JS-PCA
    #     syn_score, gbm_score = js_div(real_vals, syn_vals, gbm_vals, mode='pca')
    #     eval_scores['js_pca']['syn'].append(syn_score)
    #     eval_scores['js_pca']['gbm'].append(gbm_score)
        
    #     # JS-TSNE
    #     syn_score, gbm_score = js_div(real_vals, syn_vals, gbm_vals, mode='tsne')
    #     eval_scores['js_tsne']['syn'].append(syn_score)
    #     eval_scores['js_tsne']['gbm'].append(gbm_score)
    
    # FID
    syn_score = fid(real_vals, syn_vals)
    gbm_score = fid(real_vals, gbm_vals)
    eval_scores['fid']['syn'].append(syn_score)
    eval_scores['fid']['gbm'].append(gbm_score)

to_pkl('results/eval_scores.pkl', eval_scores)

20it [00:00, 125.23it/s]


# Run hypothesis tests

In [11]:
from scipy.stats import ttest_ind
eval_scores = from_pkl('results/eval_scores.pkl')

In [13]:
def hypothesis_test(syn_scores, gbm_scores):
    syn_scores = np.array(syn_scores)
    gbm_scores = np.array(gbm_scores)
    print("Synthetic:")
    print(f"\tmean  = {syn_scores.mean()}")
    print(f"\tstdev = {syn_scores.std()}")
    print("Benchmark:")
    print(f"\tmean  = {gbm_scores.mean()}")
    print(f"\tstdev = {gbm_scores.std()}")    

### PCA JS-Divergence

In [14]:
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['gbm'])

Synthetic:
	mean  = 3.4993658878660705
	stdev = 0.36186398609631226
Benchmark:
	mean  = 3.435675627903045
	stdev = 0.42417521597528157


### TSNE JS-Divergence

In [16]:
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['gbm'])

Synthetic:
	mean  = 0.23291941152119952
	stdev = 0.025523383725958342
Benchmark:
	mean  = 0.23105095644953846
	stdev = 0.030810723779002624


### FID Score

In [22]:
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['gbm'])

Synthetic:
	mean  = 0.012538626986795778
	stdev = 0.0003862124438919217
Benchmark:
	mean  = 0.012547210414578019
	stdev = 0.0003672441023192968
