In [1]:
import matplotlib.pyplot as plt
from torch.utils import data
import numpy as np
from tqdm import tqdm
import random
from visualisationMetrics import *
from dataLoader import *
from utils.utils import *
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from collections import defaultdict
from tqdm import tqdm




In [2]:
# Number of sample sets generated per data source; passed as `nsets` to the
# synthetic and benchmark set generators further down.
NSETS = 20
# Sub-directory of ./logs that holds the model being evaluated; also used as
# the root for the `logs/<LOGDIR>/results/*.pkl` files written by this notebook.
LOGDIR = 'latest-model'

# Load results

### Load real data

In [3]:
# Build the real dataset and wrap it in a loader that yields one sample per
# batch, in shuffled order.
# NOTE(review): data_mode='Train' is requested, yet iterating this loader
# yields 485 samples, which matches the X_test size printed by load_dataset
# rather than X_train (4344) -- confirm which split is intended.
real_data = load_dataset(data_mode='Train')
real_dataloader = data.DataLoader(real_data, batch_size=1, num_workers=1, shuffle=True)

[*********************100%%**********************]  1 of 1 completed


X_train's shape is (4344, 1, 1, 3), X_test's shape is (485, 1, 1, 3)
y_train's label shape is (4344, 1, 1, 43), y_test's label shape is (485, 1, 1, 43)


In [4]:
# Collect every real sample (and its conditioning vector) into NumPy arrays.
real_paths = []
real_conds = []

for i, (cond, sim) in enumerate(real_dataloader):
    # Each batch is converted to NumPy and collapsed to 2-D by keeping axes 1
    # and 3; the reshape only succeeds if axes 0 and 2 both have size 1
    # (batch_size=1 covers axis 0).
    sim = sim.cpu().detach().numpy()
    sim = sim.reshape(sim.shape[1], sim.shape[3])
    real_paths.append(sim)
    cond = cond.cpu().detach().numpy()
    cond = cond.reshape(cond.shape[1], cond.shape[3])
    real_conds.append(cond)

real_paths = np.array(real_paths)
real_conds = np.array(real_conds)
print(real_paths.shape)
print(real_conds.shape)

# Swap the last two axes so the layout matches the synthetic/benchmark arrays
# built later (they apply the same transpose), then persist for evaluation.
real_vals = np.transpose(real_paths, (0,2,1))
print(real_vals.shape)
to_pkl(f'results/real_vals.pkl', real_vals)

(485, 1, 43)
(485, 1, 3)
(485, 43, 1)


### Load synthetic data

In [5]:
from LoadSynthetic import *

In [6]:
def get_syn_vals(model_path=f'./logs/{LOGDIR}', n=10, **kwargs):
    """Generate one set of synthetic samples and return it as a shuffled array.

    Args:
        model_path: Directory of the trained model handed to
            ``Synthetic_Dataset``. The default is built from ``LOGDIR`` when
            this ``def`` is executed, so re-run this cell after changing
            ``LOGDIR``.
        n: Forwarded to ``Synthetic_Dataset`` as ``n``.
        **kwargs: Forwarded unchanged to BOTH ``Synthetic_Dataset`` and
            ``DataLoader``.
            NOTE(review): an option understood by only one of the two will
            presumably be rejected by the other -- confirm before passing any.

    Returns:
        ``np.ndarray`` with one entry per loader batch along axis 0; each
        entry is the batch reshaped to ``(shape[1], shape[3])`` and then
        transposed. Rows are shuffled in place along axis 0.
    """
    syn_data = Synthetic_Dataset(model_path=model_path, n=n, dataset=real_data, **kwargs)
    dataloader = data.DataLoader(syn_data, batch_size=1, num_workers=1, shuffle=True, **kwargs)

    paths = []
    # Only the simulated paths are needed; the conditioning tensor that
    # accompanies each batch is ignored.
    for _cond, sim in dataloader:
        sim = sim.cpu().detach().numpy()
        paths.append(sim.reshape(sim.shape[1], sim.shape[3]))

    vals = np.transpose(np.array(paths), (0, 2, 1))
    np.random.shuffle(vals)
    return vals

def get_syn_val_sets(nsets, **kwargs):
    """Return a list of ``nsets`` synthetic sample sets.

    Each element is the result of a separate ``get_syn_vals(**kwargs)`` call;
    progress is reported through ``tqdm``.
    """
    return [get_syn_vals(**kwargs) for _ in tqdm(range(nsets))]

In [9]:
# Generate NSETS synthetic sample sets with get_syn_vals' default arguments
# (no extra kwargs are passed) and pickle them next to the model's logs.
syn_val_sets = get_syn_val_sets(nsets=NSETS)
to_pkl(f'logs/{LOGDIR}/results/syn_val_sets.pkl', syn_val_sets)

In [None]:
# NOTE(review): this repeats the save performed at the end of the previous
# cell (same path, same object) and the cell has no execution count; it looks
# like a leftover and can probably be deleted.
to_pkl(f'logs/{LOGDIR}/results/syn_val_sets.pkl', syn_val_sets)

### Load benchmark data

In [5]:
def get_benchmark_vals(n=10, mode='gbm', **kwargs):
    """Simulate one set of benchmark paths and return it as a shuffled array.

    Args:
        n: Forwarded to the simulator as ``nsamples``.
        mode: Which simulator to use: ``'gbm'``, ``'cev'`` or ``'heston'``.
        **kwargs: Accepted for call-site compatibility but not used.

    Returns:
        ``np.ndarray`` of shape ``(N, T, 1)`` where ``T`` is the last axis of
        the simulator output and ``N`` is the product of its remaining axes.
        Rows are shuffled in place along axis 0.

    Raises:
        NotImplementedError: If ``mode`` is not one of the supported names.
    """
    if mode == 'gbm':
        simulator = GBM_Simulator(dataset=real_data, nsamples=n)
    elif mode == 'cev':
        simulator = CEV_Simulator(dataset=real_data, nsamples=n)
    elif mode == 'heston':
        simulator = Heston_Simulator(dataset=real_data, nsamples=n)
    else:
        # Name the offending value so a typo in `mode` is easy to spot.
        raise NotImplementedError(
            f"Unknown benchmark mode {mode!r}; expected 'gbm', 'cev' or 'heston'"
        )

    paths = simulator.run()
    # Flatten all leading axes into one and insert a singleton axis, then swap
    # the last two axes to obtain (N, T, 1).
    paths = np.reshape(paths, (-1, 1, paths.shape[-1]))
    vals = np.transpose(paths, (0, 2, 1))
    np.random.shuffle(vals)
    return vals

def get_benchmark_val_sets(nsets, mode='gbm', **kwargs):
    """Return a list of ``nsets`` benchmark sample sets for one simulator.

    Each element is the result of a separate
    ``get_benchmark_vals(mode=mode, **kwargs)`` call; progress is reported
    through ``tqdm``.
    """
    return [get_benchmark_vals(mode=mode, **kwargs) for _ in tqdm(range(nsets))]

In [6]:
# For each benchmark simulator, generate NSETS sample sets and pickle them to
# results/<mode>_val_sets.pkl. `benchmark_val_sets` is overwritten on every
# pass, so only the pickles keep all three results.
# NOTE(review): this is slow -- the recorded progress bar shows roughly
# 43 minutes for 20 sets.
for mode in ['gbm', 'cev', 'heston']:
    benchmark_val_sets = get_benchmark_val_sets(mode=mode, nsets=NSETS)
    to_pkl(f'results/{mode}_val_sets.pkl', benchmark_val_sets)

100%|██████████| 20/20 [42:47<00:00, 128.36s/it]


# Run evaluations

In [10]:
# Reload the arrays pickled by the cells above. Real and benchmark data live
# under results/, while the synthetic sets live under the model's own
# logs/<LOGDIR>/results/ directory.
real_vals = from_pkl(f'results/real_vals.pkl')
syn_val_sets = from_pkl(f'logs/{LOGDIR}/results/syn_val_sets.pkl')
gbm_val_sets = from_pkl(f'results/gbm_val_sets.pkl')
cev_val_sets = from_pkl(f'results/cev_val_sets.pkl')
heston_val_sets = from_pkl(f'results/heston_val_sets.pkl')

In [11]:
# Score container: eval_scores[metric][source] is a list that the evaluation
# loop appends to. Every (metric, source) pair gets its own empty list.
eval_scores = {
    metric: {source: [] for source in ('syn', 'gbm', 'cev', 'heston')}
    for metric in ('js_pca', 'js_tsne', 'fid')
}

In [12]:
def js_div(real_vals, other_vals_list, mode, n_components=10, **kwargs):
    """Score each candidate set against the real data after a joint reduction.

    ``real_vals`` and every entry of ``other_vals_list`` are passed together
    to ``dim_reduction`` (with ``n_components``, ``mode`` and any extra
    ``kwargs``). The first reduced array is then compared with each of the
    following ones via ``js_divergence``.

    Returns:
        A list with one score per entry of ``other_vals_list``, in the same
        order.
    """
    reduced = dim_reduction(
        [real_vals] + other_vals_list,
        n_components=n_components,
        mode=mode,
        **kwargs,
    )
    # Index 0 is the real data; indices 1..len(other_vals_list) are the candidates.
    return [
        js_divergence(reduced[0], reduced[idx], verbose=False)
        for idx in range(1, len(other_vals_list) + 1)
    ]

In [13]:
# How many times the JS-divergence estimate is recomputed for each group of
# sample sets. Every repeat reuses the same inputs, so the repeats differ only
# through whatever randomness the reduction itself has.
N_JS_REPEATS = 3
# Order in which candidate sets are handed to js_div / fid; it must match the
# order of the `candidates` list built inside the loop.
SCORE_SOURCES = ('syn', 'gbm', 'cev', 'heston')

# zip() stops at the shortest input, so tell tqdm that length explicitly;
# without `total` the bar can only display a running count.
n_groups = min(len(syn_val_sets), len(gbm_val_sets), len(cev_val_sets), len(heston_val_sets))
val_set_groups = zip(syn_val_sets, gbm_val_sets, cev_val_sets, heston_val_sets)

for syn_vals, gbm_vals, cev_vals, heston_vals in tqdm(val_set_groups, total=n_groups):
    candidates = [syn_vals, gbm_vals, cev_vals, heston_vals]

    # JS divergence after PCA, then after t-SNE. The loop variable is
    # deliberately not called `iter`, which would overwrite the builtin for
    # the rest of the kernel session.
    for repeat in range(N_JS_REPEATS):
        for metric, reduction in (('js_pca', 'pca'), ('js_tsne', 'tsne')):
            scores = js_div(real_vals, candidates, mode=reduction)
            for source, score in zip(SCORE_SOURCES, scores):
                eval_scores[metric][source].append(score)

    # FID is computed once per group of sets.
    for source, vals in zip(SCORE_SOURCES, candidates):
        eval_scores['fid'][source].append(fid(real_vals, vals))

to_pkl(f'logs/{LOGDIR}/results/eval_scores.pkl', eval_scores)

0it [00:00, ?it/s]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.297s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039014
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.960800
[t-SNE] KL divergence after 300 iterations: 1.620564
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.217s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039014
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.961090
[t-SNE] KL divergence after 300 iterations: 1.620436


1it [00:19, 19.32s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.331s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039381
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.833694
[t-SNE] KL divergence after 300 iterations: 1.666568
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.002s...
[t-SNE] Computed neighbors for 2425 samples in 0.406s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039381
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.832275
[t-SNE] KL divergence after 300 iterations: 1.662717


2it [00:41, 21.28s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.333s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039975
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.919968
[t-SNE] KL divergence after 300 iterations: 1.703150
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.361s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039975
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.919785
[t-SNE] KL divergence after 300 iterations: 1.704209


3it [01:09, 24.21s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.331s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039493
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.531761
[t-SNE] KL divergence after 300 iterations: 1.600297
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.016s...
[t-SNE] Computed neighbors for 2425 samples in 0.346s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039493
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.531654
[t-SNE] KL divergence after 300 iterations: 1.600002


4it [01:41, 27.07s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.314s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039494
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.782043
[t-SNE] KL divergence after 300 iterations: 1.822071
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.348s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039494
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.781197
[t-SNE] KL divergence after 300 iterations: 1.822197


5it [02:17, 30.36s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.551s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039732
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.741463
[t-SNE] KL divergence after 300 iterations: 1.749426
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.270s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039732
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.741623
[t-SNE] KL divergence after 300 iterations: 1.748052


6it [02:59, 34.50s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.373s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.038846
[t-SNE] KL divergence after 250 iterations with early exaggeration: 65.053246
[t-SNE] KL divergence after 300 iterations: 1.788158
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.337s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.038846
[t-SNE] KL divergence after 250 iterations with early exaggeration: 65.053909
[t-SNE] KL divergence after 300 iterations: 1.791058


7it [03:36, 35.05s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.336s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039147
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.672249
[t-SNE] KL divergence after 300 iterations: 1.680900
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.307s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039147
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.672523
[t-SNE] KL divergence after 300 iterations: 1.686049


8it [04:11, 35.27s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.327s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039014
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.597359
[t-SNE] KL divergence after 300 iterations: 1.706527
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.375s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039014
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.599205
[t-SNE] KL divergence after 300 iterations: 1.706703


9it [04:48, 35.78s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.351s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039401
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.717590
[t-SNE] KL divergence after 300 iterations: 1.638181
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.379s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039401
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.717842
[t-SNE] KL divergence after 300 iterations: 1.638284


10it [05:25, 36.18s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.373s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.038783
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.742950
[t-SNE] KL divergence after 300 iterations: 1.610184
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.609s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.038783
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.743347
[t-SNE] KL divergence after 300 iterations: 1.610346


11it [06:16, 40.53s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.368s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039186
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.847435
[t-SNE] KL divergence after 300 iterations: 1.645390
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.371s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039186
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.847321
[t-SNE] KL divergence after 300 iterations: 1.646004


12it [07:07, 43.76s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.360s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039013
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.679375
[t-SNE] KL divergence after 300 iterations: 1.608071
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.336s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039013
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.679832
[t-SNE] KL divergence after 300 iterations: 1.607768


13it [07:58, 45.96s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.367s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039360
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.638359
[t-SNE] KL divergence after 300 iterations: 1.625686
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.002s...
[t-SNE] Computed neighbors for 2425 samples in 0.420s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039360
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.638817
[t-SNE] KL divergence after 300 iterations: 1.626169


14it [08:48, 47.38s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.439s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039422
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.759796
[t-SNE] KL divergence after 300 iterations: 1.599640
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.377s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039422
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.760033
[t-SNE] KL divergence after 300 iterations: 1.599617


15it [09:41, 48.93s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.002s...
[t-SNE] Computed neighbors for 2425 samples in 0.606s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039529
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.687241
[t-SNE] KL divergence after 300 iterations: 1.612697
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.002s...
[t-SNE] Computed neighbors for 2425 samples in 0.389s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039529
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.687172
[t-SNE] KL divergence after 300 iterations: 1.612649


16it [10:31, 49.32s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.430s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039094
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.766701
[t-SNE] KL divergence after 300 iterations: 1.594919
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.002s...
[t-SNE] Computed neighbors for 2425 samples in 0.522s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039094
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.766472
[t-SNE] KL divergence after 300 iterations: 1.594997


17it [11:26, 50.85s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.355s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039097
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.784714
[t-SNE] KL divergence after 300 iterations: 1.640230
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.380s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039097
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.784172
[t-SNE] KL divergence after 300 iterations: 1.644475


18it [12:18, 51.31s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.001s...
[t-SNE] Computed neighbors for 2425 samples in 0.366s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039219
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.604507
[t-SNE] KL divergence after 300 iterations: 1.678837
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.000s...
[t-SNE] Computed neighbors for 2425 samples in 0.600s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039219
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.604942
[t-SNE] KL divergence after 300 iterations: 1.684098


19it [13:09, 51.24s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.002s...
[t-SNE] Computed neighbors for 2425 samples in 0.370s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039685
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.980896
[t-SNE] KL divergence after 300 iterations: 1.630360
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2425 samples in 0.002s...
[t-SNE] Computed neighbors for 2425 samples in 0.575s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2425
[t-SNE] Computed conditional probabilities for sample 2000 / 2425
[t-SNE] Computed conditional probabilities for sample 2425 / 2425
[t-SNE] Mean sigma: 0.039685
[t-SNE] KL divergence after 250 iterations with early exaggeration: 64.980957
[t-SNE] KL divergence after 300 iterations: 1.630423


20it [13:49, 41.46s/it]


# Run hypothesis tests

In [14]:
from scipy.stats import ttest_ind
# Reload the scores pickled by the evaluation cell so the tests below can run
# without recomputing them.
eval_scores = from_pkl(f'logs/{LOGDIR}/results/eval_scores.pkl')

In [15]:
def hypothesis_test(syn_scores, gbm_scores):
    """Print summary statistics and a one-sided t-test for two score samples.

    For each sample the mean and the sample standard deviation (``ddof=1``)
    are printed, followed by the p-value of ``scipy.stats.ttest_ind`` with
    ``alternative='less'``, i.e. the alternative hypothesis is that the mean
    of ``syn_scores`` is lower than the mean of ``gbm_scores``.

    Args:
        syn_scores: Scores of the synthetic data.
        gbm_scores: Scores of the benchmark being compared against (any
            benchmark, despite the parameter name).

    Returns:
        None; all results are printed.
    """
    samples = (
        ("Synthetic:", np.array(syn_scores)),
        ("Benchmark:", np.array(gbm_scores)),
    )
    for heading, scores in samples:
        print(heading)
        print(f"\tmean  = {scores.mean()}")
        print(f"\tstdev = {scores.std(ddof=1)}")

    (_, synthetic), (_, benchmark) = samples
    result = ttest_ind(synthetic, benchmark, alternative='less')
    print(f"p-value = {result.pvalue}")

### PCA JS-Divergence

In [16]:
# PCA-based JS divergence: is the synthetic mean lower than the GBM benchmark's?
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['gbm'])

Synthetic:
	mean  = 7.615977714377631
	stdev = 0.630243592981955
Benchmark:
	mean  = 7.242417676190246
	stdev = 0.5478223669189014
p-value = 0.9996301744518482


In [17]:
# PCA-based JS divergence: is the synthetic mean lower than the CEV benchmark's?
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['cev'])

Synthetic:
	mean  = 7.615977714377631
	stdev = 0.630243592981955
Benchmark:
	mean  = 6.300331258760185
	stdev = 0.502246362229912
p-value = 1.0


In [18]:
# PCA-based JS divergence: is the synthetic mean lower than the Heston benchmark's?
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['heston'])

Synthetic:
	mean  = 7.615977714377631
	stdev = 0.630243592981955
Benchmark:
	mean  = 6.093827098918665
	stdev = 0.44447805838727455
p-value = 1.0


### TSNE JS-Divergence

In [19]:
# t-SNE-based JS divergence: is the synthetic mean lower than the GBM benchmark's?
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['gbm'])

Synthetic:
	mean  = 0.20648791172827227
	stdev = 0.03399622522092403
Benchmark:
	mean  = 0.24909505547513716
	stdev = 0.039781046058691594
p-value = 2.5671830971693217e-09


In [20]:
# t-SNE-based JS divergence: is the synthetic mean lower than the CEV benchmark's?
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['cev'])

Synthetic:
	mean  = 0.20648791172827227
	stdev = 0.03399622522092403
Benchmark:
	mean  = 0.17166075341452747
	stdev = 0.0270114788657829
p-value = 0.999999995963944


In [21]:
# t-SNE-based JS divergence: is the synthetic mean lower than the Heston benchmark's?
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['heston'])

Synthetic:
	mean  = 0.20648791172827227
	stdev = 0.03399622522092403
Benchmark:
	mean  = 0.17354885935581496
	stdev = 0.03815983942616261
p-value = 0.9999989590271716


### FID Score

In [22]:
# FID: is the synthetic mean lower than the GBM benchmark's?
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['gbm'])

Synthetic:
	mean  = 0.022532573406634
	stdev = 0.000995650827122811
Benchmark:
	mean  = 0.007541947840446617
	stdev = 0.0005758004657698565
p-value = 1.0


In [23]:
# FID: is the synthetic mean lower than the CEV benchmark's?
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['cev'])

Synthetic:
	mean  = 0.022532573406634
	stdev = 0.000995650827122811
Benchmark:
	mean  = 0.004560605563891268
	stdev = 0.00037142154675998525
p-value = 1.0


In [24]:
# FID: is the synthetic mean lower than the Heston benchmark's?
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['heston'])

Synthetic:
	mean  = 0.022532573406634
	stdev = 0.000995650827122811
Benchmark:
	mean  = 0.003090025245840583
	stdev = 0.00043690669595794647
p-value = 1.0
