In [86]:
import matplotlib.pyplot as plt
from torch.utils import data
import numpy as np
from tqdm import tqdm
import random
from visualisationMetrics import *
from dataLoader import *
from utils.utils import to_pkl, from_pkl, js_divergence, fid
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from collections import defaultdict

In [104]:
# Number of independently sampled value sets generated per model
# (passed as `nsets` to the get_*_val_sets helpers).
NSETS = 20
# Run name; selects the checkpoint under ./logs/<name>/ and the output
# directory results/<name>/.
LOGIR_NAME = 'var-moderate'

# makedirs(exist_ok=True) creates the parent `results/` directory when it is
# missing and is a no-op when the target already exists, so this cell can be
# re-run safely on a fresh checkout.
# NOTE(review): `os` is not imported explicitly in the imports cell; it
# presumably arrives through one of the star imports -- confirm.
os.makedirs(f'results/{LOGIR_NAME}/', exist_ok=True)

# Load results

### Load real data

In [48]:
# Materialise the real dataset as NumPy arrays, drawing one sample per batch.
# NOTE(review): the recorded output of this cell shows 481 samples, which
# matches the test-split size printed by the loader rather than the
# 4328-sample train split -- confirm that data_mode='Train' is what produced
# the saved results.
real_data = load_dataset(data_mode='Train')
real_dataloader = data.DataLoader(real_data, batch_size=1, num_workers=1, shuffle=True)

real_paths, real_conds = [], []

for cond, sim in real_dataloader:
    # Keep axes 1 and 3 of each single-sample batch, dropping the others.
    sim = sim.cpu().detach().numpy()
    sim = np.reshape(sim, (sim.shape[1], sim.shape[3]))
    real_paths.append(sim)
    cond = cond.cpu().detach().numpy()
    cond = np.reshape(cond, (cond.shape[1], cond.shape[3]))
    real_conds.append(cond)

real_paths, real_conds = np.array(real_paths), np.array(real_conds)
print(real_paths.shape)
print(real_conds.shape)

# Swap the last two axes so each sample's path runs along axis 1.
real_vals = real_paths.transpose(0, 2, 1)
print(real_vals.shape)
to_pkl(f'results/real_vals.pkl', real_vals)

[*********************100%%**********************]  1 of 1 completed


X_train's shape is (4328, 1, 1, 2), X_test's shape is (481, 1, 1, 2)
y_train's label shape is (4328, 1, 1, 43), y_test's label shape is (481, 1, 1, 43)
(481, 1, 43)
(481, 1, 2)
(481, 43, 1)


### Load synthetic data

In [88]:
from LoadSynthetic import *

In [89]:
def get_syn_vals(model_path=None, n=10, **kwargs):
    """Sample paths from the trained generator and return them as one array.

    Args:
        model_path: Checkpoint location handed to Synthetic_Dataset. When
            omitted (None) it is built from the *current* value of
            LOGIR_NAME at call time, so re-running the config cell with a
            different run name takes effect without redefining this function.
        n: Forwarded to Synthetic_Dataset as `n`.
        **kwargs: Forwarded unchanged to both Synthetic_Dataset and
            data.DataLoader.

    Returns:
        Array obtained by stacking the per-batch paths and swapping the last
        two axes; shuffled in place along axis 0.
    """
    if model_path is None:
        model_path = f'./logs/{LOGIR_NAME}/Model/checkpoint'

    syn_data = Synthetic_Dataset(model_path=model_path, n=n, dataset=real_data, seq_len=real_data.output_size, conditions_dim=real_data.X_train.shape[-1], **kwargs)
    # NOTE(review): the same kwargs go to both constructors, so any key that
    # only one of them accepts will raise -- confirm which one is intended.
    dataloader = data.DataLoader(syn_data, batch_size=1, num_workers=1, shuffle=True, **kwargs)

    paths = []
    # The conditions yielded alongside each path are not needed for the result.
    for _cond, sim in dataloader:
        sim = sim.cpu().detach().numpy()
        # Keep axes 1 and 3 of each single-sample batch.
        paths.append(sim.reshape(sim.shape[1], sim.shape[3]))

    vals = np.transpose(np.array(paths), (0, 2, 1))
    np.random.shuffle(vals)
    return vals

def get_syn_val_sets(nsets, **kwargs):
    """Return a list of `nsets` results, one per get_syn_vals(**kwargs) call."""
    return [get_syn_vals(**kwargs) for _ in range(nsets)]

In [90]:
# Draw NSETS sets of generator samples and save them under this run's
# results directory so later cells can reload them instead of resampling.
syn_val_sets = get_syn_val_sets(nsets=NSETS)
to_pkl(f'results/{LOGIR_NAME}/syn_val_sets.pkl', syn_val_sets)

### Load benchmark data

In [52]:
from utils.utils import GBM_Simulator

In [53]:
def get_gbm_vals(n=10, **kwargs):
    """Simulate benchmark paths with GBM_Simulator and return them shuffled.

    The simulator is built from the notebook-level `real_data` with
    `nsamples=n`. Extra keyword arguments are accepted but not used.

    Returns:
        The simulator output reshaped to (-1, 1, path_len) and then
        transposed to (-1, path_len, 1), shuffled in place along axis 0.
    """
    raw_paths = GBM_Simulator(dataset=real_data, nsamples=n).run()
    path_len = raw_paths.shape[-1]

    # Flatten the leading axes into one, then move the path axis to the middle.
    vals = np.transpose(np.reshape(raw_paths, (-1, 1, path_len)), (0, 2, 1))
    np.random.shuffle(vals)
    return vals

def get_gbm_val_sets(nsets, **kwargs):
    """Return a list of `nsets` benchmark sets, one per get_gbm_vals call.

    Args:
        nsets: Number of sets to generate.
        **kwargs: Forwarded unchanged to get_gbm_vals.

    Returns:
        List of length `nsets` holding the get_gbm_vals results in call order.
    """
    # Must call the GBM sampler: calling get_syn_vals here would fill the
    # "benchmark" sets with generator samples and make the comparison void.
    return [get_gbm_vals(**kwargs) for _ in range(nsets)]

In [54]:
# Build NSETS benchmark sets and save them. The file sits directly under
# results/ (not under the run's sub-directory), so it is shared across runs.
gbm_val_sets = get_gbm_val_sets(nsets=NSETS)
to_pkl(f'results/gbm_val_sets.pkl', gbm_val_sets)

# Run evaluations

In [105]:
# Reload the saved real, synthetic and benchmark samples from disk so the
# evaluation can run without regenerating them.
# NOTE(review): from_pkl presumably unpickles; only load files written by
# this notebook, since unpickling untrusted data can execute code.
real_vals = from_pkl(f'results/real_vals.pkl')
syn_val_sets = from_pkl(f'results/{LOGIR_NAME}/syn_val_sets.pkl')
gbm_val_sets = from_pkl(f'results/gbm_val_sets.pkl')

In [106]:
# One score list per (metric, sample source); filled by the evaluation loop.
eval_scores = {
    metric: {'syn': [], 'gbm': []}
    for metric in ('js_pca', 'js_tsne', 'fid')
}

In [107]:
def js_div(real_vals, syn_vals, gbm_vals, mode, n_components=10, **kwargs):
    """Score synthetic and benchmark samples against the real ones.

    All three sample sets go through a single dim_reduction call (with the
    given `mode`, `n_components` and any extra kwargs), after which
    js_divergence is evaluated for real-vs-synthetic and real-vs-benchmark.

    Returns:
        Tuple (syn_score, gbm_score).
    """
    datasets = [real_vals, syn_vals, gbm_vals]
    real_emb, syn_emb, gbm_emb = dim_reduction(datasets, n_components=n_components, mode=mode, **kwargs)
    return (
        js_divergence(real_emb, syn_emb, verbose=False),
        js_divergence(real_emb, gbm_emb, verbose=False),
    )

In [94]:
# Score every (synthetic, benchmark) pair of sample sets against the real
# data and append the results to `eval_scores`.
# `total` is passed explicitly because zip() has no len(); without it tqdm
# can only print a running count ("3it") instead of progress out of the total.
# NOTE(review): scores are appended to the existing lists, so re-running this
# cell without re-running the cell that creates `eval_scores` duplicates entries.
for syn_vals, gbm_vals in tqdm(zip(syn_val_sets, gbm_val_sets), total=min(len(syn_val_sets), len(gbm_val_sets))):
    # The JS scores are computed three times per pair; FID only once.
    # NOTE(review): every repeat receives identical inputs, so the repeated JS
    # scores are not independent samples -- confirm this is intended before
    # they are fed to the t-test.
    # The loop variable is deliberately not called `iter`: at notebook top
    # level that name would overwrite the builtin for all later cells.
    for js_repeat in range(3):
        # JS-PCA
        syn_score, gbm_score = js_div(real_vals, syn_vals, gbm_vals, mode='pca')
        eval_scores['js_pca']['syn'].append(syn_score)
        eval_scores['js_pca']['gbm'].append(gbm_score)

        # JS-TSNE
        syn_score, gbm_score = js_div(real_vals, syn_vals, gbm_vals, mode='tsne')
        eval_scores['js_tsne']['syn'].append(syn_score)
        eval_scores['js_tsne']['gbm'].append(gbm_score)

    # FID
    syn_score = fid(real_vals, syn_vals)
    gbm_score = fid(real_vals, gbm_vals)
    eval_scores['fid']['syn'].append(syn_score)
    eval_scores['fid']['gbm'].append(gbm_score)

# Save the scores so the hypothesis tests can be re-run without recomputing.
to_pkl(f'results/{LOGIR_NAME}/eval_scores.pkl', eval_scores)

0it [00:00, ?it/s]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.150s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043509
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.095905
[t-SNE] KL divergence after 300 iterations: 1.199410
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.129s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043509
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.096836
[t-SNE] KL divergence after 300 iterations: 1.194389
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.002s...
[t-SNE] Computed neighbors for 1443 samples in 

1it [00:07,  7.74s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.127s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044476
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.039688
[t-SNE] KL divergence after 300 iterations: 1.136222
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.118s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044476
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.039696
[t-SNE] KL divergence after 300 iterations: 1.136123
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

2it [00:16,  8.29s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.120s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044055
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.083603
[t-SNE] KL divergence after 300 iterations: 1.140997
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.116s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044055
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.083687
[t-SNE] KL divergence after 300 iterations: 1.141191
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

3it [00:24,  8.23s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.127s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043843
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.791817
[t-SNE] KL divergence after 300 iterations: 1.260110
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.144s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043843
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.791462
[t-SNE] KL divergence after 300 iterations: 1.258698
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

4it [00:33,  8.33s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.145s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044319
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.187603
[t-SNE] KL divergence after 300 iterations: 1.211054
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.119s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044319
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.187706
[t-SNE] KL divergence after 300 iterations: 1.196168
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

5it [00:41,  8.30s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.129s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043866
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.303715
[t-SNE] KL divergence after 300 iterations: 1.224182
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.126s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043866
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.303268
[t-SNE] KL divergence after 300 iterations: 1.250059
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

6it [00:49,  8.26s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.130s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043559
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.005138
[t-SNE] KL divergence after 300 iterations: 1.167149
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.129s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043559
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.003784
[t-SNE] KL divergence after 300 iterations: 1.169638
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 

7it [00:57,  8.28s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.120s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044191
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.978054
[t-SNE] KL divergence after 300 iterations: 1.135923
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.129s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044191
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.977982
[t-SNE] KL divergence after 300 iterations: 1.135841
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

8it [01:06,  8.30s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.123s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043851
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.045998
[t-SNE] KL divergence after 300 iterations: 1.142774
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.132s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043851
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.045048
[t-SNE] KL divergence after 300 iterations: 1.142534
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

9it [01:14,  8.37s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.130s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044471
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.227093
[t-SNE] KL divergence after 300 iterations: 1.244200
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.130s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044471
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.227100
[t-SNE] KL divergence after 300 iterations: 1.244649
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

10it [01:23,  8.47s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.128s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043916
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.058346
[t-SNE] KL divergence after 300 iterations: 1.151834
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.117s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043916
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.058472
[t-SNE] KL divergence after 300 iterations: 1.151394
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

11it [01:31,  8.39s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.120s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043988
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.163368
[t-SNE] KL divergence after 300 iterations: 1.156156
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.120s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043988
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.163391
[t-SNE] KL divergence after 300 iterations: 1.157029
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

12it [01:39,  8.32s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.122s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044346
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.272720
[t-SNE] KL divergence after 300 iterations: 1.153045
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.135s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044346
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.272579
[t-SNE] KL divergence after 300 iterations: 1.153078
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

13it [01:48,  8.32s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.142s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043670
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.076824
[t-SNE] KL divergence after 300 iterations: 1.139879
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.115s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043670
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.076767
[t-SNE] KL divergence after 300 iterations: 1.139657
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

14it [01:56,  8.33s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.118s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043972
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.883888
[t-SNE] KL divergence after 300 iterations: 1.147608
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.117s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043972
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.884079
[t-SNE] KL divergence after 300 iterations: 1.147370
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

15it [02:04,  8.31s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.133s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043981
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.924904
[t-SNE] KL divergence after 300 iterations: 1.133226
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.113s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043981
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.925079
[t-SNE] KL divergence after 300 iterations: 1.133075
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 

16it [02:13,  8.39s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.116s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.042658
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.056198
[t-SNE] KL divergence after 300 iterations: 1.149919
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.112s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.042658
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.056301
[t-SNE] KL divergence after 300 iterations: 1.150040
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

17it [02:21,  8.33s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.116s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043744
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.106895
[t-SNE] KL divergence after 300 iterations: 1.261637
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.118s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043744
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.106998
[t-SNE] KL divergence after 300 iterations: 1.257564
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

18it [02:29,  8.26s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.119s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043181
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.869598
[t-SNE] KL divergence after 300 iterations: 1.153995
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.117s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043181
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.869606
[t-SNE] KL divergence after 300 iterations: 1.154074
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

19it [02:37,  8.25s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.114s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043324
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.853722
[t-SNE] KL divergence after 300 iterations: 1.237812
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.118s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043324
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.853626
[t-SNE] KL divergence after 300 iterations: 1.239735
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

20it [02:45,  8.30s/it]


# Run hypothesis tests

In [108]:
from scipy.stats import ttest_ind
# Reload the scores written by the evaluation loop for the current run name,
# so the tests below do not depend on the in-memory `eval_scores`.
eval_scores = from_pkl(f'results/{LOGIR_NAME}/eval_scores.pkl')

In [129]:
def hypothesis_test(syn_scores, gbm_scores):
    """Print summary statistics and a one-sided t-test for two score samples.

    For each sample the mean and the sample standard deviation (ddof=1) are
    printed, followed by the p-value of ttest_ind with alternative='less',
    i.e. the alternative that the synthetic mean is below the benchmark mean.
    Nothing is returned.
    """
    samples = {
        "Synthetic": np.array(syn_scores),
        "Benchmark": np.array(gbm_scores),
    }
    for label, scores in samples.items():
        print(f"{label}:")
        print(f"\tmean  = {scores.mean()}")
        print(f"\tstdev = {scores.std(ddof=1)}")

    result = ttest_ind(samples["Synthetic"], samples["Benchmark"], alternative='less')
    print(f"p-value = {result.pvalue}")

### PCA JS-Divergence

In [130]:
# One-sided test on the JS scores computed in PCA mode (synthetic vs. benchmark).
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['gbm'])

Synthetic:
	mean  = 1.707298352400694
	stdev = 0.21984733330331935
Benchmark:
	mean  = 2.9157585171986753
	stdev = 0.3160852346897349
p-value = 4.498388810066953e-48


### TSNE JS-Divergence

In [131]:
# One-sided test on the JS scores computed in t-SNE mode (synthetic vs. benchmark).
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['gbm'])

Synthetic:
	mean  = 0.11592539533843972
	stdev = 0.02346366183184622
Benchmark:
	mean  = 0.23539526730343305
	stdev = 0.0380389882490704
p-value = 2.1305533370886178e-41


### FID Score

In [132]:
# One-sided test on the FID scores (synthetic vs. benchmark); one score per sample set.
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['gbm'])

Synthetic:
	mean  = 0.005537207299738115
	stdev = 0.0002602655675481465
Benchmark:
	mean  = 0.009741208312549928
	stdev = 0.0003352355637506943
p-value = 1.2785474743894853e-34
