In [71]:
import matplotlib.pyplot as plt
from torch.utils import data
import numpy as np
from tqdm import tqdm
import random
from visualisationMetrics import *
from dataLoader import *
from utils.utils import to_pkl, from_pkl, js_divergence, fid
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from collections import defaultdict

In [73]:
NSETS = 20
LOGIR_NAME = 'var'

if not os.path.exists(f'results/{LOGIR_NAME}/'):
    os.mkdir(f'results/{LOGIR_NAME}/')

# Load results

### Load real data

In [48]:
real_data = load_dataset(data_mode='Train')
real_dataloader = data.DataLoader(real_data, batch_size=1, num_workers=1, shuffle=True)

real_paths = []
real_conds = []

for i, (cond, sim) in enumerate(real_dataloader):
    sim = sim.cpu().detach().numpy()
    sim = sim.reshape(sim.shape[1], sim.shape[3])
    real_paths.append(sim)
    cond = cond.cpu().detach().numpy()
    cond = cond.reshape(cond.shape[1], cond.shape[3])
    real_conds.append(cond)

real_paths = np.array(real_paths)
real_conds = np.array(real_conds)
print(real_paths.shape)
print(real_conds.shape)

real_vals = np.transpose(real_paths, (0,2,1))
print(real_vals.shape)
to_pkl(f'results/real_vals.pkl', real_vals)

[*********************100%%**********************]  1 of 1 completed


X_train's shape is (4328, 1, 1, 2), X_test's shape is (481, 1, 1, 2)
y_train's label shape is (4328, 1, 1, 43), y_test's label shape is (481, 1, 1, 43)
(481, 1, 43)
(481, 1, 2)
(481, 43, 1)


### Load synthetic data

In [74]:
from LoadSynthetic import *

In [75]:
def get_syn_vals(model_path=f'./logs/{LOGIR_NAME}/Model/checkpoint', n=10, **kwargs):
    syn_data = Synthetic_Dataset(model_path=model_path, n=n, dataset=real_data, seq_len=real_data.output_size, conditions_dim=real_data.X_train.shape[-1], **kwargs)
    dataloader = data.DataLoader(syn_data, batch_size=1, num_workers=1, shuffle=True, **kwargs)

    paths = []
    conds = []

    for i, (cond, sim) in enumerate(dataloader):
        sim = sim.cpu().detach().numpy()
        sim = sim.reshape(sim.shape[1], sim.shape[3])
        paths.append(sim)
        cond = cond.cpu().detach().numpy()
        cond = cond.reshape(cond.shape[1], cond.shape[3])
        conds.append(cond)
        
    paths = np.array(paths)
    conds = np.array(conds)
    vals = np.transpose(paths, (0,2,1))
    np.random.shuffle(vals)
    return vals

def get_syn_val_sets(nsets, **kwargs):
    val_sets = []
    for i in range(nsets):
        val_sets.append(get_syn_vals(**kwargs))
    return val_sets

In [76]:
syn_val_sets = get_syn_val_sets(nsets=NSETS)
to_pkl(f'results/{LOGIR_NAME}/syn_val_sets.pkl', syn_val_sets)

### Load benchmark data

In [52]:
from utils.utils import GBM_Simulator

In [53]:
def get_gbm_vals(n=10, **kwargs):
    simulator = GBM_Simulator(dataset=real_data, nsamples=n)
    paths = simulator.run()
    paths = np.reshape(paths, (-1,1,paths.shape[-1]))

    vals = np.transpose(paths, (0, 2, 1))
    np.random.shuffle(vals)
    return vals

def get_gbm_val_sets(nsets, **kwargs):
    val_sets = []
    for i in range(nsets):
        val_sets.append(get_syn_vals(**kwargs))
    return val_sets

In [54]:
gbm_val_sets = get_gbm_val_sets(nsets=NSETS)
to_pkl(f'results/gbm_val_sets.pkl', gbm_val_sets)

# Run evaluations

In [77]:
real_vals = from_pkl(f'results/real_vals.pkl')
syn_val_sets = from_pkl(f'results/{LOGIR_NAME}/syn_val_sets.pkl')
gbm_val_sets = from_pkl(f'results/gbm_val_sets.pkl')

In [78]:
eval_scores = {
    'js_pca': {
        'syn': [],
        'gbm': []
    },
    'js_tsne': {
        'syn': [],
        'gbm': []
    },
    'fid': {
        'syn': [],
        'gbm': []
    }
}

In [79]:
def js_div(real_vals, syn_vals, gbm_vals, mode, n_components=10, **kwargs):
    real, syn, gbm = dim_reduction([real_vals, syn_vals, gbm_vals], n_components=n_components, mode=mode, **kwargs)
    syn_score = js_divergence(real, syn, verbose=False)
    gbm_score = js_divergence(real, gbm, verbose=False)
    return syn_score, gbm_score

In [80]:
for syn_vals, gbm_vals in tqdm(zip(syn_val_sets, gbm_val_sets)):
    for iter in range(3):
        # JS-PCA
        syn_score, gbm_score = js_div(real_vals, syn_vals, gbm_vals, mode='pca')
        eval_scores['js_pca']['syn'].append(syn_score)
        eval_scores['js_pca']['gbm'].append(gbm_score)
        
        # JS-TSNE
        syn_score, gbm_score = js_div(real_vals, syn_vals, gbm_vals, mode='tsne')
        eval_scores['js_tsne']['syn'].append(syn_score)
        eval_scores['js_tsne']['gbm'].append(gbm_score)
    
    # FID
    syn_score = fid(real_vals, syn_vals)
    gbm_score = fid(real_vals, gbm_vals)
    eval_scores['fid']['syn'].append(syn_score)
    eval_scores['fid']['gbm'].append(gbm_score)

to_pkl(f'results/{LOGIR_NAME}/eval_scores.pkl', eval_scores)

0it [00:00, ?it/s]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.002s...
[t-SNE] Computed neighbors for 1443 samples in 0.146s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043239
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.151970
[t-SNE] KL divergence after 300 iterations: 1.257519
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.126s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043239
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.151867
[t-SNE] KL divergence after 300 iterations: 1.176743
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

1it [00:07,  7.46s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.131s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044197
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.915894
[t-SNE] KL divergence after 300 iterations: 1.142146
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.116s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044197
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.916016
[t-SNE] KL divergence after 300 iterations: 1.142514
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

2it [00:15,  7.96s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.124s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043925
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.040787
[t-SNE] KL divergence after 300 iterations: 1.145016
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.116s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043925
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.041096
[t-SNE] KL divergence after 300 iterations: 1.144630
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

3it [00:24,  8.11s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.115s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044174
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.813660
[t-SNE] KL divergence after 300 iterations: 1.164260
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.127s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044174
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.814171
[t-SNE] KL divergence after 300 iterations: 1.167822
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

4it [00:32,  8.15s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.130s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044004
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.001556
[t-SNE] KL divergence after 300 iterations: 1.260031
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.136s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044004
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.002079
[t-SNE] KL divergence after 300 iterations: 1.273385
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

5it [00:41,  8.42s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.122s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043673
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.911774
[t-SNE] KL divergence after 300 iterations: 1.152778
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.131s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043673
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.911915
[t-SNE] KL divergence after 300 iterations: 1.152907
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

6it [00:49,  8.36s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.123s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043594
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.193027
[t-SNE] KL divergence after 300 iterations: 1.213035
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.136s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043594
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.193077
[t-SNE] KL divergence after 300 iterations: 1.209943
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

7it [00:58,  8.54s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.140s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044172
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.241001
[t-SNE] KL divergence after 300 iterations: 1.146409
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.136s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044172
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.241219
[t-SNE] KL divergence after 300 iterations: 1.146343
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

8it [01:07,  8.87s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.130s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044045
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.077137
[t-SNE] KL divergence after 300 iterations: 1.140368
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.132s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044045
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.077705
[t-SNE] KL divergence after 300 iterations: 1.140323
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

9it [01:17,  8.96s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.116s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044281
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.154217
[t-SNE] KL divergence after 300 iterations: 1.149751
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.123s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044281
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.154018
[t-SNE] KL divergence after 300 iterations: 1.149569
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 

10it [01:25,  8.73s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.120s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043689
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.078690
[t-SNE] KL divergence after 300 iterations: 1.228680
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.123s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043689
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.078793
[t-SNE] KL divergence after 300 iterations: 1.230539
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 

11it [01:33,  8.63s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.119s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043282
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.037380
[t-SNE] KL divergence after 300 iterations: 1.152056
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.129s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043282
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.036934
[t-SNE] KL divergence after 300 iterations: 1.151733
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

12it [01:41,  8.48s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.133s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044399
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.965885
[t-SNE] KL divergence after 300 iterations: 1.155299
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.115s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044399
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.964943
[t-SNE] KL divergence after 300 iterations: 1.154970
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

13it [01:49,  8.39s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.124s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043826
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.975708
[t-SNE] KL divergence after 300 iterations: 1.136909
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.122s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043826
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.976185
[t-SNE] KL divergence after 300 iterations: 1.136968
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

14it [01:58,  8.30s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.116s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043551
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.117790
[t-SNE] KL divergence after 300 iterations: 1.158209
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.116s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043551
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.117767
[t-SNE] KL divergence after 300 iterations: 1.158257
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 

15it [02:06,  8.35s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.127s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044520
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.949387
[t-SNE] KL divergence after 300 iterations: 1.126028
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.129s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044520
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.948807
[t-SNE] KL divergence after 300 iterations: 1.126151
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

16it [02:16,  8.74s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.132s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043722
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.896347
[t-SNE] KL divergence after 300 iterations: 1.160437
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.131s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043722
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.897102
[t-SNE] KL divergence after 300 iterations: 1.158682
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

17it [02:24,  8.65s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 0.128s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044205
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.846684
[t-SNE] KL divergence after 300 iterations: 1.166477
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.126s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.044205
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.846645
[t-SNE] KL divergence after 300 iterations: 1.166839
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.001s...
[t-SNE] Computed neighbors for 1443 samples in 

18it [02:32,  8.51s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.118s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043973
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.763893
[t-SNE] KL divergence after 300 iterations: 1.152760
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.124s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043973
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.763439
[t-SNE] KL divergence after 300 iterations: 1.151872
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

19it [02:41,  8.65s/it]

[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.123s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043885
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.026493
[t-SNE] KL divergence after 300 iterations: 1.142973
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 0.125s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1443
[t-SNE] Computed conditional probabilities for sample 1443 / 1443
[t-SNE] Mean sigma: 0.043885
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.026684
[t-SNE] KL divergence after 300 iterations: 1.142979
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 1443 samples in 0.000s...
[t-SNE] Computed neighbors for 1443 samples in 

20it [02:50,  8.52s/it]


# Run hypothesis tests

In [81]:
from scipy.stats import ttest_ind
eval_scores = from_pkl(f'results/{LOGIR_NAME}/eval_scores.pkl')

In [82]:
def hypothesis_test(syn_scores, gbm_scores):
    syn_scores = np.array(syn_scores)
    gbm_scores = np.array(gbm_scores)
    print("Synthetic:")
    print(f"\tmean  = {syn_scores.mean()}")
    print(f"\tstdev = {syn_scores.std()}")
    print("Benchmark:")
    print(f"\tmean  = {gbm_scores.mean()}")
    print(f"\tstdev = {gbm_scores.std()}")

### PCA JS-Divergence

In [83]:
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['gbm'])

Synthetic:
	mean  = 1.6576441169490852
	stdev = 0.2065909741205673
Benchmark:
	mean  = 2.919550678308244
	stdev = 0.3297409229289862


### TSNE JS-Divergence

In [84]:
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['gbm'])

Synthetic:
	mean  = 0.11182642067340585
	stdev = 0.013656558060486339
Benchmark:
	mean  = 0.23721891164765802
	stdev = 0.03350305663559553


### FID Score

In [85]:
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['gbm'])

Synthetic:
	mean  = 0.004907154425217041
	stdev = 0.00026894137654578075
Benchmark:
	mean  = 0.009741208312549928
	stdev = 0.00032674720969441126
