In [59]:
import matplotlib.pyplot as plt
from torch.utils import data
import numpy as np
from tqdm import tqdm
import random
from visualisationMetrics import *
from dataLoader import *
from utils.utils import *
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from collections import defaultdict
from tqdm import tqdm
from IPython.utils import io

In [75]:
# Number of independently generated sample sets per model; passed as `nsets`
# to the synthetic and benchmark set generators further down.
NSETS = 20
# Run directory name: the model is read from ./logs/{LOGDIR} and this run's
# result pickles are written under logs/{LOGDIR}/results/.
LOGDIR = 'reweighted_with_var'

# Load results

### Load real data

In [3]:
# Real dataset; it is also handed to the synthetic dataset and the benchmark
# simulators later on as their `dataset` argument.
# NOTE(review): data_mode='Train' is requested, yet the recorded output of the
# next cell reports 485 samples, which matches the printed test-split size
# rather than the 4344 training rows — confirm which split is actually loaded.
real_data = load_dataset(data_mode='Train')
# batch_size=1 with shuffle=True: each iteration yields a single sample, in random order.
real_dataloader = data.DataLoader(real_data, batch_size=1, num_workers=1, shuffle=True)

[*********************100%%**********************]  1 of 1 completed


X_train's shape is (4344, 1, 1, 3), X_test's shape is (485, 1, 1, 3)
y_train's label shape is (4344, 1, 1, 43), y_test's label shape is (485, 1, 1, 43)


In [4]:
# Walk the real-data loader once, converting every (condition, path) pair to a
# 2-D numpy array, then stack the pieces and persist the paths.
path_rows, cond_rows = [], []

for cond_batch, sim_batch in real_dataloader:
    sim_np = sim_batch.cpu().detach().numpy()
    # Keep axes 1 and 3 of the batch tensor; the reshape folds the others away.
    path_rows.append(sim_np.reshape(sim_np.shape[1], sim_np.shape[3]))
    cond_np = cond_batch.cpu().detach().numpy()
    cond_rows.append(cond_np.reshape(cond_np.shape[1], cond_np.shape[3]))

real_paths = np.array(path_rows)
real_conds = np.array(cond_rows)
print(real_paths.shape)
print(real_conds.shape)

# Swap the last two axes so the stored layout matches what the metrics consume.
real_vals = real_paths.transpose(0, 2, 1)
print(real_vals.shape)
to_pkl(f'results/real_vals.pkl', real_vals)

(485, 1, 43)
(485, 1, 3)
(485, 43, 1)


### Load synthetic data

In [52]:
from LoadSynthetic import *

In [53]:
def get_syn_vals(model_path=None, n=10, **kwargs):
    """Generate one shuffled set of synthetic paths from a trained model.

    Args:
        model_path: Directory passed to ``Synthetic_Dataset`` as ``model_path``.
            When ``None`` (the default) it resolves to ``./logs/{LOGDIR}`` at
            call time, so changing ``LOGDIR`` and re-running only the calling
            cell can no longer load a stale model directory that was baked in
            when this function was defined.
        n: Passed to ``Synthetic_Dataset`` as ``n``.
        **kwargs: Forwarded unchanged to BOTH ``Synthetic_Dataset`` and
            ``data.DataLoader``.
            NOTE(review): any extra keyword therefore has to be accepted by
            both constructors, and must not repeat ``batch_size``,
            ``num_workers`` or ``shuffle`` — confirm which target was intended.

    Returns:
        numpy array of the generated samples with the last two axes swapped,
        shuffled in place along the first axis.
    """
    if model_path is None:
        model_path = f'./logs/{LOGDIR}'

    syn_data = Synthetic_Dataset(model_path=model_path, n=n, dataset=real_data, **kwargs)
    dataloader = data.DataLoader(syn_data, batch_size=1, num_workers=1, shuffle=True, **kwargs)

    # Only the simulated paths are returned, so the conditioning tensors that
    # the loader also yields are ignored instead of being converted and stored.
    paths = []
    for _, sim in dataloader:
        sim = sim.cpu().detach().numpy()
        paths.append(sim.reshape(sim.shape[1], sim.shape[3]))

    vals = np.transpose(np.array(paths), (0, 2, 1))
    np.random.shuffle(vals)
    return vals

def get_syn_val_sets(nsets, **kwargs):
    """Return a list of ``nsets`` synthetic sample sets.

    Each entry is the result of one ``get_syn_vals(**kwargs)`` call; progress
    is reported through ``tqdm``.
    """
    return [get_syn_vals(**kwargs) for _ in tqdm(range(nsets))]

In [54]:
# Generate NSETS synthetic sample sets from the model under ./logs/{LOGDIR}
# and cache them next to that run's other results.
syn_val_sets = get_syn_val_sets(nsets=NSETS)
to_pkl(f'logs/{LOGDIR}/results/syn_val_sets.pkl', syn_val_sets)

100%|██████████| 20/20 [06:55<00:00, 20.79s/it]


### Load benchmark data

In [73]:
def get_benchmark_vals(n=10, mode='gbm', **kwargs):
    """Simulate one shuffled set of benchmark paths.

    Args:
        n: Passed to the simulator as ``nsamples``.
        mode: Which simulator to run: ``'gbm'``, ``'cev'`` or ``'heston'``.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        numpy array of the simulated paths, reshaped to ``(-1, 1, last_dim)``,
        with the last two axes swapped and shuffled in place along the first
        axis.

    Raises:
        NotImplementedError: If ``mode`` is not one of the supported names.
            The message names the rejected value.
    """
    if mode == 'gbm':
        simulator_cls = GBM_Simulator
    elif mode == 'cev':
        simulator_cls = CEV_Simulator
    elif mode == 'heston':
        simulator_cls = Heston_Simulator
    else:
        raise NotImplementedError(
            f"Unsupported benchmark mode {mode!r}; expected 'gbm', 'cev' or 'heston'"
        )

    paths = simulator_cls(dataset=real_data, nsamples=n).run()
    # Flatten every leading axis into one sample axis, keeping the last axis.
    paths = np.reshape(paths, (-1, 1, paths.shape[-1]))

    vals = np.transpose(paths, (0, 2, 1))
    np.random.shuffle(vals)
    return vals

def get_benchmark_val_sets(nsets, mode='gbm', **kwargs):
    """Return a list of ``nsets`` benchmark sample sets for ``mode``.

    Each entry comes from one ``get_benchmark_vals(mode=mode, **kwargs)`` call;
    progress is reported through ``tqdm``.
    """
    return [get_benchmark_vals(mode=mode, **kwargs) for _ in tqdm(range(nsets))]

In [None]:
# Simulate NSETS sample sets for every benchmark model and cache each list in
# results/{mode}_val_sets.pkl. These files are shared across runs: they do not
# live under logs/{LOGDIR}.
for mode in ['gbm', 'cev', 'heston']:
    benchmark_val_sets = get_benchmark_val_sets(mode=mode, nsets=NSETS)
    to_pkl(f'results/{mode}_val_sets.pkl', benchmark_val_sets)

# Run evaluations

In [76]:
# Reload every cached sample set so the evaluation can run without repeating
# the generation cells above.
# NOTE(review): from_pkl is presumably pickle-based — only load files produced
# by this notebook, never untrusted ones.
real_vals = from_pkl(f'results/real_vals.pkl')
# Synthetic sets belong to the current run directory; benchmarks are shared.
syn_val_sets = from_pkl(f'logs/{LOGDIR}/results/syn_val_sets.pkl')
gbm_val_sets = from_pkl(f'results/gbm_val_sets.pkl')
cev_val_sets = from_pkl(f'results/cev_val_sets.pkl')
heston_val_sets = from_pkl(f'results/heston_val_sets.pkl')

In [56]:
# Score accumulator: one independent, initially empty list per
# (metric, data source) pair, keyed as eval_scores[metric][source].
eval_scores = {
    metric: {source: [] for source in ('syn', 'gbm', 'cev', 'heston')}
    for metric in ('js_pca', 'js_tsne', 'fid')
}

In [57]:
def js_div(real_vals, other_vals_list, mode, n_components=10, **kwargs):
    """Score each candidate set against the real data after dimension reduction.

    The real set and all candidates go through a single ``dim_reduction`` call
    (``n_components``, ``mode`` and ``**kwargs`` are forwarded to it). The first
    reduced entry is then compared with each following one via
    ``js_divergence(..., verbose=False)``.

    Returns:
        List with one score per entry of ``other_vals_list``, in input order.
    """
    reduced = dim_reduction(
        [real_vals] + other_vals_list,
        n_components=n_components,
        mode=mode,
        **kwargs,
    )

    # Index 0 is the real data; candidates occupy positions 1..len(other_vals_list).
    return [
        js_divergence(reduced[0], reduced[position], verbose=False)
        for position in range(1, len(other_vals_list) + 1)
    ]

In [60]:
# Evaluate every generated set against the real data and append the scores to
# eval_scores[metric][source]. Re-running this cell appends again, so rebuild
# eval_scores first if a clean result is wanted.
score_sources = ('syn', 'gbm', 'cev', 'heston')
# zip() stops at the shortest list; telling tqdm that length lets the bar show
# a percentage and an ETA instead of a bare iteration counter.
n_eval_sets = min(len(syn_val_sets), len(gbm_val_sets), len(cev_val_sets), len(heston_val_sets))

for syn_vals, gbm_vals, cev_vals, heston_vals in tqdm(
    zip(syn_val_sets, gbm_val_sets, cev_val_sets, heston_val_sets), total=n_eval_sets
):
    candidate_sets = [syn_vals, gbm_vals, cev_vals, heston_vals]
    # Silence whatever the metric helpers print while they run.
    with io.capture_output():
        # The JS metrics are evaluated three times per set, so their lists grow
        # three times faster than the FID lists.
        # NOTE(review): these repeats reuse the same sample sets, so they are
        # not independent observations for the t-tests run later — confirm
        # this is intended.
        for _ in range(3):
            # JS-PCA first, then JS-TSNE, matching the original call order.
            for metric, reduction_mode in (('js_pca', 'pca'), ('js_tsne', 'tsne')):
                scores = js_div(real_vals, candidate_sets, mode=reduction_mode)
                for source, score in zip(score_sources, scores):
                    eval_scores[metric][source].append(score)

        # FID: a single evaluation per set and source.
        for source, candidate in zip(score_sources, candidate_sets):
            eval_scores['fid'][source].append(fid(real_vals, candidate))

to_pkl(f'logs/{LOGDIR}/results/eval_scores.pkl', eval_scores)

20it [05:43, 17.17s/it]


# Run hypothesis tests

In [77]:
from scipy.stats import ttest_ind
# Reload the scores saved by the evaluation cell so the tests below can be
# re-run without recomputing the metrics.
eval_scores = from_pkl(f'logs/{LOGDIR}/results/eval_scores.pkl')

In [78]:
def hypothesis_test(syn_scores, gbm_scores):
    """Print summary statistics and a one-sided two-sample t-test p-value.

    Args:
        syn_scores: Scores of the synthetic model.
        gbm_scores: Scores of the benchmark being compared. Despite the name,
            the caller may pass any benchmark's scores.

    Prints the mean and the sample standard deviation (``ddof=1``) of each
    group, then the p-value of ``ttest_ind`` with ``alternative='less'``, i.e.
    the alternative hypothesis is that the synthetic mean is lower than the
    benchmark mean. ``equal_var`` is left at scipy's default. Returns nothing.
    """
    groups = {
        "Synthetic:": np.array(syn_scores),
        "Benchmark:": np.array(gbm_scores),
    }
    for heading, values in groups.items():
        print(heading)
        print(f"\tmean  = {values.mean()}")
        print(f"\tstdev = {values.std(ddof=1)}")

    synthetic, benchmark = groups.values()
    outcome = ttest_ind(synthetic, benchmark, alternative='less')
    print(f"p-value = {outcome.pvalue}")

### PCA JS-Divergence

In [79]:
# One-sided test: is the synthetic model's mean PCA JS-divergence lower than GBM's?
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['gbm'])

Synthetic:
	mean  = 6.1102822580830445
	stdev = 0.4233422107915738
Benchmark:
	mean  = 7.283053824594976
	stdev = 0.5052557238577574
p-value = 1.81174700602567e-27


In [80]:
# One-sided test: is the synthetic model's mean PCA JS-divergence lower than CEV's?
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['cev'])

Synthetic:
	mean  = 6.1102822580830445
	stdev = 0.4233422107915738
Benchmark:
	mean  = 6.300064272285185
	stdev = 0.4968674347868786
p-value = 0.011891359374174631


In [81]:
# One-sided test: is the synthetic model's mean PCA JS-divergence lower than Heston's?
hypothesis_test(eval_scores['js_pca']['syn'], eval_scores['js_pca']['heston'])

Synthetic:
	mean  = 6.1102822580830445
	stdev = 0.4233422107915738
Benchmark:
	mean  = 6.0580375112178295
	stdev = 0.41606110392949686
p-value = 0.7552032310461292


### TSNE JS-Divergence

In [82]:
# One-sided test: is the synthetic model's mean t-SNE JS-divergence lower than GBM's?
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['gbm'])

Synthetic:
	mean  = 0.14232387857533355
	stdev = 0.032836291677329536
Benchmark:
	mean  = 0.24009898403048244
	stdev = 0.0334652351583524
p-value = 1.7501411460885418e-32


In [83]:
# One-sided test: is the synthetic model's mean t-SNE JS-divergence lower than CEV's?
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['cev'])

Synthetic:
	mean  = 0.14232387857533355
	stdev = 0.032836291677329536
Benchmark:
	mean  = 0.17432832832275003
	stdev = 0.027913822819344083
p-value = 2.7486471299147867e-08


In [84]:
# One-sided test: is the synthetic model's mean t-SNE JS-divergence lower than Heston's?
hypothesis_test(eval_scores['js_tsne']['syn'], eval_scores['js_tsne']['heston'])

Synthetic:
	mean  = 0.14232387857533355
	stdev = 0.032836291677329536
Benchmark:
	mean  = 0.17473569447298473
	stdev = 0.03601983549484815
p-value = 4.2686253658828756e-07


### FID Score

In [85]:
# One-sided test: is the synthetic model's mean FID lower than GBM's?
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['gbm'])

Synthetic:
	mean  = 0.005790042889375799
	stdev = 9.121297573318986e-05
Benchmark:
	mean  = 0.007541947840446617
	stdev = 0.0005758004657698565
p-value = 2.567448285616035e-16


In [86]:
# One-sided test: is the synthetic model's mean FID lower than CEV's?
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['cev'])

Synthetic:
	mean  = 0.005790042889375799
	stdev = 9.121297573318986e-05
Benchmark:
	mean  = 0.004560605563891268
	stdev = 0.00037142154675998525
p-value = 1.0


In [87]:
# One-sided test: is the synthetic model's mean FID lower than Heston's?
hypothesis_test(eval_scores['fid']['syn'], eval_scores['fid']['heston'])

Synthetic:
	mean  = 0.005790042889375799
	stdev = 9.121297573318986e-05
Benchmark:
	mean  = 0.003090025245840583
	stdev = 0.00043690669595794647
p-value = 1.0
