In [1]:
import numpy as np
import sys
import scipy.stats as st
import matplotlib.pyplot as plt
from pathlib import Path
# setting path
sys.path.append('../topotests/')
sys.path.append('../2DKS/')
from topotests import TopoTest
from distributions import MultivariateDistribution, GaussianMixture, AbsoluteDistribution
from KS2D import ks2d1s
import pandas as pd

In [8]:
def gof_tests(samples, cdf):
    # see Justel1997 Table 1 and Table 2
    Dstar = 0.1986
    def cdf_wrapper(x,y):
        return cdf((x,y))
    ks = []
    for sample in samples:
        ks_out = ks2d1s(sample, cdf_wrapper)
        print(ks_out)
        ks.append(ks_out[0] > Dstar)
    ks = np.sum(ks)/len(ks)
    return ks

In [3]:
def run_mc(N, rvs):
    # generate representation for standard normal distribution
    topo_test = TopoTest(n=N, dim=dim, method=method, 
                         wasserstein_p=wasserstein_p, wasserstein_order=wasserstein_order)
    
    results = []
    result_labels = ['true_distrib', 'alter_distrib', 'method', 'sign_level', 'wasserstein_p', 'wasserstein_order',
                 'mc_loops', 'n_signature', 'n_test', 
                 'topo_min', 'topo_mean', 'topo_max', 'topo_quantile',
                 'ks']
    
    for rv_true in rvs:
        topo_test.fit(rv=rv_true, n_signature=n_signature, n_test=n_test)
        # write signature distance matrix
        topo_test.save_distance_matrix(outputfile_basename+f'_N={N}_{rv_true.label}_signature_distance_matrix.npy')
        for rv_alter in rvs:
            # generate samples
            samples = [rv_alter.rvs(N) for i in range(mc_samples)]
            # perform topo tests
            topo_out = topo_test.predict(samples)
            # write representation distance matrix
            topo_test.save_predict_distance_matrix(outputfile_basename+f'_N={N}_{rv_true.label}-{rv_alter.label}_distance_matrix.npy')
            # aggregate results of topo tests
            topo_min = np.mean(topo_out.min)
            topo_mean = np.mean(topo_out.mean)
            topo_max = np.mean(topo_out.max)
            topo_quantile = np.mean(topo_out.quantile)
            # collect results of KS test
            ks = gof_tests(samples, cdf=rv_true.cdf)
            # collect results of topo tests and goodness of fit (gof) tests
            result = [rv_true.label, rv_alter.label, method, significance_level, wasserstein_p, wasserstein_order, 
                      mc_samples, n_signature, n_test, 
                      topo_min, topo_mean, topo_max, topo_quantile,
                      ks]
            results.append(result)
            # save results to .csv file
            results_df = pd.DataFrame(results, columns=result_labels)
            results_df.to_csv(f'{outputfile_basename}_N={N}.csv')
    return results

In [4]:
rvs = [MultivariateDistribution([st.norm(), st.norm()], label='N01xN01'),
       MultivariateDistribution([st.t(df=3), st.t(df=3)], label='T3xT3'),
       MultivariateDistribution([st.t(df=5), st.t(df=5)], label='T5xT5'),
       MultivariateDistribution([st.t(df=10), st.t(df=10)], label='T10xT10'),
       MultivariateDistribution([st.logistic(), st.logistic()], label='LogisticxLogistic'),
       MultivariateDistribution([st.laplace(), st.laplace()], label='LaplacexLaplace'),
       MultivariateDistribution([st.norm(), st.t(df=5)], label='N01xT5'),
       MultivariateDistribution([GaussianMixture([-1, 1], [1, 1], [0.5, 0.5]),
                                GaussianMixture([-1, 1], [1, 1], [0.5, 0.5])], label='GM_1xGM_1'),
       MultivariateDistribution([st.norm(),
                                GaussianMixture([-1, 1], [1, 1], [0.5, 0.5])], label='N01xGM_1')      
      ]

In [5]:
# set random numbers generator seed to have reproducibale results
np.random.seed(1)

# set simulation parameters
Ns = [50]
mc_samples = 10
n_signature = n_test = 10
method = 'mergegram'

dim = 2
significance_level = 0.05
wasserstein_p=1
wasserstein_order=1

outputfile_basename = f'results.{dim}d/{method}_{wasserstein_p}_{wasserstein_order}'

In [None]:
for N in Ns:
    results = run_mc(N=N, rvs=rvs)

(50, 2)
(0.793, 0.0)
(50, 2)
(0.6252, 2.070952237037191e-13)
(50, 2)
(0.7837000000000001, 0.0)
(50, 2)
(0.7288000000000001, 0.0)
(50, 2)
(1.2621, 0.0)
(50, 2)
(0.8484999999999999, 0.0)
(50, 2)
(1.3101, 0.0)
(50, 2)
(0.49970000000000003, 8.514452948411177e-09)
(50, 2)
(0.4715, 8.33311476995481e-08)
(50, 2)
(0.5838, 9.125657536451117e-12)
(50, 2)
(2.8328, 0.0)
(50, 2)
(2.8593, 0.0)
(50, 2)
(19.487, 0.0)
(50, 2)
(1.7907, 0.0)
(50, 2)
(1.7629000000000001, 0.0)
(50, 2)
(4.2812, 0.0)
(50, 2)
(3.3537, 0.0)
(50, 2)
(1.2545000000000002, 0.0)
(50, 2)
(4.4474, 0.0)
(50, 2)
(2.0052000000000003, 0.0)
(50, 2)
(2.0586, 0.0)
(50, 2)
(1.9314999999999998, 0.0)
(50, 2)
(2.0321000000000002, 0.0)
(50, 2)
(0.8506, 0.0)
(50, 2)
(0.8101, 0.0)
(50, 2)
(1.0219, 0.0)
(50, 2)
(1.4532000000000003, 0.0)
(50, 2)
(2.4626, 0.0)
(50, 2)
(0.6395, 5.230989363581277e-14)
(50, 2)
(3.1739, 0.0)
(50, 2)
(0.7412999999999998, 0.0)
(50, 2)
(0.7588, 0.0)
(50, 2)
(1.7960000000000003, 0.0)
(50, 2)
(0.6271, 1.641324752308328e-13)
(