In [1]:
from similarity.dataset_similarity_metrics import c2st_accuracy
from similarity.BoP import BoP
import numpy as np
import matplotlib.pyplot as plt
from datahandler import load_data
from utils import dict_to_np, dps_to_np
from sklearn.neighbors import KDTree 

In [2]:
# Experiment configuration shared by the cells below.
# Dataset identifier: passed to load_data and embedded in the .npy file names.
dataname = "complex9"
# Embedded in the .npy file names; presumably selects a parameter setting — TODO confirm.
parameters = 0
# Folder passed to get_gen_data when loading generated data.
gen_folder = "./gen_data"
# Folder passed to get_mc_centers when loading the stored `mcs_...` files.
mc_folder = "./mc_data"
# Prefix of the path handed to BoP as its third argument.
bop_folder = "./bop"
# Second argument to BoP; presumably the number of centroids — TODO confirm.
bop_centroids = 100

In [3]:
# Load the dataset named by `dataname`; load_data returns a pair
# (presumably features and labels — verify against datahandler.load_data).
true_X, true_y = load_data(dataname)
# Sample count; bounds the chunked evaluation loop and normalises its weighted sums.
datalength = len(true_y)

In [4]:
# Sanity check: shapes of the two loaded arrays and the sample count.
print(true_X.shape)
print(true_y.shape)
print(datalength)

(3031, 2)
(3031,)
3031


In [5]:
def get_mc_centers(dataname, parameters, timestep, mc_folder):
    """Load the stored `mcs_...` file for one timestep and return its centers.

    The file name is built from ``dataname``, ``parameters`` and ``timestep``
    and resolved relative to ``./{mc_folder}``. For every entry in the loaded
    array, element ``[1]`` is converted with ``dict_to_np``.

    Args:
        dataname: Dataset identifier used in the file name.
        parameters: Parameter identifier used in the file name.
        timestep: Timestep identifier used in the file name.
        mc_folder: Folder (relative to the working directory) holding the file.

    Returns:
        A list with one ``dict_to_np`` result per stored entry (empty if the
        file holds no entries).
    """
    path = f"./{mc_folder}/mcs_{dataname}_clustream_1000_100_1000_False_{parameters}_{timestep}.npy"
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted files.
    stored_entries = np.load(path, allow_pickle=True)
    # Presumably entry[1] is the center stored as a dict — verify against the writer.
    return [dict_to_np(entry[1]) for entry in stored_entries]

In [6]:
def get_gen_data(dataname, parameters, timestep, gen_folder, gen_type):
    """Load the stored `data_...` array for one generator type and timestep.

    Args:
        dataname: Dataset identifier used in the file name.
        parameters: Parameter identifier used in the file name.
        timestep: Timestep identifier used in the file name.
        gen_folder: Folder (relative to the working directory) holding the file.
        gen_type: Generator identifier used in the file name.

    Returns:
        Whatever ``np.load`` returns for the file (loaded with pickling enabled).
    """
    file_name = f"data_{dataname}_{gen_type}_1000_100_1000_False_{parameters}_{timestep}.npy"
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load trusted files.
    return np.load(f"./{gen_folder}/{file_name}", allow_pickle=True)

In [8]:
# Compare each offline representation against the real data, one window at a
# time, and report per-window scores plus a sample-weighted average per type.
# Window length in samples; also the spacing of the timesteps requested from
# the saved files.
chunk_size = 1000
for offline_type in ["clustream", "wclustream", "scope_full", "scope"]:
    # Running sums weighted by window size, so dividing by `datalength`
    # yields a per-sample average even though the last window may be shorter.
    bop_jsd_sum = 0
    c2st_real_sum = 0
    c2st_offline_sum = 0
    nndists_sum = 0
    for i in range(chunk_size, datalength + chunk_size, chunk_size):
        # The final window may be partial: clamp the timestep used in file names.
        cur_ts = min(i, datalength)
        # Slicing past the end is safe; the last window is simply shorter.
        real_subset = true_X[i - chunk_size:i]

        # "clustream" is read from the stored `mcs_...` files; every other type
        # is read from the generated-data files.
        if offline_type == "clustream":
            offline_data = get_mc_centers(dataname, parameters, cur_ts, mc_folder)
        else:
            offline_data = get_gen_data(dataname, parameters, cur_ts, gen_folder, offline_type)
        offline_data = np.array(offline_data)

        # NOTE(review): the BoP path uses the unclamped `i`, so the final partial
        # window gets a different number than `cur_ts` — confirm this is intended.
        bop_subset = BoP(offline_data, bop_centroids, f"{bop_folder}/{dataname}/{offline_type}/{i}")
        bop_scores = bop_subset.evaluate(real_subset)
        bop_jsd_sum += bop_scores['JS']*len(real_subset)

        c2st_real, c2st_offline = c2st_accuracy(real_subset,offline_data)
        c2st_real_sum += c2st_real*len(real_subset)
        c2st_offline_sum += c2st_offline*len(real_subset)

        # Distance from every real sample to its nearest offline point.
        kdtree = KDTree(offline_data)
        nndists, _ = kdtree.query(real_subset)
        nndist_avg = np.sum(nndists)/len(nndists)
        nndists_sum += np.sum(nndists)
        print(f"\t{offline_type} {cur_ts} BoP: {bop_scores['JS']:.3f} C2ST-R: {c2st_real:.3f} C2ST-O: {c2st_offline:.3f} NN-dist {nndist_avg:.3f}")

    bop_jsd_avg = bop_jsd_sum/datalength
    c2st_real_avg = c2st_real_sum/datalength
    c2st_offline_avg = c2st_offline_sum/datalength
    nndists_avg = nndists_sum/datalength
    print(f"{offline_type} BoP {bop_jsd_avg:.3f} C2ST-R: {c2st_real_avg:.3f} C2ST-O: {c2st_offline_avg:.3f} NN-dist {nndists_avg:.4f}")

	clustream 1000 BoP: 0.035 C2ST-R: 0.887 C2ST-O: 0.000 NN-dist 0.025
	clustream 2000 BoP: 0.030 C2ST-R: 0.859 C2ST-O: 0.000 NN-dist 0.028
	clustream 3000 BoP: 0.027 C2ST-R: 0.881 C2ST-O: 0.000 NN-dist 0.026
	clustream 3031 BoP: 0.352 C2ST-R: 0.032 C2ST-O: 0.560 NN-dist 0.025
clustream BoP 0.034 C2ST-R: 0.867 C2ST-O: 0.006 NN-dist 0.0262
	wclustream 1000 BoP: 0.003 C2ST-R: 0.887 C2ST-O: 0.995 NN-dist 0.025
	wclustream 2000 BoP: 0.084 C2ST-R: 0.860 C2ST-O: 0.975 NN-dist 0.028
	wclustream 3000 BoP: 0.020 C2ST-R: 0.882 C2ST-O: 0.993 NN-dist 0.026
	wclustream 3031 BoP: 0.333 C2ST-R: 0.032 C2ST-O: 0.999 NN-dist 0.025
wclustream BoP 0.039 C2ST-R: 0.868 C2ST-O: 0.987 NN-dist 0.0262
	scope_full 1000 BoP: 0.004 C2ST-R: 0.464 C2ST-O: 0.503 NN-dist 0.011
	scope_full 2000 BoP: 0.043 C2ST-R: 0.520 C2ST-O: 0.579 NN-dist 0.014
	scope_full 3000 BoP: 0.015 C2ST-R: 0.440 C2ST-O: 0.478 NN-dist 0.011
	scope_full 3031 BoP: 0.336 C2ST-R: 0.000 C2ST-O: 0.971 NN-dist 0.011
scope_full BoP 0.024 C2ST-R: 0.470 C2