In [None]:
import numpy as np
import sys
sys.path.append('../tools')
from utils import get_shaps
from scipy.stats import spearmanr, pearsonr, sem
from scipy import spatial

%load_ext autoreload
%autoreload 2

In [None]:
# Experiment selection.
# `task` is not read by any cell visible here — presumably consumed elsewhere; TODO confirm.
task = 'classification'
# Interpolated into the names of the result files that are loaded further down.
dataset_name = 'breast_cancer'
# Length of the per-datapoint value vector; sizes the `true_shaps` accumulator.
num_datapoints = 50

In [None]:
# Model / evaluation settings.
# `model_name` is interpolated into the names of the result files that are loaded further down.
model_name = 'logistic'
# Name of the utility metric — presumably the one used when the cached results were generated; TODO confirm.
metric = 'accuracy'
# Not read by any cell visible here — presumably the seed used when generating the cached results; TODO confirm.
seed = 2022
# Expected number of repeated runs stored in each result file — TODO confirm against the generating script.
repeat_num = 20

In [None]:
# Sample-count tags; each is interpolated into a result file name
# (`num_samples` for the per-xi estimate files, `num_samples_true` for the reference file).
num_samples = 50
num_samples_true = 500
# Values of xi to sweep: each one selects a result file and is also the additive
# term in the (|mu| + xi)^2 / s^2 ratio computed in the analysis loop.
xis = [1e1, 1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]

In [None]:
# Directory holding the cached experiment outputs (relative to this notebook).
path = '../experiment_data/xi'
method = 'random'

# NOTE(review): allow_pickle=True unpickles arbitrary objects on load — only use
# with result files produced by trusted code.

# Reference run: the file tagged with `num_samples_true` and a fixed trailing 0.001.
true_mcs_list = np.load(
    f"{path}/xi_est_{dataset_name}_{model_name}_{num_samples_true}_{1e-3}.npy",
    allow_pickle=True,
)

# One (xi, loaded results) pair per swept xi, in the same order as `xis`.
est_mcs_list_xi = [
    (
        xi,
        np.load(
            f"{path}/ri_mape_est_{dataset_name}_{model_name}_{num_samples}_{xi}.npy",
            allow_pickle=True,
        ),
    )
    for xi in xis
]

In [None]:
# Reference values: the element-wise mean of get_shaps(...) over every run stored
# in `true_mcs_list`.
# The divisor is the number of runs actually loaded rather than the configured
# `repeat_num`, so the result stays a true mean even if the file holds a different
# number of repetitions (identical result when the two agree).
n_true_runs = len(true_mcs_list)
true_shaps = np.zeros(num_datapoints)
for true_mcs in true_mcs_list:
    # np.asarray so the division/accumulation is element-wise whatever sequence
    # type get_shaps returns.
    true_shaps += np.asarray(get_shaps(true_mcs)) / n_true_runs
print(true_shaps)

In [None]:
# For every xi, compare the per-datapoint ratio r_i = (|mu_i| + xi)^2 / s_i^2 with
# the inverse of the per-datapoint estimation error, using three agreement measures
# (Spearman, Pearson, cosine similarity). Each measure is computed once per
# repeated run and reported as mean \pm standard error over the runs.
for xi, est_mcs_list in est_mcs_list_xi:
    spear_coeffs = []
    pearson_coeffs = []
    cos_sim = []

    for est_mcs in est_mcs_list:
        # np.asarray for consistency with how the reference values are built.
        est_shaps = np.asarray(get_shaps(est_mcs))

        # Per-datapoint absolute percentage error (not averaged), then its square
        # root. 1e-5 is added to numerator and denominator to stabilise the ratio.
        # NOTE(review): the denominator is the *signed* value plus 1e-5, so a
        # reference value close to -1e-5 still blows up; abs(true_shaps) + 1e-5
        # may have been intended. Left unchanged to keep reported numbers stable.
        mape = abs((abs(est_shaps - true_shaps) + 1e-5) / (true_shaps + 1e-5))
        mape = mape ** 0.5

        r = []
        for i in range(len(true_shaps)):
            # First component of every entry recorded for datapoint i
            # (presumably the sampled marginal contributions — TODO confirm).
            mc_i = np.asarray([item[0] for item in est_mcs[i]])
            # Unbiased sample variance, offset so the ratio never divides by zero.
            s2 = np.var(mc_i, ddof=1) + 1e-5
            mu = true_shaps[i]
            r.append((abs(mu) + xi) ** 2 / s2)
        r = np.asarray(r)

        # Kept in a local name: the original rebound the notebook-level `metric`
        # config string here, silently changing it for every later cell/re-run.
        inv_error = 1 / mape

        spear_coeffs.append(spearmanr(r, inv_error).correlation)
        pearson_coeffs.append(pearsonr(r, inv_error)[0])
        cos_sim.append(1 - spatial.distance.cosine(r, inv_error))

    # "\\pm" prints the same literal "\pm" as before without relying on an
    # invalid escape sequence.
    spear_coeffs = np.asarray(spear_coeffs)
    print("xi: %.10f; Spearman %.4f \\pm %.4f" % (xi, spear_coeffs.mean(), sem(spear_coeffs)), end=' ')

    pearson_coeffs = np.asarray(pearson_coeffs)
    print("Pearson %.4f \\pm %.4f" % (pearson_coeffs.mean(), sem(pearson_coeffs)), end=' ')

    cos_sim = np.asarray(cos_sim)
    print("Cosine %.4f \\pm %.4f" % (cos_sim.mean(), sem(cos_sim)))