In [None]:
import numpy as np

import sys
sys.path.append('../tools')
from utils import get_shaps
from scipy.stats import spearmanr, pearsonr, sem
from scipy import spatial

%load_ext autoreload
%autoreload 2

In [None]:
# Experiment identity. `dataset_name` is interpolated into the result file
# names loaded below; `num_datapoints` is the length of the per-example value
# vector (`true_shaps`).
dataset_name = 'covertype'
task = 'classification'
num_datapoints = 50

In [None]:
# Model / run settings. `model_name` is part of the result file names and
# `repeat_num` is the number of repeated runs the notebook averages over.
# NOTE(review): `seed` is not read by any cell visible in this notebook --
# presumably kept to mirror the script that produced the data; confirm.
seed = 2022
repeat_num = 20
model_name = 'logistic'
metric = 'accuracy'

In [None]:
# `xi` is the offset added to |mu| when the ratio r is computed further down.
xi = 1e-3
# File-name suffixes: `num_samples` selects the runs being evaluated,
# `num_samples_true` the runs used as the reference.
num_samples_true = 500
num_samples = 50

In [None]:
# Location of the stored runs.
# NOTE(review): `method` is not used by any cell visible in this notebook.
method = 'random'
path = '../experiment_data/ri_ape'

# Reference runs (suffix `num_samples_true`) and the runs under evaluation
# (suffix `num_samples`).
true_mcs_file = f"{path}/ri_ape_est_{dataset_name}_{model_name}_{num_samples_true}.npy"
est_mcs_file = f"{path}/ri_ape_est_{dataset_name}_{model_name}_{num_samples}.npy"

# NOTE(security): allow_pickle=True unpickles arbitrary objects; only load
# files produced by this project.
true_mcs_list = np.load(true_mcs_file, allow_pickle=True)
est_mcs_list = np.load(est_mcs_file, allow_pickle=True)

In [None]:
# Reference values: the mean of `get_shaps(...)` over every run stored in
# `true_mcs_list`.
# Divide by the number of runs actually loaded rather than the configured
# `repeat_num`, so the result is a true mean even if the file holds a
# different number of runs. With exactly `repeat_num` runs this is identical
# to the previous computation.
num_true_runs = len(true_mcs_list)
if num_true_runs != repeat_num:
    print(f"warning: expected {repeat_num} reference runs, found {num_true_runs}")

true_shaps = np.zeros(num_datapoints)
for true_mcs in true_mcs_list:
    true_shaps += np.asarray(get_shaps(true_mcs)) / num_true_runs
print(true_shaps)

In [None]:
"""
Quantify the relationship using the Spearman rank coefficient, the Pearson coefficient and cosine similarity
"""

# For every evaluated run, compare the ratio r (one value per training
# example) with the inverse estimation error, and record three agreement
# scores between them: Spearman, Pearson and cosine similarity.

eps = 1e-5  # stabiliser against division by zero

# One score per run.
spear_coeffs = []
pearson_coeffs = []
cos_sim = []

# One vector (length = number of training examples) per run.
all_r = []
all_inv_ape = []
all_inv_mse = []

for est_mcs in est_mcs_list:
    # Converted to an array for consistency with how `true_shaps` is built.
    est_shaps = np.asarray(get_shaps(est_mcs))
    abs_err = np.abs(est_shaps - true_shaps)

    # Per-example absolute percentage error (not averaged), then square-rooted.
    # The denominator uses |true_shaps| so that `eps` can never cancel a
    # negative reference value and produce a zero / near-zero denominator.
    ape = ((abs_err + eps) / (np.abs(true_shaps) + eps)) ** 0.5
    mse = abs_err ** 2

    r = []
    for i in range(len(true_shaps)):
        # First component of every stored entry for example i.
        mc_i = np.asarray([item[0] for item in est_mcs[i]])
        s2 = np.var(mc_i, ddof=1) + eps  # sample variance, stabilised
        mu = true_shaps[i]
        r.append((abs(mu) + xi) ** 2 / s2)
    # Scale by the number of entries stored for the first example.
    r = np.asarray(r) * len(est_mcs[0])

    # Kept in its own name so the `metric` configuration string defined near
    # the top of the notebook is no longer overwritten by an array.
    inv_err = 1 / ape

    spear_coeffs.append(spearmanr(r, inv_err).correlation)
    pearson_coeffs.append(pearsonr(r, inv_err)[0])
    cos_sim.append(1 - spatial.distance.cosine(r, inv_err))

    all_r.append(r)
    all_inv_ape.append(inv_err)
    # NOTE(review): yields inf where an estimate matches the reference exactly.
    all_inv_mse.append(1 / mse)

In [None]:
# Mean and standard error of the Spearman coefficients across runs, printed
# in a LaTeX-ready "mean \pm se" form.
spear_coeffs = np.asarray(spear_coeffs)
# Raw string: "\p" is not a valid escape sequence in a normal literal and
# triggers a warning on recent Python versions. The printed text is unchanged.
print(r"%.3f \pm %.3f" % (spear_coeffs.mean(), sem(spear_coeffs)))

In [None]:
# Mean and standard error of the Pearson coefficients across runs, printed
# in a LaTeX-ready "mean \pm se" form.
pearson_coeffs = np.asarray(pearson_coeffs)
# Raw string: "\p" is not a valid escape sequence in a normal literal and
# triggers a warning on recent Python versions. The printed text is unchanged.
print(r"%.3f \pm %.3f" % (pearson_coeffs.mean(), sem(pearson_coeffs)))

In [None]:
# Mean and standard error of the cosine similarities across runs, printed
# in a LaTeX-ready "mean \pm se" form.
cos_sim = np.asarray(cos_sim)
# Raw string: "\p" is not a valid escape sequence in a normal literal and
# triggers a warning on recent Python versions. The printed text is unchanged.
print(r"%.3f \pm %.3f" % (cos_sim.mean(), sem(cos_sim)))

In [None]:
# Line styles for the curves in the figure below: four named styles followed
# by two explicit (offset, (on, off)) dash patterns.
linestyles = [
    'solid',
    'dashed',
    'dotted',
    'dashdot',
    (0, (1, 1)),
    (0, (1, 5)),
]

In [None]:
# Summarise each training example by its mean and standard error across runs,
# then plot the ratio r and the inverse error on twin y-axes and save a PDF.
all_r = np.asarray(all_r)
all_inv_metric = np.asarray(all_inv_ape)  # the error measure shown is 1/ape

r_means = all_r.mean(axis=0)
r_se = sem(all_r, axis=0)
inv_metric_means = all_inv_metric.mean(axis=0)
inv_metric_se = sem(all_inv_metric, axis=0)

# Reorder every series by increasing mean inverse error.
indicies = np.argsort(inv_metric_means)
r_means, r_se = r_means[indicies], r_se[indicies]
inv_metric_means, inv_metric_se = inv_metric_means[indicies], inv_metric_se[indicies]

# Guarded so that re-running the cell does not keep growing sys.path.
if '../' not in sys.path:
    sys.path.append('../')
from vol_utils.utils import set_up_plotting
# NOTE(review): presumably returns a configured matplotlib.pyplot -- it is
# used as such below; confirm against vol_utils.
plt = set_up_plotting()
fig, ax1 = plt.subplots(figsize=(8,6))
x_pos = np.arange(len(r_means))

# Left axis: the ratio r with a +/- one standard error band.
# NOTE(review): the legend text hard-codes the value of `xi`; keep it in sync
# with the configuration cell.
ln1 = ax1.plot(r_means, label=r'$f_i$($\xi$ = 1e-3)', c='C0', linestyle=linestyles[0])
ax1.fill_between(x_pos, r_means - r_se, r_means + r_se, alpha=0.3, color='C0')
# NOTE(review): after the sort above the x position is a rank, not the
# original example index.
ax1.set_xlabel("Training Example Index")
ax1.set_ylabel(r'$f_i$')
# Sparse ticks with 1-based labels. The previous call that first set a tick at
# every position was immediately overridden by this one and has been dropped.
# NOTE(review): the positions assume 50 examples.
xticks = [0, 9, 19, 29, 39, 49]
ax1.set_xticks(xticks)
ax1.set_xticklabels([i+1 for i in xticks])

# Right axis: inverse error with its own +/- one standard error band.
ax2 = ax1.twinx()
ln2 = ax2.plot(inv_metric_means, label=r'$\sqrt{1/APE}$', c='C1', linestyle=linestyles[1])
ax2.fill_between(x_pos, inv_metric_means - inv_metric_se, inv_metric_means + inv_metric_se, alpha=0.3, color='C1')
ax2.set_ylabel(r'$\sqrt{1/APE}$')

# A single legend covering the lines from both axes.
ax1.legend(handles=ln1 + ln2)
fig.savefig(f"../figs/ri_ape_{dataset_name}_{model_name}.pdf", format='pdf', dpi=300, bbox_inches='tight')