In [4]:
%load_ext autoreload
%autoreload 2

from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pickle as pkl
from os.path import join as oj
from copy import deepcopy
import pandas as pd
import numpy.random as npr
import time
sys.path.append('../../src')
sys.path.append('../../interp')
import utils, lcp, train
from scipy.stats import ttest_ind, spearmanr
from typing import Dict
from compare_stats import compare_stats
import gen_data

# sklearn models
from sklearn.model_selection import train_test_split
from sklearn import metrics
from all_scores import get_scores

cred = (234/255, 51/255, 86/255)
cblue = (57/255, 138/255, 242/255)
out_dir = '../../results/interp_sim'
os.makedirs(out_dir, exist_ok=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
def get_data(seed=15, sim_num=1):
    np.random.seed(seed)
    # generate data
    d = 10
    n = 1000
    var_eps = 0.1

    if sim_num == 1:
        var_eps = 1
    elif sim_num == 2:
        var_eps = 10
    elif sim_num == 3:
        d = 100
    beta = np.zeros(d)
    beta[0] = 1
    beta[1] = 2       
        
        
    # pick beta
    X, y, _ = gen_data.gen_gaussian_linear_data(n=n, d=d, beta=beta, var_eps=var_eps, 
                 s=None, shift_type='None', shift_val=0.1, logistic=True)
    return train_test_split(X, y), beta # split defaults to 0.75: 0.25 split

In [7]:
sim_nums = list(range(3))
num_points = 100
tabs_list = []
class_weights = [0.5, 1.0, 2.0]
for tqdm(point_num in range(num_points)):
    
    vals_list = []
    for sim_num in sim_nums:
        (X_train, X_test, y_train, y_test), beta = get_data(sim_num=sim_num)

        # train and get importance scores
        ms = train.train_models(X_train, y_train, 
                                class_weight=class_weight, model_type='logistic')
        scores = get_scores(m, X_train, X_test[point_num], mode='classification')
        # pd.DataFrame(scores).style.background_gradient(cmap='viridis')

        # dataset of statistics based on importance scores
        ds = compare_stats(beta, scores)

        # record accuracy of model
        ds['r2'] = {'': metrics.r2_score(m.predict(X_test), y_test)}

        ind_tuples = [(outerKey, innerKey) for outerKey, innerDict in sorted(ds.items()) for innerKey, values in sorted(innerDict.items())]
        ind = pd.MultiIndex.from_tuples(ind_tuples, names=['score', 'metric'])
        vals = np.array([values for outerKey, innerDict in sorted(ds.items()) for innerKey, values in sorted(innerDict.items())])
        vals_list.append(vals.reshape(-1, vals.size))
    vals = np.array(vals_list).squeeze()
    tab = pd.DataFrame(vals, columns=ind, index=[f'Sim {str(i)}' for i in sim_nums])
    tabs_list.append(tab)
tab = pd.concat(tabs_list).groupby(level=0).mean().round(decimals=2)

W0802 02:28:58.060601 139849099495232 kernel.py:108] Using 750 background data samples could cause slower run times. Consider using shap.kmeans(data, K) to summarize the background as K weighted samples.
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \
W0802 02:29:02.522609 139849099495232 kernel.py:108] Using 750 background data samples could cause slower run times. Consider using shap.kmeans(data, K) to summarize the background as K weighted samples.
  "l1_reg=\"auto\" is deprecated and in the next version (v0.29) the behavior will change from a " \


In [5]:
tab

score,ice-contrib,ice-contrib,ice-contrib,ice-sensitivity,ice-sensitivity,ice-sensitivity,lime,lime,lime,r2,shap,shap,shap
metric,Fraction Correct Signs,Fraction Intersect,Rank Corr,Fraction Correct Signs,Fraction Intersect,Rank Corr,Fraction Correct Signs,Fraction Intersect,Rank Corr,Unnamed: 10_level_1,Fraction Correct Signs,Fraction Intersect,Rank Corr
dset 0,0.33,1.0,-1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.98,0.0,1.0,-1.0
dset 1,1.0,0.33,0.33,1.0,1.0,1.0,0.67,1.0,0.33,-0.04,0.67,0.5,-0.33


In [23]:
from visualize import background_gradient, cm

def style_tab(tab):
    vals = tab
    # vals = vals.drop(('r2', ''), axis=1)
    vals = vals.style.applymap(lambda val : 'color: black')

    importances = ['ice-contrib', 'ice-sensitivity', 'lime', 'shap']
    vals = vals.apply(background_gradient, axis=None, 
                            cmap=cm, cmin=0, cmax=1)
    vals = vals.apply(background_gradient, axis=None, 
                            cmap=cm, cmin=-1, cmax=1,
                            subset=[(imp, 'Rank Corr') for imp in importances])
    return vals

score,ice-contrib,ice-contrib,ice-contrib,ice-sensitivity,ice-sensitivity,ice-sensitivity,lime,lime,lime,shap,shap,shap
metric,Fraction Correct Signs,Fraction Intersect,Rank Corr,Fraction Correct Signs,Fraction Intersect,Rank Corr,Fraction Correct Signs,Fraction Intersect,Rank Corr,Fraction Correct Signs,Fraction Intersect,Rank Corr
dset 0,0.33,1.0,-1.0,1,1,1,0.0,1,1.0,0.0,1.0,-1.0
dset 1,1.0,0.33,0.33,1,1,1,0.67,1,0.33,0.67,0.5,-0.33


In [70]:
vals.columns.levels

# convert to html/latex

In [None]:
vals = vals.set_properties(**{'text-align': 'center'})
html = vals.render()
with open(oj(out_dir, 'table_classification.html'), 'w') as f:
    f.write(html)

In [8]:
s = tab.transpose().to_latex(multicolumn_format='c')
s = s.replace('\\toprule', '').replace('\\bottomrule', '')
print(s)

\begin{tabular}{llrr}

     &           &  Sim 0 &  Sim 1 \\
score & metric &        &        \\
\midrule
ice-contrib & Fraction Correct Signs &    0.5 &   0.50 \\
     & Fraction Intersect &    1.0 &   1.00 \\
     & Rank Corr &    1.0 &  -1.00 \\
ice-sensitivity & Fraction Correct Signs &    1.0 &   1.00 \\
     & Fraction Intersect &    1.0 &   1.00 \\
     & Rank Corr &    1.0 &   1.00 \\
lime & Fraction Correct Signs &    0.5 &   0.00 \\
     & Fraction Intersect &    1.0 &   1.00 \\
     & Rank Corr &    1.0 &   1.00 \\
r2 &           &    0.9 &   0.64 \\
shap & Fraction Correct Signs &    0.5 &   0.00 \\
     & Fraction Intersect &    1.0 &   0.50 \\
     & Rank Corr &    1.0 &  -1.00 \\

\end{tabular}

