In [36]:
%load_ext autoreload
%autoreload 2

# standard library
import os
import pickle as pkl
import sys
import time
from copy import deepcopy
from os.path import join as oj
from typing import Dict

# third-party
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as npr
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_ind, spearmanr
from tqdm import tqdm

# sklearn models
from sklearn.model_selection import train_test_split
from sklearn import metrics

# project-local modules (the path entries must be added before these imports)
sys.path.append('../../src')
sys.path.append('../../interp')
import utils, lcp, train
from compare_stats import compare_stats
import gen_data
from all_scores import get_scores
from style import style_tab

# Plot colours as RGB triples scaled from 0-255 integers to the 0-1 range.
cred = tuple(channel / 255 for channel in (234, 51, 86))
cblue = tuple(channel / 255 for channel in (57, 138, 242))

# Directory that receives the exported tables; created on first run.
out_dir = '../../results/interp_sim'
os.makedirs(out_dir, exist_ok=True)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [87]:
def get_data(seed=15, sim_num=1):
    """Simulate a binary-classification dataset and split it into train/test parts.

    Parameters
    ----------
    seed : int
        Seed passed to ``np.random.seed`` (numpy's global RNG) on entry.
    sim_num : int
        Simulation id. It selects the number of features ``d``, the noise level
        ``var_eps``, the non-zero coefficients of ``beta`` and, for ids 2, 3, 4
        and 7, a hand-written response that replaces the one produced by
        ``gen_data``. Ids without a dedicated entry use ``d=5, var_eps=1e-2``.

    Returns
    -------
    tuple
        ``(train_test_split(X, y), beta)``; callers unpack the first element as
        ``X_train, X_test, y_train, y_test``.
    """
    np.random.seed(seed)

    # (d, var_eps) for the simulations that deviate from / restate the defaults
    dim_and_noise = {
        0: (5, 1e-2),
        1: (10, 1e-1),
        2: (5, 1e-3),
        3: (10, 1e-3),
        4: (5, 1e-2),
    }
    d, var_eps = dim_and_noise.get(sim_num, (5, 1e-2))
    n = 10000

    # the first two features always carry signal
    beta = np.zeros(d)
    beta[:2] = [1, 2]

    # additional (index, coefficient) pairs for the simulations with more signal features
    extra_coefs = {5: [(2, 3)], 6: [(2, 3), (3, 4)]}
    for idx, coef in extra_coefs.get(sim_num, []):
        beta[idx] = coef

    if sim_num == 7:
        # noiseless, larger sample for the multiplicative response below
        var_eps = 0
        n = 50000

    X, y, _ = gen_data.gen_gaussian_linear_data(
        n=n, d=d, beta=beta, var_eps=var_eps,
        s=None, shift_type='None', shift_val=0.1, logistic=True)

    if sim_num in (2, 3, 4):
        # noise-free linear response on the two signal features
        y = beta[0] * X[:, 0] + beta[1] * X[:, 1]
    elif sim_num == 7:
        # NOTE(review): if X contains negative entries (the generator's name suggests
        # Gaussian features -- confirm), the product can be negative, np.sqrt then
        # yields NaN, and `z <= pr` below is False for NaN, so those rows get label 0.
        y = beta[0] * beta[1] * np.sqrt(X[:, 0] * X[:, 1])

    # convert y to a probability and sample labels unless y already has exactly two values
    already_two_valued = np.unique(y).size == 2
    if not already_two_valued:
        pr = 1 / (1 + np.exp(-y))

        # Bernoulli draw: label is 1 when a uniform(0, 1) sample falls at or below pr
        z = np.random.uniform(size=n)
        y = (z <= pr).astype(np.int32)

    # train_test_split defaults to a 0.75 / 0.25 split
    return train_test_split(X, y), beta

In [None]:
# Experiment grid: which simulations (see get_data) and which model families to run.
sim_nums = [7]
mod_types = ['logistic', 'mlp2', 'gb']
num_points = 1  # number of test points to explain; tables are averaged over points below
tabs_list = []
class_weights = [0.5, 1.0, 2.0]
for point_num in tqdm(range(num_points)):
    vals_list = []
    for sim_num in sim_nums:
        for model_type in mod_types:
            # get_data re-seeds numpy's global RNG on every call, so it is kept inside
            # the model loop on purpose: hoisting it would change the RNG state seen
            # by the code that runs after it for the 2nd and 3rd model.
            (X_train, X_test, y_train, y_test), beta = get_data(sim_num=sim_num)

            # train and get importance scores for one test point
            ms = train.train_models(X_train, y_train,
                                    class_weights=class_weights, model_type=model_type)
            scores = get_scores(ms, X_train, X_test[point_num], mode='classification')

            # dataset of statistics based on the (non-std) importance scores
            ds = compare_stats(beta, {k: scores[k] for k in scores if 'std' not in k})

            # record test accuracy of ms[1]
            # NOTE(review): presumably the model trained with class_weights[1] == 1.0 --
            # confirm against train.train_models.
            ds['acc'] = {'': metrics.accuracy_score(y_test, ms[1].predict(X_test))}
            ds['std'] = {k: scores[k] for k in scores if 'std' in k}

            # flatten the two-level dict into ((score, metric), value) pairs, sorted by key
            flat = [((outer_key, inner_key), value)
                    for outer_key, inner_dict in sorted(ds.items())
                    for inner_key, value in sorted(inner_dict.items())]
            ind = pd.MultiIndex.from_tuples([key for key, _ in flat], names=['score', 'metric'])
            vals = np.array([value for _, value in flat])
            vals_list.append(vals.reshape(1, -1))

    # np.vstack keeps a 2-D (n_rows, n_stats) array even for a single (sim, model) row;
    # np.array(vals_list).squeeze() collapsed that case to 1-D and broke the DataFrame.
    vals = np.vstack(vals_list)
    ind_dset = ['Sim ' + str(i) + ' ' + mod for i in sim_nums
                for mod in mod_types]

    tab = pd.DataFrame(vals, columns=ind, index=ind_dset)
    tabs_list.append(tab)

# average over explained points (rows share the same 'Sim <i> <model>' labels)
tab = pd.concat(tabs_list).groupby(level=0).mean().round(decimals=2)

# File name follows sim_nums ([7] -> results_7.pkl, [0, 1, 2] -> results_0_1_2.pkl)
# so that changing the grid cannot silently overwrite another run's results.
tab.to_pickle('./results_' + '_'.join(str(i) for i in sim_nums) + '.pkl')

In [89]:
# Load the previously saved result tables (one pickle per group of simulations).
# NOTE: unpickling can execute arbitrary code -- only read pickle files you created yourself.
result_paths = ['results_0_1_2.pkl', 'results_3_4.pkl', 'results_5_6.pkl', 'results_7.pkl']
tab0, tab1, tab2, tab3 = [pd.read_pickle(path) for path in result_paths]
tab = pd.concat([tab0, tab1, tab2, tab3])

# Display every column except the std ones (style_tab(tab) would show them as well).
non_std_cols = [col for col in tab if 'std' not in col]
style_tab(tab[non_std_cols])

score,acc,ice-contrib,ice-contrib,ice-contrib,ice-sensitivity,ice-sensitivity,ice-sensitivity,lime,lime,lime,shap,shap,shap
metric,Unnamed: 1_level_1,Fraction Correct Signs,Fraction Intersect,Rank Corr,Fraction Correct Signs,Fraction Intersect,Rank Corr,Fraction Correct Signs,Fraction Intersect,Rank Corr,Fraction Correct Signs,Fraction Intersect,Rank Corr
Sim 0 gb,0.78,0.0,1.0,-1.0,1.0,0.5,1,0.0,1,1,0.0,1,-1.0
Sim 0 logistic,0.8,0.0,1.0,-1.0,1.0,1.0,1,0.0,1,1,0.0,1,-1.0
Sim 0 mlp2,0.79,0.0,1.0,-1.0,1.0,1.0,1,0.0,1,1,0.0,1,-1.0
Sim 1 gb,0.77,0.0,0.5,-1.0,1.0,1.0,-1,0.0,1,1,0.5,1,-1.0
Sim 1 logistic,0.79,0.5,1.0,-1.0,1.0,1.0,1,0.5,1,1,0.5,1,-1.0
Sim 1 mlp2,0.76,0.0,0.5,-1.0,1.0,1.0,1,0.5,1,1,0.5,1,-1.0
Sim 2 gb,0.77,1.0,1.0,1.0,1.0,1.0,1,1.0,1,-1,1.0,1,1.0
Sim 2 logistic,0.8,1.0,1.0,1.0,1.0,1.0,1,1.0,1,-1,1.0,1,1.0
Sim 2 mlp2,0.77,1.0,1.0,1.0,1.0,1.0,1,1.0,1,-1,1.0,1,1.0
Sim 3 gb,0.75,0.0,1.0,1.0,1.0,1.0,1,0.0,1,1,0.0,1,1.0


**Check how good the uncertainty is: correlate each importance score's std with its quality metrics**

In [126]:
# Pearson correlation between each quality metric of an importance score and that
# score's std column, computed over the rows of `tab` where both values are present.
cols = ['Fraction Intersect', 'Rank Corr', 'Fraction Correct Signs']
rows = ['ice-contrib', 'ice-sensitivity', 'lime', 'shap']
corrs = np.zeros((len(rows), len(cols)))
for row_idx, imp in enumerate(rows):
    spread = tab[('std', imp + '_std')]
    for col_idx, metric in enumerate(cols):
        quality = tab[(imp, metric)]
        # keep only rows where neither the metric nor the std is NaN
        keep = ~(np.isnan(quality) | np.isnan(spread))
        corrs[row_idx, col_idx] = np.corrcoef(quality[keep], spread[keep])[0, 1]

# undefined correlations (NaN) are shown as '--' in the exported table
results = pd.DataFrame(corrs, columns=cols, index=rows).round(2).fillna('--')
latex = results.round(2).to_latex(index=True)
for rule in ('\\toprule', '\\bottomrule'):
    latex = latex.replace(rule, '')
print(latex)

\begin{tabular}{llrr}

{} & Fraction Intersect &  Rank Corr &  Fraction Correct Signs \\
\midrule
ice-contrib     &               0.32 &       0.04 &                   -0.01 \\
ice-sensitivity &               0.01 &       0.18 &                    0.10 \\
lime            &                 -- &      -0.29 &                    0.13 \\
shap            &                 -- &       0.13 &                    0.46 \\

\end{tabular}



# Convert to HTML/LaTeX

In [113]:
# Styled view of the results table without the std columns.
non_std_cols = [col for col in tab if 'std' not in col]
vals = style_tab(tab[non_std_cols])

In [117]:
# Centre the cell text and export the styled table as HTML.
vals = vals.set_properties(**{'text-align': 'center'})
# Styler.render() was deprecated in pandas 1.4 and removed in 2.0; use to_html()
# when the installed pandas provides it and fall back to render() otherwise.
html = vals.to_html() if hasattr(vals, 'to_html') else vals.render()
with open(oj(out_dir, 'table_classification.html'), 'w') as f:
    f.write(html)

In [None]:
# Print the transposed table as LaTeX, without the booktabs top/bottom rules.
s = tab.transpose().to_latex(multicolumn_format='c')
for rule in ('\\toprule', '\\bottomrule'):
    s = s.replace(rule, '')
print(s)