# 3: Extract Local Weighting

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import pickle

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.preprocessing import MinMaxScaler

from collections import Counter

from copy import deepcopy

import lime
import lime.lime_tabular

from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Activation
from keras import backend as K

# For Deep Learning Explanations
from deepexplain.tensorflow import DeepExplain
import deeplift
from deeplift.conversion import kerasapi_conversion as kc

Using TensorFlow backend.


In [2]:
# Load the processed feature table. Its columns are used further down to name
# the features and to locate the two-valued (one-hot) columns for LIME.
df = pd.read_csv("processed_df.csv")

In [3]:
# Read the saved train/test feature matrices and label vectors from disk,
# in the order X_train, X_test, y_train, y_test.
X_train, X_test, y_train, y_test = (
    np.load(array_file)
    for array_file in ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy")
)

## First Generate Gradient Matrices:

In [4]:
# Load the saved Keras network; this is the model that the LIME and
# DeepExplain cells below query.
model = load_model("NN.h5")

In [5]:
def flatten_predict(i):
    """
    Adapter so LIME can call the Keras model.

    LIME expects one probability column per class, while the model is asked
    for a single value per row, so the complementary column is added here.

    i: batch of input rows, handed unchanged to model.predict_proba.

    return: array with one row per input; the first column is 1 - p and the
            second column is p, where p is the model's predicted value.
    """

    # `model` is the module-level network; it is only read, never rebound.
    positive = model.predict_proba(i)

    # A ones column with one row per prediction; subtracting the model output
    # from it gives the probability of the other class.
    negative = np.ones((positive.shape[0], 1)) - positive

    return np.concatenate((negative, positive), axis=1)

### NB
Although LIME is used here in a way that does not treat categorical variables correctly, this is necessary for the experiments because we need coefficients for all features, including all one-hot encoded features. Implementing the method from Nugent and Cunningham's paper requires this approach.

If using LIME to acquire standard explanations, please refer to https://github.com/marcotcr/lime for the proper treatment of categorical variables. In our experiments, this approach gave better $agreement$ scores than our own from-scratch implementation of Nugent and Cunningham's method (they did not provide code with their publication).

In [12]:
def which_features_ohe(df):
    """
    Find the columns that look one-hot encoded, i.e. hold exactly two
    distinct values.

    Only the number of distinct values is checked; the values themselves are
    not required to be 0 and 1.

    df: DataFrame whose columns are inspected.

    return: a list of integer column positions, in column order
    """

    return [
        position
        for position, column in enumerate(df.columns)
        if len(df[column].value_counts()) == 2
    ]

In [13]:
# The column labels serve as LIME's feature names, and the column count is the
# number of coefficients requested from each explanation.
feature_names = df.columns
num_features = len(feature_names)

# Positions of the two-valued columns, which are passed to LIME as categorical.
categorical_features = which_features_ohe(df)

In [8]:
# LIME explains an instance by drawing random perturbations around it, so the
# fitted coefficients change from run to run unless the sampler is seeded.
# Fixing the seed makes the saved LIME matrices reproducible.
LIME_RANDOM_STATE = 42

explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train,
    feature_names=feature_names,
    categorical_features=categorical_features,
    class_names=['N', "Y"],
    verbose=False,
    # Continuous features are left as raw values (no binning).
    discretize_continuous=False,
    mode='classification',
    random_state=LIME_RANDOM_STATE,
)

In [10]:
# One row per training instance: the LIME weights ordered by feature,
# followed by the local model's intercept.
X_train_grad_LIME = list()
n_train = len(X_train)

for row_idx, qc in enumerate(X_train):

    # Report progress every 1000 instances.
    if row_idx % 1000 == 0:
        print((row_idx / n_train) * 100, "% done...")

    exp = explainer.explain_instance(qc, flatten_predict, num_features=num_features)

    # The entries for label 1 are pairs; sorting puts them in order of their
    # first element, after which only the second element (the weight) is kept.
    ordered_pairs = sorted(exp.as_map()[1])
    coefs = [weight for _, weight in ordered_pairs]

    X_train_grad_LIME.append(np.append(coefs, exp.intercept[1]).tolist())

0.0 % done...
4.166666666666666 % done...
8.333333333333332 % done...
12.5 % done...
16.666666666666664 % done...
20.833333333333336 % done...
25.0 % done...
29.166666666666668 % done...
33.33333333333333 % done...
37.5 % done...
41.66666666666667 % done...
45.83333333333333 % done...
50.0 % done...
54.166666666666664 % done...
58.333333333333336 % done...
62.5 % done...
66.66666666666666 % done...
70.83333333333334 % done...
75.0 % done...
79.16666666666666 % done...
83.33333333333334 % done...
87.5 % done...
91.66666666666666 % done...
95.83333333333334 % done...


In [11]:
# One row per test instance: the LIME weights ordered by feature,
# followed by the local model's intercept.
X_test_grad_LIME = list()
n_test = len(X_test)

for row_idx, qc in enumerate(X_test):

    # Report progress every 1000 instances.
    if row_idx % 1000 == 0:
        print((row_idx / n_test) * 100, "% done...")

    exp = explainer.explain_instance(qc, flatten_predict, num_features=num_features)

    # The entries for label 1 are pairs; sorting puts them in order of their
    # first element, after which only the second element (the weight) is kept.
    ordered_pairs = sorted(exp.as_map()[1])
    coefs = [weight for _, weight in ordered_pairs]

    X_test_grad_LIME.append(np.append(coefs, exp.intercept[1]).tolist())

0.0 % done...
16.666666666666664 % done...
33.33333333333333 % done...
50.0 % done...
66.66666666666666 % done...
83.33333333333334 % done...


In [14]:
# Convert the accumulated per-instance rows into arrays ...
X_train_grad_LIME = np.array(X_train_grad_LIME)
X_test_grad_LIME = np.array(X_test_grad_LIME)

# ... and write each one to disk (np.save adds the ".npy" extension).
for stem, matrix in (
    ("X_train_grad_LIME", X_train_grad_LIME),
    ("X_test_grad_LIME", X_test_grad_LIME),
):
    np.save(stem, matrix)

## Integrated Gradients and Layerwise Relevance Propagation
https://github.com/marcoancona/DeepExplain

In [15]:
# Run both attribution methods on the train split, then on the test split.
# Each split gets its own DeepExplain context and its own sub-model ending at
# the network's second-to-last layer.
attribution_pairs = []

for xs in (X_train, X_test):
    with DeepExplain(session=K.get_session()) as de:  # <-- init DeepExplain context
        input_tensor = model.layers[0].input
        fModel = Model(inputs=input_tensor, outputs=model.layers[-2].output)
        target_tensor = fModel(input_tensor)

        # Integrated gradients first, then epsilon-LRP, on the same tensors.
        intgrad_scores = de.explain('intgrad', target_tensor, input_tensor, xs)
        lrp_scores = de.explain('elrp', target_tensor, input_tensor, xs)

    attribution_pairs.append((intgrad_scores, lrp_scores))

(X_train_intgrad, X_train_lrp), (X_test_intgrad, X_test_lrp) = attribution_pairs

# Persist all four attribution matrices (np.save adds the ".npy" extension).
for stem, scores in (
    ("X_train_intgrad", X_train_intgrad),
    ("X_test_intgrad", X_test_intgrad),
    ("X_train_lrp", X_train_lrp),
    ("X_test_lrp", X_test_lrp),
):
    np.save(stem, scores)

DeepExplain: running "intgrad" explanation method (3)
Model with multiple inputs:  False
DeepExplain: running "elrp" explanation method (4)
Model with multiple inputs:  False
DeepExplain: running "intgrad" explanation method (3)
Model with multiple inputs:  False
DeepExplain: running "elrp" explanation method (4)
Model with multiple inputs:  False


## DeepLIFT Contributions:
https://github.com/kundajelab/deeplift

In [16]:
# Build a DeepLIFT model from the same saved Keras file.
deeplift_model = kc.convert_model_from_saved_files(
    "NN.h5",
    nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.DeepLIFT_GenomicsDefault,
)

# Scores are computed for layer index 0 with respect to the second-to-last
# layer of the converted model.
find_scores_layer_idx = 0
deeplift_contribs_func = deeplift_model.get_target_contribs_func(
    find_scores_layer_idx=find_scores_layer_idx,
    target_layer_idx=-2,
)

# Settings shared by the train and test scoring runs. No reference input is
# passed to either call.
scoring_options = dict(task_idx=0, batch_size=10, progress_update=1000)

X_train_deeplift = np.array(
    deeplift_contribs_func(input_data_list=[X_train], **scoring_options)
)
X_test_deeplift = np.array(
    deeplift_contribs_func(input_data_list=[X_test], **scoring_options)
)

np.save("X_train_deeplift", X_train_deeplift)
np.save("X_test_deeplift", X_test_deeplift)

nonlinear_mxts_mode is set to: DeepLIFT_GenomicsDefault
For layer 1 the preceding linear layer is 0 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
Heads-up: I assume sigmoid is the output layer, not an intermediate one; if it's an intermediate layer then please bug me and I will implement the grad func
For layer 3 the preceding linear layer is 2 of type Dense;
In accordance with nonlinear_mxts_modeDeepLIFT_GenomicsDefault we are setting the NonlinearMxtsMode to RevealCancel
No reference provided - using zeros
Done 0
Done 1000
Done 2000
Done 3000
Done 4000
Done 5000
Done 6000
Done 7000
Done 8000
Done 9000
Done 10000
Done 11000
Done 12000
Done 13000
Done 14000
Done 15000
Done 16000
Done 17000
Done 18000
Done 19000
Done 20000
Done 21000
Done 22000
Done 23000
No reference provided - using zeros
Done 0
Done 1000
Done 2000
Done 3000
Done 4000
Done 5000
