In [49]:
# Set CUDA_VISIBLE_DEVICES for this kernel before the deep-learning imports run.
%set_env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


In [50]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Conv1D, GlobalMaxPooling1D, Dropout 
from tensorflow.keras.layers import MaxPooling1D, Flatten, Activation
from tensorflow.keras.utils import to_categorical
from alibi.explainers import IntegratedGradients
from captum.attr import LayerIntegratedGradients, TokenReferenceBase
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
# Record the TensorFlow version and whether eager execution is active.
print('TF version: ', tf.__version__)
print('Eager execution enabled: ', tf.executing_eagerly()) # True

TF version:  2.3.1
Eager execution enabled:  True


## Load data

In [51]:
# Vocabulary size: passed as `num_words` to `imdb.load_data` and used as the
# number of rows in both embedding tables.
max_features = 10000
# Fixed sequence length every review is brought to by `pad_sequences`.
maxlen = 100

In [52]:
print('Loading data...')
# Reviews arrive as lists of word ids limited to the `max_features` vocabulary.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Keep untouched copies of the labels before the targets are converted below.
train_labels = y_train.copy()
test_labels = y_test.copy()

print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Targets become two-column categorical arrays.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print('Pad sequences (samples x time)')
# Bring every review to exactly `maxlen` ids.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

# `index` maps word -> id; invert it so ids can be decoded back to words.
index = imdb.get_word_index()
reverse_index = dict((value, key) for key, value in index.items())

Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 100)
x_test shape: (25000, 100)


In [53]:
def decode_sentence(x, reverse_index):
    """Turn a sequence of word ids back into a space-separated sentence.

    Each id is shifted down by 3 before the lookup because keras reserves the
    lowest ids for special tokens, see
    https://stackoverflow.com/questions/42821330/restore-original-text-from-keras-s-imdb-dataset

    Args:
        x: iterable of integer word ids.
        reverse_index: mapping from (unshifted) word id to word.

    Returns:
        The decoded words joined by single spaces; ids with no entry in
        `reverse_index` are rendered as 'UNK'.
    """
    words = []
    for token_id in x:
        words.append(reverse_index.get(token_id - 3, 'UNK'))
    return " ".join(words)

In [54]:
# Sanity check: decode the second padded test review back into text.
print(decode_sentence(x_test[1], reverse_index)) 

a powerful study of loneliness sexual UNK and desperation be patient UNK up the atmosphere and pay attention to the wonderfully written script br br i praise robert altman this is one of his many films that deals with unconventional fascinating subject matter this film is disturbing but it's sincere and it's sure to UNK a strong emotional response from the viewer if you want to see an unusual film some might even say bizarre this is worth the time br br unfortunately it's very difficult to find in video stores you may have to buy it off the internet


# Models

In [55]:
# Hyper-parameters used when building the PyTorch and Keras models.
batch_size = 32  # NOTE(review): not referenced in the visible cells
embedding_dims = 50  # width of each embedding vector
filters = 250  # NOTE(review): no convolution layer is built in the visible cells - presumably a leftover, confirm
kernel_size = 3  # NOTE(review): same remark as for `filters`
hidden_dims = 250  # output width of the first dense layer (`linear1`)

In [56]:
# The PyTorch model and its input tensors are moved to this device (CPU).
device = torch.device("cpu")

### pytorch model

In [97]:
class Net(nn.Module):
    """Embedding -> flatten -> two linear layers, with no non-linearity.

    The sub-module names (`emb`, `linear1`, `linear2`) match the layer names of
    the Keras model built in this notebook so weights can be copied by name.
    Sizes are read from the notebook-level constants `max_features`,
    `embedding_dims`, `maxlen` and `hidden_dims`.
    """

    def __init__(self):
        super().__init__()

        self.emb = nn.Embedding(max_features, embedding_dims)
        # After flattening, each sample has maxlen * embedding_dims features.
        # Deriving the size (instead of a literal 5000) keeps the layer valid
        # when either constant is changed.
        self.linear1 = nn.Linear(maxlen * embedding_dims, hidden_dims)
        self.linear2 = nn.Linear(hidden_dims, 2)

    def forward(self, x):
        """Map a batch of token-id rows to two output scores per sample."""
        x = self.emb(x)
        # Keep the batch dimension, merge everything else.
        x = torch.flatten(x, 1)
        x = self.linear1(x)
        # x = F.relu(x) # Adding relu layers makes the attributions different
        x = self.linear2(x)
        return x


In [98]:
    
# Keras counterpart of `Net`: the layers carry the same names as the PyTorch
# sub-modules ('emb', 'linear1', 'linear2') so weights can be matched by name.
inputs = Input(shape=(maxlen,), dtype='int32', name='inputs')
out = Embedding(max_features, embedding_dims, name='emb')(inputs)
out = Flatten(name='Flat', data_format='channels_last')(out)
out = Dense(hidden_dims, name='linear1')(out)
# out = Activation('relu')(out) # Adding relu layers makes the attributions different
out = Dense(2, name='linear2')(out)
model = Model(inputs=inputs, outputs=out)

In [99]:
# Instantiate the PyTorch model and move it to `device`; the last expression
# displays the module structure.
net = Net()
net.to(device)

Net(
  (emb): Embedding(10000, 50)
  (linear1): Linear(in_features=5000, out_features=250, bias=True)
  (linear2): Linear(in_features=250, out_features=2, bias=True)
)

# Transfer weights from the PyTorch model to the Keras model

In [100]:
# Unique sub-module names of `net`, in first-seen order. State-dict keys look
# like '<module>.<parameter>', so the part before the first dot is the name.
lnames = list(dict.fromkeys(key.split('.')[0] for key in net.state_dict()))

In [101]:
# Copy every PyTorch parameter into the Keras layer that has the same name,
# converting between the two frameworks' weight layouts.
state = net.state_dict()  # fetch once instead of once per tensor
for name in lnames:

    if 'conv' in name:
        # Reverse the kernel axes to go from the PyTorch to the Keras layout.
        ws = np.transpose(state[name + '.weight'].cpu().numpy(), (2, 1, 0))
        new_weights = [ws, state[name + '.bias'].cpu().numpy()]
    elif 'linear' in name:
        # nn.Linear stores (out_features, in_features); Dense expects the transpose.
        ws = state[name + '.weight'].cpu().numpy().T
        new_weights = [ws, state[name + '.bias'].cpu().numpy()]
    elif 'emb' in name:
        # Embedding tables share the same (vocabulary, dims) layout; no bias.
        ws = state[name + '.weight'].cpu().numpy()
        new_weights = [ws]
    else:
        # Previously an unmatched layer was skipped silently and the print
        # below reported a stale `ws`; fail loudly so the comparison between
        # the two models cannot run on partially transferred weights.
        raise ValueError(
            "Don't know how to transfer weights for layer {!r}".format(name))

    model.get_layer(name).set_weights(new_weights)
    print(name, ws.shape) #  , bs.shape)

emb (10000, 50)
linear1 (5000, 250)
linear2 (250, 2)


In [102]:
# Pull the embedding tables out of both models as plain NumPy arrays so they
# can be compared like-for-like. The Keras layer is looked up by name, as in
# the weight-transfer cell; `get_weights()` returns a list, so take its only entry.
weights_emb_pt = net.state_dict()['emb.weight'].cpu().numpy()
weights_emb_tf = model.get_layer('emb').get_weights()[0]

In [103]:
# Check that the embedding weights agree between the PyTorch and Keras models.
np.allclose(weights_emb_pt, weights_emb_tf, rtol=1e-03)

True

In [104]:
# Number of test reviews used for the prediction and attribution comparison.
nb_samples = 10
# Torch views of the padded test inputs and the categorical test targets.
torch_X_test = torch.from_numpy(x_test)
torch_y_test = torch.from_numpy(y_test)  # NOTE(review): not referenced in the visible cells
# First `nb_samples` test reviews, as a torch tensor.
x_test_sample = torch_X_test[:nb_samples]

TensorFlow predictions

In [105]:
# Outputs of the Keras model on the sample (converted to a NumPy array first).
model(x_test_sample.numpy())

<tf.Tensor: shape=(10, 2), dtype=float32, numpy=
array([[-0.0040464 ,  0.6038184 ],
       [ 0.49326396, -0.3999495 ],
       [-0.31098127,  0.08760172],
       [ 0.39619875,  0.48363605],
       [-0.2932438 ,  0.623353  ],
       [-0.19899763,  0.08206717],
       [ 0.1273778 ,  0.2847327 ],
       [-0.20898356,  0.08907583],
       [-0.37701988, -0.11866526],
       [-0.6475955 ,  0.34030014]], dtype=float32)>

PyTorch predictions

In [106]:
# Outputs of the PyTorch model on the same sample, for comparison with the Keras outputs.
net(x_test_sample.to(device))

tensor([[-0.0040,  0.6038],
        [ 0.4933, -0.3999],
        [-0.3110,  0.0876],
        [ 0.3962,  0.4836],
        [-0.2932,  0.6234],
        [-0.1990,  0.0821],
        [ 0.1274,  0.2847],
        [-0.2090,  0.0891],
        [-0.3770, -0.1187],
        [-0.6476,  0.3403]], grad_fn=<AddmmBackward>)

# Integrated gradients comparison

In [107]:
# Integrated-gradients settings.
n_steps = 50  # number of integration steps (both explainers are run with 50)
method = "gausslegendre"  # integration method, passed to both explainers
internal_batch_size = 100  # passed to the alibi explainer

### pytorch

In [108]:
def interpret_sentence(model, indexed, min_len = 100, label = 1):
    """Compute layer integrated gradients for `model`'s embedding layer.

    Attributions are taken with respect to an all-zero token-id baseline and
    use the notebook-level `method` and `n_steps` settings, so the Captum run
    stays in sync with the alibi run configured from the same values.

    Args:
        model: PyTorch module exposing its embedding layer as `model.emb`.
        indexed: integer tensor of token ids, one row per sample.
        min_len: unused; kept so existing callers keep working.
        label: output index (target) the attributions are computed for.

    Returns:
        Tuple `(attributions, delta, reference_indices)`: the attribution
        tensor and convergence delta returned by Captum, and the baseline
        token ids as a NumPy array.
    """
    input_indices = indexed.to(device)

    # All-zero baseline ids with the input's shape and device; int64, which is
    # what the previous `dtype=int` construction produced.
    reference_indices = torch.zeros_like(input_indices, dtype=torch.long)

    # Build the explainer from the model that was actually passed in, rather
    # than relying on a global explainer bound to one particular network.
    explainer = LayerIntegratedGradients(model, model.emb)

    # compute attributions and approximation delta using layer integrated gradients
    attributions_ig, delta = explainer.attribute(input_indices,
                                                 reference_indices,
                                                 target=label,
                                                 method=method,
                                                 n_steps=n_steps,
                                                 return_convergence_delta=True)

    # `.cpu()` keeps the NumPy conversion valid if `device` is not the CPU.
    return attributions_ig, delta, reference_indices.cpu().numpy()

In [109]:
# Captum explainer for `net`, attributing with respect to its embedding layer.
lig = LayerIntegratedGradients(net, net.emb)

In [110]:
token_reference = TokenReferenceBase(reference_token_idx=0)  # NOTE(review): not referenced in the visible cells
# For simplicity, we compute the attribution relative to label = 1 for all samples
attributions_pt, delta, reference_indices = interpret_sentence(net, 
                                                               x_test_sample, 
                                                               label=1)
# Convert to NumPy so the result can be compared with the alibi attributions.
attributions_pt = attributions_pt.numpy()
print('Attributions shape:', attributions_pt.shape)

Attributions shape: (10, 100, 50)


### tf

In [111]:
# Second layer of the Keras model (the embedding layer, as the displayed repr
# confirms); attributions are computed with respect to it.
layer = model.layers[1]
layer

<tensorflow.python.keras.layers.embeddings.Embedding at 0x7fbe97c2db10>

In [112]:
# alibi explainer for the Keras model, configured from the shared
# `n_steps`, `method` and `internal_batch_size` settings.
ig  = IntegratedGradients(model,
                          layer=layer,
                          n_steps=n_steps, 
                          method=method,
                          internal_batch_size=internal_batch_size)

In [None]:
# Use a separate name for the NumPy version of the sample. Rebinding
# `x_test_sample` itself would turn it from a tensor into an array, and
# re-running this cell would then fail on `.numpy()`.
x_test_sample_np = x_test_sample.numpy()
predictions = model(x_test_sample_np).numpy().argmax(axis=1)
# Same baseline (the reference ids returned by the Captum run) and same
# target as on the PyTorch side.
explanation = ig.explain(x_test_sample_np,
                         baselines=reference_indices,
                         target=1)
# Get attributions values from the explanation object
attributions_tf = explanation.attributions[0]
print('Attributions shape:', attributions_tf.shape)

### Compare

In [114]:
# Check that the alibi (TensorFlow) and Captum (PyTorch) attributions agree
# within a relative tolerance of 1e-3.
np.allclose(attributions_tf, attributions_pt, rtol=1e-03)

True