# Interpreting text models: IMDB sentiment analysis

Captum (“comprehension” in Latin) is an open-source, extensible library for model interpretability built on PyTorch. The primary audiences for Captum are model developers who want to improve their models and understand which features are important, and interpretability researchers focused on identifying algorithms that can better interpret many types of models.

#### Sources: 
* https://captum.ai/tutorials/IMDB_TorchText_Interpret
* https://captum.ai/docs/introduction.html
* https://captum.ai/docs/extension/integrated_gradients
* https://arxiv.org/pdf/1703.01365.pdf
* https://towardsdatascience.com/interpretable-neural-networks-45ac8aa91411
* https://github.com/ankurtaly/Integrated-Gradients
* http://theory.stanford.edu/~ataly/Talks/sri_attribution_talk_jun_2017.pdf
* https://medium.com/@kartikeyabhardwaj98/integrated-gradients-for-deep-neural-networks-c114e3968eae
* http://www.unofficialgoogledatascience.com/2017/03/attributing-deep-networks-prediction-to.html
* https://psturmfels.github.io/VisualizingExpectedGradients/
* https://www.youtube.com/watch?v=iVSIFm0UN9I
* https://medium.com/@SeoJaeDuk/archived-post-axiomatic-attribution-for-deep-networks-b4af79d5ed32
* http://pages.cs.wisc.edu/~jiefeng/docs/neurips2019/poster.pdf
* http://pages.cs.wisc.edu/~jiefeng/docs/neurips2019/slides.pdf
* https://vimeo.com/238242575

#### Let's get ready to execute this on SageMaker

In [None]:
%%bash

# Upgrade pip itself before installing the notebook's dependencies.
pip install --upgrade pip
# Libraries imported by the cells below (torchtext, captum) plus nvidia-ml-py3.
pip install torchtext nvidia-ml-py3 captum
pip install -U spacy torch msgpack
# Download the English spaCy model through the 'en' shortcut, then validate the
# installed models against the installed spaCy version.
# NOTE(review): none of the packages above are version-pinned, and the 'en'
# shortcut was dropped in spaCy 3 (full name: en_core_web_sm) -- confirm the
# spaCy version that gets installed still accepts it.
python -m spacy download en
python -m spacy validate

In [None]:
import spacy

import torch
import torchtext
import torchtext.data
import torch.nn as nn
import torch.nn.functional as F

from torchtext.vocab import Vocab

from captum.attr import IntegratedGradients
from captum.attr import InterpretableEmbeddingBase, TokenReferenceBase
from captum.attr import visualization
from captum.attr import configure_interpretable_embedding_layer, remove_interpretable_embedding_layer

# English spaCy pipeline; its tokenizer is used to split the example sentences
# into tokens before they are fed to the model.
nlp = spacy.load('en')

In [None]:
# Use the default CUDA device when a GPU is present, otherwise the CPU.
# Deliberately not pinned to a specific GPU index so the cell works on hosts
# with any number of GPUs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
device

In [None]:
#!wget -cv https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
!mkdir data models
!(cd data ; wget -cv https://ar51.s3.amazonaws.com/aclImdb_v1.tar.gz)
!(cd data ; tar -xzf aclImdb_v1.tar.gz)
!(cd models ; wget -cv https://ar51.s3.amazonaws.com/imdb-model-cnn.pt)

In [None]:
class CNN(nn.Module):
    """Convolutional text classifier.

    Token indices are embedded, passed through one convolution per entry of
    ``filter_sizes`` (each spanning the full embedding width), max-pooled over
    the sequence axis, concatenated, regularised with dropout and projected
    to ``output_dim`` values by a linear layer.
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim,
                 dropout, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        # One convolution per filter height; every kernel covers the whole
        # embedding dimension, so it slides along the sentence only.
        branches = []
        for height in filter_sizes:
            branches.append(nn.Conv2d(in_channels=1,
                                      out_channels=n_filters,
                                      kernel_size=(height, embedding_dim)))
        self.convs = nn.ModuleList(branches)

        self.fc = nn.Linear(n_filters * len(filter_sizes), output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        """Return the raw (pre-sigmoid) output for a batch.

        ``text`` is [batch size, sent len]; the result is [batch size, output_dim].
        """
        # [batch size, sent len, emb dim] -> [batch size, 1, sent len, emb dim]
        channelled = self.embedding(text).unsqueeze(1)

        features = []
        for conv in self.convs:
            # [batch size, n_filters, sent len - filter height + 1]
            activated = F.relu(conv(channelled)).squeeze(3)
            # max over the whole sequence axis -> [batch size, n_filters]
            features.append(F.max_pool1d(activated, activated.shape[2]).squeeze(2))

        # [batch size, n_filters * len(filter_sizes)]
        merged = torch.cat(features, dim=1)
        return self.fc(self.dropout(merged))

In [None]:
!ls models

In [None]:
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints obtained from a trusted source.
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only host and keeps the model on the device the attribution cells use
# (their results are converted to NumPy).
# NOTE(review): PyTorch 2.6+ defaults to weights_only=True, which rejects
# whole-model pickles such as this one appears to be -- pass
# weights_only=False there if loading fails.
model = torch.load('models/imdb-model-cnn.pt', map_location='cpu')
# Inference mode: disables dropout so predictions are deterministic.
model.eval()

In [None]:
def forward_with_sigmoid(input):
    """Run the global ``model`` on ``input`` and squash its output with a sigmoid."""
    raw_output = model(input)
    return torch.sigmoid(raw_output)

In [None]:
# Text field: spaCy tokenization, every token lower-cased.
TEXT = torchtext.data.Field(tokenize='spacy', lower=True)
# Label field holding the sentiment label as a float.
Label = torchtext.data.LabelField(dtype=torch.float)

In [None]:
# Build the train/test datasets from the archive unpacked under data/aclImdb,
# using the TEXT and Label fields for preprocessing.
train, test = torchtext.datasets.IMDB.splits(
    text_field=TEXT,
    label_field=Label,
    train='train',
    test='test',
    path='data/aclImdb',
)
# Keep only the 4% part of a 4%/96% split of the test set; the rest is dropped.
test, _ = test.split(split_ratio=0.04)

In [None]:
# Original Stanford download location (kept for reference):
#!wget -cv http://nlp.stanford.edu/data/glove.6B.zip
# Fetch glove.6B.50d.txt.tar.gz into data/ (wget -c resumes a partial download)
# and unpack it there.
!(cd data ; wget -cv https://ar51.s3.amazonaws.com/glove.6B.50d.txt.tar.gz)
!(cd data ; tar -xzf glove.6B.50d.txt.tar.gz)

In [None]:
!ls data/glove.6B.50d.txt

In [None]:
from torchtext import vocab

# Alternative: have torchtext fetch the GloVe vectors itself.
#loaded_vectors = vocab.GloVe(name='6B', dim=50)

# Load the pre-downloaded GloVe vectors from the local file instead.
loaded_vectors = torchtext.vocab.Vectors('data/glove.6B.50d.txt')
# Build the text vocabulary from the training set, capped at the number of
# entries in the loaded vectors.
TEXT.build_vocab(train, vectors=loaded_vectors, max_size=len(loaded_vectors.stoi))
    
# Attach the loaded vectors to the vocabulary, then build the label vocabulary.
TEXT.vocab.set_vectors(stoi=loaded_vectors.stoi, vectors=loaded_vectors.vectors, dim=loaded_vectors.dim)
Label.build_vocab(train)

In [None]:
print('Vocabulary Size: ', len(TEXT.vocab))

In [None]:
# Vocabulary index used for padding and for the attribution baseline.
# NOTE(review): this looks up the literal word 'pad' (the same string
# interpret_sentence pads with), not a dedicated '<pad>' token -- confirm it
# matches the padding index the model was trained with.
PAD_IND = TEXT.vocab.stoi['pad']

In [None]:
# Helper that generates reference (baseline) index sequences filled with PAD_IND.
token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)

In [None]:
# Wrap the model's 'embedding' layer so embeddings can be computed up front
# (indices_to_embeddings) and passed to the model in place of token indices.
interpretable_embedding = configure_interpretable_embedding_layer(model, 'embedding')

In [None]:
# Integrated Gradients attribution over the model's output.
# NOTE(review): attributions are computed on `model` directly, while the
# reported prediction goes through forward_with_sigmoid -- confirm intended.
ig = IntegratedGradients(model)

In [None]:
# accumulate a couple of samples in this array for visualization purposes
vis_data_records_ig = []

def interpret_sentence(model, sentence, min_len = 7, label = 0):
    """Explain the model's prediction for one sentence with Integrated Gradients.

    The sentence is tokenized with the spaCy tokenizer, right-padded with the
    'pad' token up to ``min_len`` tokens, embedded, and attributed against a
    baseline of the same length built by ``token_reference``. The prediction
    and the absolute convergence delta are printed, and one visualization
    record is appended to ``vis_data_records_ig``.

    Args:
        model: classifier to put in eval mode and clear gradients on; the
            forward pass itself goes through ``forward_with_sigmoid``.
        sentence: raw text to explain.
        min_len: minimum number of tokens; shorter inputs are padded.
        label: index of the true class in ``Label.vocab``.
    """
    model.eval()
    text = [tok.text for tok in nlp.tokenizer(sentence)]
    if len(text) < min_len:
        text += ['pad'] * (min_len - len(text))
    # TEXT lower-cases tokens, so the vocabulary only holds lower-case entries;
    # look tokens up lower-cased (otherwise capitalised words map to the
    # unknown token) while `text` keeps the original casing for display.
    indexed = [TEXT.vocab.stoi[t.lower()] for t in text]

    model.zero_grad()

    # input_indices dim: [1, sequence_length]; created on the same device as
    # the reference below so both embeddings live together.
    input_indices = torch.tensor(indexed, dtype=torch.long, device=device).unsqueeze(0)

    # The baseline must be exactly as long as the (possibly padded) input;
    # using min_len here breaks on sentences longer than min_len tokens.
    seq_length = len(indexed)

    # pre-computing word embeddings
    input_embedding = interpretable_embedding.indices_to_embeddings(input_indices)

    # predict
    pred = forward_with_sigmoid(input_embedding).item()
    pred_ind = round(pred)

    # generate reference for each sample
    reference_indices = token_reference.generate_reference(seq_length, device=device).unsqueeze(0)
    reference_embedding = interpretable_embedding.indices_to_embeddings(reference_indices)

    # compute attributions and approximation delta using integrated gradients
    attributions_ig, delta = ig.attribute(input_embedding, reference_embedding, n_steps=500, return_convergence_delta=True)

    print('pred: ', Label.vocab.itos[pred_ind], '(', '%.2f'%pred, ')', ', delta: ', abs(delta))

    add_attributions_to_visualizer(attributions_ig, text, pred, pred_ind, label, delta, vis_data_records_ig)
    
def add_attributions_to_visualizer(attributions, text, pred, pred_ind, label, delta, vis_data_records):
    """Reduce embedding attributions to per-token scores and store a record.

    Args:
        attributions: attribution tensor; summed over dim 2 and squeezed on
            dim 0 to obtain one score per token.
        text: list of tokens the attributions refer to.
        pred: predicted probability.
        pred_ind: index of the predicted class in ``Label.vocab``.
        label: index of the true class in ``Label.vocab``.
        delta: convergence delta reported by the attribution method.
        vis_data_records: list the new ``VisualizationDataRecord`` is
            appended to (mutated in place).
    """
    attributions = attributions.sum(dim=2).squeeze(0)
    # Scale to unit L2 norm. All-zero attributions (e.g. input identical to
    # the baseline) have zero norm; leave them as zeros instead of producing NaN.
    norm = torch.norm(attributions)
    if norm > 0:
        attributions = attributions / norm
    # .cpu() is a no-op for CPU tensors and makes GPU tensors convertible.
    attributions = attributions.detach().cpu().numpy()

    # storing a couple of samples in an array for visualization purposes
    vis_data_records.append(visualization.VisualizationDataRecord(
                            attributions,
                            pred,
                            Label.vocab.itos[pred_ind],
                            Label.vocab.itos[label],
                            Label.vocab.itos[1],
                            attributions.sum(),       
                            text[:len(attributions)],
                            delta))

In [None]:
torch.cuda.is_available()

In [None]:
# The attribution cells run on the CPU (their results are converted to NumPy),
# so the device is fixed to CPU regardless of GPU availability.
device = 'cpu'

In [None]:
# (sentence, true label index) pairs to explain, in display order.
_examples = [
    ('It was a fantastic performance !', 1),
    ('Best film ever', 1),
    ('Such a great show!', 1),
    ('It was a horrible movie', 0),
    ('I\'ve never watched something as bad', 0),
    ('It is a disgusting movie!', 0),
]
for _sentence, _label in _examples:
    interpret_sentence(model, _sentence, label=_label)

In [None]:
print('Visualize attributions based on Integrated Gradients')
visualization.visualize_text(vis_data_records_ig)

In [None]:
vis_data_records_ig

In [None]:
from IPython.display import Image
Image(filename='img/sentiment_analysis.png')
