In [1]:
import torch
import torch.nn as nn
import fastai.train
import pandas as pd
from captum.attr import LayerIntegratedGradients

Load a fast.ai `Learner` trained to classify IMDB reviews as `[negative, positive]`.

In [2]:
# Load the trained Learner from the current directory.
# NOTE(review): load_learner presumably unpickles the file - only load artifacts you trust.
awd = fastai.train.load_learner('.','imdb_fastai_trained_lm_clf.pth')
# Override the `bptt` attribute of the first sub-module of the model.
# NOTE(review): presumably the chunk length used when the encoder processes long inputs - confirm against fastai docs.
awd.model[0].bptt = 200

In [3]:
# Drill down through the first sub-module of the model to the layer that holds
# the embeddings; attributions are computed with respect to this layer.
embedding_layer = awd.model[0]._modules['module']._modules['encoder_dp']


def forward_func(x):
    """Return the softmax (over the last dim) of the model's first output for ``x``.

    The model returns several values; only the first one is kept, and softmax
    is applied to it before it is returned.
    """
    first_output = awd.model(x)[0]
    return torch.softmax(first_output, dim=-1)

In [4]:
# Integrated-gradients explainer tied to the embedding layer: wraps
# `forward_func` and attributes its output to `embedding_layer`.
lig = LayerIntegratedGradients(forward_func, embedding_layer)

In [5]:
def get_attributions_for_sentence(sentence = 'Best film ever',
                                  awd_model=awd,
                                  lig_instance=lig,
                                  target = None,
                                  lig_n_steps = 200,
                                  baseline_token='.'):
    """Compute per-token integrated-gradients attributions for one sentence.

    Args:
        sentence: raw text to explain.
        awd_model: learner providing ``data.x.vocab``, ``data.one_item`` and
            ``predict`` (defaults to the notebook-level ``awd``).
        lig_instance: object whose ``attribute`` method computes the
            attributions (defaults to the notebook-level ``lig``).
        target: output index to attribute. When ``None``, the index returned
            as the second element of ``awd_model.predict(sentence)`` is used.
        lig_n_steps: forwarded as ``n_steps`` to ``lig_instance.attribute``.
        baseline_token: vocabulary token that fills the baseline at every
            position except the first, which is set to ``'xxbos'``.

    Returns:
        A ``(attributions, prediction)`` tuple. ``attributions`` is a
        ``pd.Series`` indexed by the token strings of the sentence; values are
        the attributions summed over the last dimension and scaled to unit
        norm (left as zeros when the norm is zero). ``prediction`` is the
        unmodified result of ``awd_model.predict(sentence)``.
    """
    vocab = awd_model.data.x.vocab
    sentence_tokens = awd_model.data.one_item(sentence)[0]
    token_strings = [vocab.itos[w] for w in sentence_tokens[0]]

    # Baseline: same shape as the input, filled with the baseline token id
    # (see "how to choose a good baseline").
    baseline = torch.ones_like(sentence_tokens) * vocab.stoi[baseline_token]
    # The first position always holds the beginning-of-sentence token.
    baseline[0, 0] = vocab.stoi['xxbos']

    # Keep predict() ahead of attribute(), as in the original code.
    # NOTE(review): predict() presumably leaves the model in eval mode - confirm.
    prediction = awd_model.predict(sentence)
    if target is None:
        target = prediction[1].item()

    attrs = lig_instance.attribute(sentence_tokens, baseline, target, n_steps=lig_n_steps)

    # Collapse the last dimension to get one score per token.
    token_scores = attrs.sum(-1)
    # Scale to unit norm, but skip the division when every attribution is zero
    # so the result is zeros rather than NaN.
    norm = torch.norm(token_scores)
    if norm > 0:
        token_scores = token_scores / norm

    # detach()/cpu() make the conversion work for GPU or grad-tracking tensors.
    scores = token_scores.detach().cpu().numpy()[0]
    return (
        pd.Series(scores, index=token_strings),
        prediction
    )

In [6]:
# Review text used as the example input; source:
# https://www.imdb.com/review/rw5384922/?ref_=tt_urv
# NOTE: the literal starts with a newline (the text begins on the line after
# the opening quotes), so that newline is part of the string passed on.
review_1917 = """
I sat in a packed yet silent theater this morning and watched, what I believe to be, the next Academy Award winner for the Best Picture."""

In [7]:
# Attribute the model's prediction for the review to its tokens; the baseline
# is filled with the token passed as `baseline_token` instead of the default '.'.
# NOTE(review): assumes this whitespace token exists in the vocab - confirm.
attributions, prediction = get_attributions_for_sentence(review_1917, baseline_token='\n \n ')

In [8]:
# Number of entries in the attribution Series (one per token of the input).
len(attributions)

119

The visualization below requires `ipyvuetify`; see the installation instructions:

https://github.com/mariobuikhuizen/ipyvuetify/#installation

In [8]:
import ipyvuetify as v

In [9]:
class Chip(v.Chip):
    """A small label chip showing one token, tinted by its attribution.

    Attributions >= 0 use the ``positive`` colour and negative ones the
    ``negative`` colour; the alpha channel is the absolute attribution
    formatted to two decimals. The raw attribution is stored as the chip's
    ``value``.
    """
    positive = '0, 255, 0'  # RGB components used when attribution >= 0
    negative = '255, 0, 0'  # RGB components used when attribution < 0

    def __init__(self, word, attribution):
        if attribution >= 0:
            rgb = self.positive
        else:
            rgb = self.negative
        opacity = abs(attribution)
        chip_options = dict(
            class_='mx-0 px-1',
            children=[word],
            color=f'rgba({rgb}, {opacity:.2f})',
            value=attribution,
            label=True,
            small=True,
        )
        super().__init__(**chip_options)
        
def saliency_chips(attributions:pd.Series) -> v.ChipGroup:
    """Build a chip group with one ``Chip`` per entry of ``attributions``.

    Args:
        attributions: Series whose index holds the token strings and whose
            values hold the attribution of each token.

    Returns:
        A ``v.ChipGroup`` (created with ``column=True``) containing the chips
        in the same order as the Series.
    """
    # Series.items() replaces Series.iteritems(), which pandas deprecated in
    # 1.5 and removed in 2.0; both yield (index, value) pairs.
    children = [Chip(word, score)
                for word, score in attributions.items()]
    return v.ChipGroup(column=True, children=children)

In [10]:
# Display the attribution chips as this cell's output.
saliency_chips(attributions)

ChipGroup(children=[Chip(children=['xxbos'], class_='mx-0 px-1', color='rgba(0, 255, 0, 0.00)', label=True, sm…