# Attribution Visualization Examples

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and source edits are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys

import torch

sys.path.append("../..")
from interpreto.attributions.base import AttributionOutput

## Mono Class

### Basic Test

Here is a simple example of an attribution visualization.

In [None]:
# Single-class example: one attribution score per element of the sentence.
sentence = "A B C one two three".split()

# Simulated scores for a single-class classification task, evenly spaced
# from -10 to 10 with one value per element.
attributions = torch.linspace(start=-10, end=10, steps=len(sentence))
single_class_classification_output = AttributionOutput(
    elements=sentence,
    attributions=attributions,
)

In [4]:
# Render the simulated single-class attributions with the default settings.
from interpreto.visualizations.attributions.classification_highlight import (
    SingleClassAttributionVisualization,
)

viz = SingleClassAttributionVisualization(
    attribution_output=single_class_classification_output,
)
viz.display()

In [5]:
# Same attributions, this time with the `highlight_border` option switched on.
viz = SingleClassAttributionVisualization(
    attribution_output=single_class_classification_output,
    highlight_border=True,
)
viz.display()

In [6]:
# Same attributions, this time with the `normalize` option switched off.
viz = SingleClassAttributionVisualization(
    attribution_output=single_class_classification_output,
    normalize=False,
)
viz.display()

The visualization can also be saved as an HTML file.

In [None]:
# Uncomment the line below to save the current visualization as an HTML file.
# viz.save("attributions_monoclass.html")

### Occlusion Test on BERT outputs

A complete example: occlusion attributions computed on the outputs of a BERT classifier, then visualized.

In [8]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from interpreto.attributions.methods.occlusion import OcclusionExplainer
from interpreto.attributions.perturbations.base import GranularityLevel
from interpreto.commons.model_wrapping.classification_inference_wrapper import ClassificationInferenceWrapper
from interpreto.visualizations.attributions.classification_highlight import SingleClassAttributionVisualization

In [9]:
# Checkpoint to load and the sentences to explain.
model_name = "textattack/bert-base-uncased-imdb"
test_sentences = [
    "Best movie ever",
    "Worst movie ever verylongword",
]

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# NOTE(review): this wrapper is created but not handed to the explainer below
# (the corresponding argument is commented out) — confirm whether it is still needed.
inference_wrapper = ClassificationInferenceWrapper(model=model, batch_size=4)

# Occlusion explainer working at word granularity.
exp = OcclusionExplainer(
    model=model,
    tokenizer=tokenizer,
    # inference_wrapper=inference_wrapper,
    batch_size=4,
    granularity_level=GranularityLevel.WORD,
)
explaination = exp.explain(test_sentences)

# Print the raw attribution values next to the elements they were computed for.
for sentence_attribution in explaination:
    print(sentence_attribution.attributions, sentence_attribution.elements)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


tensor([2.3434, 0.3201, 0.4798], device='cuda:0') ['best', 'movie', 'ever']
tensor([4.0143, 1.1912, 0.5362, 0.5000], device='cuda:0') ['worst', 'movie', 'ever', 'verylongword']


In [10]:
# Display one single-class visualization per explained sentence.
for sentence_output in explaination:
    viz = SingleClassAttributionVisualization(
        attribution_output=sentence_output,
    )
    viz.display()

## Multi Class

In [11]:
# Multi-class example: two classes, one row of attribution scores per class.
nb_classes = 2
inputs_sentences = "A B C one two three".split()

# Build a simulated attribution output for the first sentence.
sentence = inputs_sentences
# Random alternative of shape (nb_classes, len(sentence)):
# attributions = torch.rand(nb_classes, len(sentence))
attributions = torch.tensor(
    [
        [0.1, 0.2, -0.3, -0.4, 0.5, 1.0],
        [0.6, 0.5, 0.4, 0.3, 0.2, -1],
    ]
)
attribution_output = AttributionOutput(
    elements=sentence,
    attributions=attributions,
)

In [12]:
# Render the first sentence with the default settings, labelling each class.
from interpreto.visualizations.attributions.classification_highlight import (
    MultiClassAttributionVisualization,
)

viz = MultiClassAttributionVisualization(
    attribution_output=attribution_output,
    class_names=["class 1", "class 2"],
)
viz.display()

## Generation

In [13]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [14]:
# Load a pre-trained causal language model (GPT-2) together with its tokenizer.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Occlusion explainer configured with the ALL_TOKENS granularity level.
explainer = OcclusionExplainer(
    model=model,
    batch_size=4,
    tokenizer=tokenizer,
    granularity_level=GranularityLevel.ALL_TOKENS,
)

In [15]:
# Generate from the input sentence and explain the generated output.
# NOTE(review): the recorded shape further down reports 20 output tokens although
# `max_length` is 10 here — confirm how `generation_kwargs` is applied by the explainer.
attribution_outputs = explainer.explain(
    model_inputs="Hi there, how are you?",
    generation_kwargs={"max_length": 10},
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [16]:
# Shape of the attribution tensor for the first sentence.
# In the recorded run it is 20 x 27: 20 output tokens, each with
# 27 attribution values (input + output).
first_attributions = attribution_outputs[0].attributions
print("attribution tensor shape for the 1st sentence:", first_attributions.shape)

attribution tensor shape for the 1st sentence: torch.Size([20, 27])


In [17]:
# Visualize the attribution results
from interpreto.visualizations.attributions.classification_highlight import GenerationAttributionVisualization

viz = GenerationAttributionVisualization(attribution_output=attribution_outputs[0])
viz.display()