Tutorial written by Silvia Tulli

In this tutorial we will learn how to explain the predictions of a zero-shot classifier used to analyse COVID-19 content on Twitter, using the Transformers Interpret library (`transformers-interpret`).

In [None]:
# Install dependencies
!pip install transformers
!pip install captum

Collecting transformers
  Downloading transformers-4.34.1-py3-none-any.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m38.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m77.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m92.0 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
!pip install transformers-interpret

Collecting transformers-interpret
  Downloading transformers_interpret-0.10.0-py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.8/45.8 kB[0m [31m679.6 kB/s[0m eta [36m0:00:00[0m
Collecting jedi>=0.16 (from ipython<8.0.0,>=7.31.1->transformers-interpret)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, transformers-interpret
Successfully installed jedi-0.19.1 transformers-interpret-0.10.0


In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import ZeroShotClassificationExplainer

# Hugging Face Hub id of the checkpoint. Defined once so that the tokenizer
# and the model are always loaded from the same checkpoint.
MODEL_NAME = "digitalepidemiologylab/covid-twitter-bert-v2-mnli"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Wrap the model and its tokenizer in the zero-shot explainer.
zero_shot_explainer = ZeroShotClassificationExplainer(model, tokenizer)

# Explain the sentence against each candidate label. The result maps every
# label to a list of (token, attribution score) pairs.
word_attributions = zero_shot_explainer(
    "To stop the pandemic it is important that everyone turns up for their shots.",
    labels=["health", "sport", "vaccine", "guns"],
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/364 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/833 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

In [None]:
# Show the attributions computed above: a dict mapping each candidate label
# to a list of (token, attribution score) pairs.
word_attributions

{'health': [('[CLS]', 0.0),
  ('to', -0.409705436155103),
  ('stop', -0.3602643192505606),
  ('the', -0.25822812485500524),
  ('pan', -0.11560410132002051),
  ('##de', -0.22941992643050205),
  ('##mic', -0.183580275880144),
  ('it', -0.24107414881783337),
  ('is', -0.12519768364119768),
  ('important', -0.2242217064460105),
  ('that', -0.16980469988790173),
  ('everyone', -0.361009839394181),
  ('turns', -0.07093873368810598),
  ('up', -0.1623858317990037),
  ('for', 0.11513724084570015),
  ('their', 0.08057615185869217),
  ('shots', -0.42848800218561356),
  ('.', -0.13412282770848571)],
 'sport': [('[CLS]', 0.0),
  ('to', -0.13532923939411393),
  ('stop', -0.06250861733519689),
  ('the', 0.08080216465962488),
  ('pan', -0.14493656171592345),
  ('##de', -0.01612227802901909),
  ('##mic', -0.022130708524992295),
  ('it', 0.02672290750310017),
  ('is', 0.08724048458833641),
  ('important', 0.03793640693438463),
  ('that', -0.04887780865450793),
  ('everyone', -0.3367216965909009),
  ('tu

In [None]:
# Label the explainer reports as its prediction for the input text.
zero_shot_explainer.predicted_label

'health'

In [None]:
# Render the attribution table inline, passing "zero_shot.html" as the output
# file path. The trailing semicolon suppresses the cell's echo of the returned
# object, which would otherwise render the same table a second time.
zero_shot_explainer.visualize("zero_shot.html");

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
health,health (0.37),health,-3.28,[CLS] to stop the pan ##de ##mic it is important that everyone turns up for their shots .
,,,,
sport,sport (0.17),sport,-1.87,[CLS] to stop the pan ##de ##mic it is important that everyone turns up for their shots .
,,,,
vaccine,vaccine (0.30),vaccine,-3.29,[CLS] to stop the pan ##de ##mic it is important that everyone turns up for their shots .
,,,,
guns,guns (0.16),guns,1.74,[CLS] to stop the pan ##de ##mic it is important that everyone turns up for their shots .
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
health,health (0.37),health,-3.28,[CLS] to stop the pan ##de ##mic it is important that everyone turns up for their shots .
,,,,
sport,sport (0.17),sport,-1.87,[CLS] to stop the pan ##de ##mic it is important that everyone turns up for their shots .
,,,,
vaccine,vaccine (0.30),vaccine,-3.29,[CLS] to stop the pan ##de ##mic it is important that everyone turns up for their shots .
,,,,
guns,guns (0.16),guns,1.74,[CLS] to stop the pan ##de ##mic it is important that everyone turns up for their shots .
,,,,
