<a href="https://colab.research.google.com/github/bharatji30/xir/blob/main/BERT_Explanation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers-interpret

In [2]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

In [15]:
# Checkpoint shared by the tokenizer and the classification model.
model_name = "sentence-transformers/msmarco-distilbert-base-v3"

# NOTE(review): the load warning recorded for this cell reports that the
# pre_classifier/classifier weights are newly initialized, i.e. this checkpoint
# provides no trained classification head -- confirm that attributions computed
# from this model are meaningful before relying on them.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at sentence-transformers/msmarco-distilbert-base-v3 and are newly initialized: ['pre_classifier.weight', 'classifier.weight', 'pre_classifier.bias', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
# Dump the tokenizer configuration and the model architecture as plain text.
print(tokenizer, model)

PreTrainedTokenizerFast(name_or_path='sentence-transformers/msmarco-distilbert-base-v3', vocab_size=30522, model_max_len=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}) DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
         

In [17]:
from transformers_interpret import SequenceClassificationExplainer

In [18]:
# Example sentence whose token attributions are inspected in the cells below.
sample_text = (
    "Sildenafil improves erectile function in men who experience sexual "
    "dysfunction as a result of the use of SSRI antidepressants"
)

In [19]:
# Wrap the model/tokenizer pair in an explainer, then run it on the sample
# sentence to obtain one attribution score per token.
multiclass_explainer = SequenceClassificationExplainer(model, tokenizer)
word_attributions = multiclass_explainer(sample_text)

In [20]:
# Show the (token, attribution score) pairs returned by the explainer call above.
word_attributions

[('[CLS]', 0.0),
 ('si', 0.041613399220383596),
 ('##lden', -0.011113602742330442),
 ('##af', 0.11206886363880716),
 ('##il', 0.09927411833739588),
 ('improves', -0.38293316991451065),
 ('erect', 0.5762652762905245),
 ('##ile', 0.16555157436768936),
 ('function', 0.256125501152639),
 ('in', 0.17765507581185003),
 ('men', 0.16769815616002728),
 ('who', -0.17280078494675385),
 ('experience', -0.08424545645862247),
 ('sexual', 0.35114832407486074),
 ('dysfunction', -0.17194354064109527),
 ('as', 0.05528197135445964),
 ('a', 0.013987176223212584),
 ('result', 0.013231851118603559),
 ('of', 0.021678607842851222),
 ('the', 0.017329754560860593),
 ('use', -0.039833190731103425),
 ('of', 0.007708910092208119),
 ('ssr', -0.3618945811961393),
 ('##i', -0.07557946373573275),
 ('anti', -0.0701179580345104),
 ('##de', 0.002657693661478643),
 ('##press', -0.08177164663076947),
 ('##ants', -0.01893339110639998),
 ('[SEP]', 0.0)]

In [21]:
# Render the attribution visualization (the table in this cell's output) and
# keep the returned object in `html`.
html = multiclass_explainer.visualize()

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.52),LABEL_0,0.61,[CLS] si ##lden ##af ##il improves erect ##ile function in men who experience sexual dysfunction as a result of the use of ssr ##i anti ##de ##press ##ants [SEP]
,,,,


In [22]:
# NOTE: these imports repeat/extend the import cells earlier in the notebook;
# they are kept so this cell still runs on its own.
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import ZeroShotClassificationExplainer

# Single source of truth for the NLI checkpoint, so the tokenizer and the model
# are always loaded from the same place.
zero_shot_model_name = "facebook/bart-large-mnli"

# NOTE: `tokenizer` and `model` are rebound here, replacing the DistilBERT
# objects loaded earlier in the notebook.
tokenizer = AutoTokenizer.from_pretrained(zero_shot_model_name)
model = AutoModelForSequenceClassification.from_pretrained(zero_shot_model_name)

zero_shot_explainer = ZeroShotClassificationExplainer(model, tokenizer)

# Attribute the sentence against every candidate label; the result is a dict
# keyed by label, each value a list of (token, score) pairs.
word_attributions = zero_shot_explainer(
    "Today apple released the new Macbook showing off a range of new features found in the proprietary silicon chip computer. ",
    labels=["finance", "technology", "sports"],
)

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

In [29]:
# Reuse the sentence already defined in `sample_text` instead of repeating the
# literal, so both explainers are guaranteed to see the same input.
# NOTE: this rebinds `word_attributions`, replacing the zero-shot result for the
# Macbook sentence computed in the previous cell.
word_attributions = zero_shot_explainer(
    sample_text,
    labels=["finance", "technology", "sports"],
)

In [24]:
# Inspect the per-label attribution dict.
# NOTE(review): the saved output below shows the Macbook sentence, although the
# cell above rebinds `word_attributions` to the Sildenafil result (execution
# counts 29 vs 24) -- re-run the notebook top to bottom to refresh this output.
word_attributions

{'finance': [('<s>', 0.0),
  ('Today', 0.0),
  ('apple', -0.01623382003343027),
  ('released', 0.33482804979288155),
  ('the', -0.8933375616772289),
  ('new', 0.1420997097754301),
  ('Mac', 0.016123760807657716),
  ('book', -0.06955582425020064),
  ('showing', -0.12645330086957643),
  ('off', -0.11475069871640466),
  ('a', -0.033037557430431616),
  ('range', -0.002605497544850952),
  ('of', -0.02260918255636712),
  ('new', -0.018584167379099337),
  ('features', -0.020751173053987364),
  ('found', -0.007799016129654701),
  ('in', 0.004999099620783741),
  ('the', 0.0469555483392299),
  ('proprietary', 0.04618163515391065),
  ('silicon', -0.00336963614108024),
  ('chip', -0.010346731700986423),
  ('computer', -0.11505059510445892),
  ('.', 0.12232051047511464)],
 'sports': [('<s>', 0.0),
  ('Today', 0.0),
  ('apple', 0.17777404981278105),
  ('released', 0.1003214064974551),
  ('the', 0.48201710670946674),
  ('new', -0.018599190084082673),
  ('Mac', 0.016238528172160226),
  ('book', 0.3932

In [23]:
# Render the per-label attribution table, passing "zero_shot.html" as the output
# file path. The trailing semicolon stops the returned HTML object from being
# echoed as the cell's value, which otherwise renders the same table twice.
zero_shot_explainer.visualize("zero_shot.html");

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
finance,finance (0.08),finance,-0.74,#s Today apple released the new Mac book showing off a range of new features found in the proprietary silicon chip computer .
,,,,
technology,technology (0.84),technology,1.36,#s Today apple released the new Mac book showing off a range of new features found in the proprietary silicon chip computer .
,,,,
sports,sports (0.08),sports,1.61,#s Today apple released the new Mac book showing off a range of new features found in the proprietary silicon chip computer .
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
finance,finance (0.08),finance,-0.74,#s Today apple released the new Mac book showing off a range of new features found in the proprietary silicon chip computer .
,,,,
technology,technology (0.84),technology,1.36,#s Today apple released the new Mac book showing off a range of new features found in the proprietary silicon chip computer .
,,,,
sports,sports (0.08),sports,1.61,#s Today apple released the new Mac book showing off a range of new features found in the proprietary silicon chip computer .
,,,,
