In [None]:
# Install the notebook's dependencies into the environment of the running kernel.
# `%pip` targets the kernel's own interpreter, whereas `!pip3` runs whichever
# pip3 is first on the shell PATH and may install into a different Python.
%pip install spacy anchor-exp

# Download the small English spaCy model with the same interpreter the kernel uses,
# so that spacy.load('en_core_web_sm') can find it.
import sys
!"{sys.executable}" -m spacy download en_core_web_sm

In [None]:
import numpy as np
import spacy
import time
import torch

from anchor import anchor_text
from transformers import pipeline

In [3]:
# spaCy pipeline for English; it is passed to anchor_text.AnchorText as its first argument.
nlp = spacy.load("en_core_web_sm")

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model directly
# NOTE(review): these two names are not referenced by any cell visible here.
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Sentiment-analysis pipeline; model and tokenizer come from the same checkpoint.
# With top_k=1, predict_prob reads element [0] of each per-text result.
classifier = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
    tokenizer="siebert/sentiment-roberta-large-english",
    top_k=1,
    device=device,
)

In [5]:
def predict_prob(texts):
    """Classify `texts` with the sentiment pipeline and return integer labels.

    Despite the name, this returns hard class indices, not probabilities:
    0 when the first entry of a text's result is labelled 'NEGATIVE',
    1 for any other label.

    Args:
        texts: Input forwarded unchanged to `classifier`, e.g. a list of strings.

    Returns:
        A NumPy array with one 0/1 entry per result returned by `classifier`.
    """
    label_ids = []
    for result in classifier(texts):
        # Each result is indexable; its first entry carries the predicted label.
        top_label = result[0]['label']
        label_ids.append(0 if top_label == 'NEGATIVE' else 1)
    return np.array(label_ids)

In [6]:
# Anchor explainer for text. The class names are ordered to match predict_prob's
# output (index 0 -> 'NEGATIVE', index 1 -> 'POSITIVE').
explainer = anchor_text.AnchorText(
    nlp,
    ['NEGATIVE', 'POSITIVE'],
    use_unk_distribution=True,
)

In [7]:
# Sentence that the rest of the notebook explains.
text = 'The little mermaid is a good story.'

# Classify the single sentence and translate the 0/1 label into its class name.
pred = explainer.class_names[predict_prob([text])[0]]
print('Prediction: %s' % (pred,))

Prediction: POSITIVE


In [8]:
# explain_instance samples perturbed versions of `text` at random, so repeated runs
# can yield different anchors and examples. anchor-exp appears to draw from NumPy's
# global RNG (confirm for the installed version); seeding it here makes re-running
# this cell reproducible. The seed value 0 is arbitrary.
np.random.seed(0)

# Search for an anchor for predict_prob's prediction on `text`, with threshold=0.95.
exp = explainer.explain_instance(text, predict_prob, threshold=0.95)



In [9]:
# Report the anchor as its names joined by ' AND ', then the explanation's
# precision rounded to two decimals.
print('Anchor: %s' % ' AND '.join(exp.names()))
print('Precision: %.2f' % exp.precision())

Anchor: good AND a AND is
Precision: 1.00


In [10]:
# One line per example returned with only_same_prediction=True; the first
# element of each example is what gets printed.
print('\n'.join(example[0] for example in exp.examples(only_same_prediction=True)))

UNK UNK mermaid is a good UNK UNK
The little mermaid is a good UNK UNK
UNK UNK UNK is a good UNK UNK
UNK UNK UNK is a good story UNK
The UNK mermaid is a good UNK .
The little UNK is a good UNK .
The little mermaid is a good UNK .
The little mermaid is a good UNK UNK
UNK UNK UNK is a good story .
The little UNK is a good UNK UNK


In [11]:
# One line per example returned with only_different_prediction=True; the first
# element of each example is what gets printed.
print('\n'.join(example[0] for example in exp.examples(only_different_prediction=True)))




In [12]:
# Second explainer, this time with use_unk_distribution=False.
# NOTE: this rebinds `explainer` and `exp`, replacing the objects created with
# use_unk_distribution=True; cells that follow read the new explanation.
explainer = anchor_text.AnchorText(nlp, ['negative', 'positive'], use_unk_distribution=False)

# explain_instance samples perturbed versions of `text` at random. anchor-exp appears
# to draw from NumPy's global RNG (confirm for the installed version); seeding it here
# makes re-running this cell reproducible. The seed value 0 is arbitrary.
np.random.seed(0)
exp = explainer.explain_instance(text, predict_prob, threshold=0.95)

Downloading tokenizer_config.json: 100%|██████████████████████████████████████████████████████████| 49.0/49.0 [00:00<00:00, 405kB/s]
  to_pred = torch.tensor([encoded], device=self.device)


In [13]:
# One line per example returned with only_same_prediction=True; the first
# element of each example is what gets printed.
print('\n'.join(example[0] for example in exp.examples(only_same_prediction=True)))

The classic presentation is mostly good reviews :
The fun pool is a good fun .
The wedding dance is a good movie character
The little boys is a good friend .
The aerial display is a good sight .
The outer ditch is a good example .
The first principal is Joseph good ##e .
The southern name is a good combination .
The cattle show is a good one .
The golf landscape is a good choice .


In [14]:
# One line per example returned with only_different_prediction=True; the first
# element of each example is what gets printed.
print('\n'.join(example[0] for example in exp.examples(only_different_prediction=True)))


