In [1]:
import numpy as np
import spacy
import time
import torch

from anchor import anchor_text
from transformers import pipeline

In [2]:
# Load the spaCy pipeline named "en_core_web_sm"; it is handed to AnchorText
# when the explainers are constructed further down.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Transformers model/tokenizer classes (not referenced by the cells shown here).
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Sentiment pipeline; the same checkpoint id supplies both model and tokenizer.
# top_k=1 is relied on by predict_prob, which reads element 0 of each result.
classifier = pipeline(
    task="sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
    tokenizer="siebert/sentiment-roberta-large-english",
    top_k=1,
    device=device,
)

In [4]:
def predict_prob(texts):
    """Classify texts with the sentiment pipeline and return hard 0/1 labels.

    Despite the name, this returns class indices rather than probabilities:
    0 when the top-ranked label is ``'NEGATIVE'`` and 1 for anything else.

    Args:
        texts: A single string, or any iterable of strings (list, tuple,
            NumPy array, ...).

    Returns:
        A 1-D integer ``np.ndarray`` holding one label per input text, in
        input order. Empty input yields an empty integer array.
    """
    # A bare string is one document, not a sequence of characters; every other
    # iterable is materialised as a plain list so the pipeline is always called
    # in batch form and returns one result per text.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to classify: skip the model call and keep the dtype integral
    # (np.array([]) would otherwise default to float).
    if not texts:
        return np.zeros(0, dtype=int)

    preds = classifier(texts)
    # Each per-text result is indexed at [0] to get the top-ranked label dict.
    return np.array(
        [0 if result[0]['label'] == 'NEGATIVE' else 1 for result in preds],
        dtype=int,
    )

In [5]:
# Class names are ordered so that index 0 / 1 line up with the 0 / 1 labels
# returned by predict_prob. With use_unk_distribution=True the perturbed
# samples printed further down contain "UNK" placeholders.
explainer = anchor_text.AnchorText(
    nlp,
    ['NEGATIVE', 'POSITIVE'],
    use_unk_distribution=True,
)

In [6]:
# Sentence whose sentiment prediction will be explained.
text = 'The little mermaid is a good story.'

# predict_prob returns an array of class indices; take the entry for this one
# sentence and look up its display name on the explainer.
label_idx = predict_prob([text])[0]
pred = explainer.class_names[label_idx]
print('Prediction: %s' % pred)

Prediction: POSITIVE


In [None]:
# The anchor search draws random perturbations of the sentence, so results can
# differ between runs. Seed NumPy's global RNG first so a fresh
# Restart & Run All reproduces the same explanation.
# NOTE(review): assumes anchor samples via np.random — confirm against the
# installed anchor version.
np.random.seed(0)

# Search for an anchor for `text` under predict_prob with threshold=0.95.
exp = explainer.explain_instance(text, predict_prob, threshold=0.95)

In [8]:
# exp.names() yields the anchor's terms; show them as one AND-joined rule.
anchor_rule = ' AND '.join(exp.names())
print('Anchor: %s' % anchor_rule)
# Precision reported by the explanation, formatted to two decimals.
print('Precision: %.2f' % exp.precision())

Anchor: good AND a AND is
Precision: 1.00


In [9]:
# One perturbed sample per line: element 0 of every example returned with
# only_same_prediction=True.
print(*(sample[0] for sample in exp.examples(only_same_prediction=True)), sep='\n')

UNK UNK mermaid is a good story UNK
The UNK UNK is a good story UNK
UNK UNK UNK is a good UNK UNK
The little mermaid is a good story UNK
The UNK mermaid is a good UNK UNK
The UNK UNK is a good story UNK
The little mermaid is a good UNK UNK
The little mermaid is a good story .
The little UNK is a good story UNK
The little mermaid is a good story .


In [10]:
# One perturbed sample per line: element 0 of every example returned with
# only_different_prediction=True (prints a single blank line when there are none).
print(*(sample[0] for sample in exp.examples(only_different_prediction=True)), sep='\n')




In [11]:
# Second run: same sentence and classifier, but with use_unk_distribution=False.
# The samples printed below then contain substituted words rather than "UNK".
# This rebinds `explainer` and `exp`, so the cells that follow report on this
# run and no longer on the UNK-based one above.
#
# Class names use the same upper-case spelling as the first explainer and the
# labels predict_prob compares against, so printed labels stay consistent.
#
# Seed NumPy's global RNG so the randomized search is repeatable.
# NOTE(review): assumes anchor samples via np.random — confirm against the
# installed anchor version.
np.random.seed(0)
explainer = anchor_text.AnchorText(nlp, ['NEGATIVE', 'POSITIVE'], use_unk_distribution=False)
exp = explainer.explain_instance(text, predict_prob, threshold=0.95)

In [12]:
# One perturbed sample per line: element 0 of every example returned with
# only_same_prediction=True for the current `exp`.
print(*(sample[0] for sample in exp.examples(only_same_prediction=True)), sep='\n')

The glad ##stone is a good story .
The dough ##nut is a good bread !
The rock track is a good selection .
The copper ##head is a good method .
The black knight is a good party !
The open batsman is a good friend .
The color matching is typically good visibility .
The mineral ##ization is a good mineral .
The The process is reasonably good distribution .
The greatest strength is always good value :


In [13]:
# One perturbed sample per line: element 0 of every example returned with
# only_different_prediction=True for the current `exp`.
print(*(sample[0] for sample in exp.examples(only_different_prediction=True)), sep='\n')

The universal variety is slightly good reads :
The little tower is a good ruin .
The third reality is too good luck .
The great divide is not good here !
The population background is below good estimates .
The little friend is a good liar ;
The primary obstacle is finding good data ;
