In [15]:
# !pip install anchor-exp
# !python -m spacy download en_core_web_lg

# If you want to use BERT to perturb inputs (recommended), also install transformers:
# !pip install torch transformers spacy && python -m spacy download en_core_web_sm

In [1]:

%load_ext autoreload
%autoreload 2
import os
import os.path
import numpy as np
import sklearn
import sklearn.model_selection
import sklearn.linear_model
import sklearn.ensemble
import spacy
import sys
from sklearn.feature_extraction.text import CountVectorizer
from anchor import anchor_text
import time

In [2]:
# Load spaCy's 'en_core_web_sm' pipeline; this object is handed to
# anchor_text.AnchorText when the explainer is constructed.
nlp = spacy.load('en_core_web_sm')

In [16]:
# load data
import pickle
import joblib
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# Vocabulary size; it selects which pickled classifier/vectorizer pair is loaded.
n_features = 10000

# Read the IMDB reviews: `text` holds the raw documents, `target` the labels.
df = pd.read_csv('data/imdb.csv')

data = df.text
labels = df.target

# Load the pickled classifier.
# SECURITY NOTE: pickle.load can execute arbitrary code; only load files
# from a trusted source.
model_params_filename = f'models/predictions/svc_imdb_{n_features}.sav'
with open(model_params_filename, 'rb') as file:
    # `with` guarantees the file handle is closed even if unpickling fails.
    classifier = pickle.load(file)

# Load the pickled vectorizer instead of fitting a new CountVectorizer on
# `data`: a freshly fitted one would be overwritten here anyway, so fitting it
# would only waste time (presumably this is the vectorizer the classifier was
# trained with - TODO confirm).
vectorizer_filename = f'models/vectorizer/vectorizer_imdb_{n_features}.pkl'
with open(vectorizer_filename, 'rb') as file:
    vectorizer = pickle.load(file)

In [4]:
# `Imdb` is imported from a module named `data` (presumably project-local -
# TODO confirm). This is unrelated to the `data` variable that holds the review
# texts; the import does not rebind that name.
from data import Imdb
# Instantiate with the same n_features used for the pickled vectorizer; the
# `.model` attribute of this object is what predict_lr calls.
imdb = Imdb(n_features=n_features)

In [17]:
def predict_lr(texts):
    """Predict a label for each raw text.

    The texts are turned into features with the module-level ``vectorizer``
    and those features are classified by ``imdb.model``.

    NOTE(review): the ``classifier`` unpickled elsewhere in this notebook is
    not used here, and the function name says "lr" while that pickle file is
    named "svc" - confirm that ``imdb.model`` is the intended model.

    Args:
        texts: Raw documents, in whatever form ``vectorizer.transform`` accepts.

    Returns:
        The result of ``imdb.model.predict`` on the transformed texts.
    """
    features = vectorizer.transform(texts)
    predictions = imdb.model.predict(features)
    return predictions

In [18]:
# Build the anchor explainer with the two class names, indexed by the label
# that predict_lr returns.
explainer = anchor_text.AnchorText(
    nlp, ['negative', 'positive'], use_unk_distribution=True)

# Seed NumPy's global RNG before explaining (presumably so the explainer's
# sampling is repeatable between runs - TODO confirm).
np.random.seed(1)

# Explain the first review in the data set.
# Alternative toy input: text = 'this is a good book'
text = data.iloc[0]

# Query the model once and derive both labels from that single prediction.
# `1 - label` picks the other class because there are exactly two class names.
predicted_label = predict_lr([text])[0]
pred = explainer.class_names[predicted_label]
alternative = explainer.class_names[1 - predicted_label]

print('Prediction: %s' % pred)

# perf_counter is monotonic, so the elapsed time cannot be skewed by system
# clock adjustments the way time.time() can.
start_time = time.perf_counter()
exp = explainer.explain_instance(
    text, predict_lr, threshold=0.95, verbose=False, onepass=True)

print('Time: %s' % (time.perf_counter() - start_time))
print('Anchor: %s' % (' AND '.join(exp.names())))

Prediction: positive
Time: 0.09473586082458496
Anchor: gem


In [65]:
# Report the anchor rule and its precision.
anchor_rule = ' AND '.join(exp.names())
print('Anchor: %s' % anchor_rule)
print('Precision: %.2f' % exp.precision())

# Print two groups of examples from the explanation: first those where the
# model keeps the original prediction, then those where it does not. Only the
# first element of each example is printed.
for label, example_filter in (
        (pred, {'only_same_prediction': True}),
        (alternative, {'only_different_prediction': True})):
    print()
    print('Examples where anchor applies and model predicts %s:' % label)
    print()
    print('\n'.join(sample[0] for sample in exp.examples(**example_filter)))

Anchor: say AND played
Precision: 0.97

Examples where anchor applies and model predicts positive:

first UNK UNK UNK six UNK love first game UNK UNK loved UNK UNK time good plot great courses best UNK heard nintendo game remember UNK UNK think UNK UNK kidnapped bowser UNK UNK UNK game UNK UNK curses UNK five UNK challnges UNK stars secert parts UNK UNK stars UNK UNK bowser UNK three levels beat bowser UNK start UNK UNK UNK character gets UNK UNK toad basically UNK alliances heard UNK yoshi game towards end main villain bowser bunch UNK like boo goomba characters really great UNK UNK people say gameplay important UNK UNK UNK great plot UNK graphics UNK UNK whole bunch nintendo UNK UNK graphics UNK super mario bright colors great effects awesome UNK UNK found UNK water UNK UNK UNK UNK UNK UNK best UNK game music favorite part UNK UNK played young age gladly UNK game UNK music would put sleep especially UNK jolly UNK UNK UNK UNK others UNK especially UNK UNK ones stick favorites UNK UNK 