In [1]:
import spacy, shap

# Load the trained spaCy pipeline from disk.
nlp = spacy.load('output/model-best')

# Bare tokenizer built from the pipeline's vocab only; used by the SHAP text
# masker adapter. NOTE(review): this is not `nlp.tokenizer`, so no prefix /
# suffix / infix rules are applied here -- confirm that is intended.
tokenizer_spacy = spacy.tokenizer.Tokenizer(nlp.vocab)

# Run a throwaway text through the pipeline to discover the category labels
# the classifier emits; their order fixes the column order of `predict`.
doc = nlp('hey')
classes = list(doc.cats)

def predict(texts):
    """Score a batch of texts with the spaCy pipeline.

    Args:
        texts: iterable of text-like objects; each item is coerced with
            ``str()`` before being handed to ``nlp.pipe``.

    Returns:
        A list with one entry per input text. Each entry is a list of the
        ``doc.cats`` scores, ordered like the module-level ``classes`` list.
    """
    # The caller may pass non-``str`` objects, so normalise to bare strings.
    plain_texts = list(map(str, texts))
    return [
        [parsed.cats[label] for label in classes]
        for parsed in nlp.pipe(plain_texts)
    ]

def tok_adapter(text, return_offsets_mapping=False):
    """Wrap the spaCy tokenizer in the dict-returning shape used by the masker.

    Args:
        text: the string to tokenize.
        return_offsets_mapping: when true, also report each token's character
            span in ``text``.

    Returns:
        A dict with ``"input_ids"`` (one ``tok.norm`` value per token) and,
        if requested, ``"offset_mapping"`` (a list of ``(start, end)``
        character offsets, ``end`` exclusive).
    """
    tokens = tokenizer_spacy(text)
    encoded = {"input_ids": [t.norm for t in tokens]}
    if not return_offsets_mapping:
        return encoded
    # Token.idx is the start character offset; len(token) is its length.
    encoded["offset_mapping"] = [(t.idx, t.idx + len(t)) for t in tokens]
    return encoded

# Build a permutation-based SHAP explainer around `predict`. Text is masked
# token-by-token using the spans reported by `tok_adapter`, and the output
# columns are labelled with the classifier's category names.
explainer = shap.Explainer(
    predict,
    shap.maskers.Text(tok_adapter),
    output_names=classes,
    algorithm="permutation",
    max_evals=1500,  # evaluation budget forwarded to the explainer
)

In [2]:
sample = '''Dag oma, kan je morgen met ons mee naar de zee? We komen je halen met de auto vertrekken om 9 uur.
Vergeet je zwempak niet mee te nemen want we gaan samen zwemmen in het zwembad van Oostende.
Natuurlijk gaan we ook iets lekkers eten en een lekkere koffie gaan drinken.'''

# Process the text using spaCy. The SHAP calls below do not read `doc`;
# it is kept so the parsed document stays available for inspection.
doc = nlp(sample)

# Compute the SHAP values for the sample and label the outputs with the
# classifier's category names.
shap_values = explainer([sample])
shap_values.output_names = classes

# Render the per-token attribution plot once (the original cell called this
# twice back-to-back, which drew the identical plot twice).
shap.text_plot(shap_values)

Permutation explainer: 2it [00:24, 24.53s/it]               


In [3]:
sample = '''Dag meiske, gaan we een badje nemen? Da's plezant hé, zo in het waterzitten?'''

# Process the text using spaCy. The SHAP calls below do not read `doc`;
# it is kept so the parsed document stays available for inspection.
doc = nlp(sample)

# Compute the SHAP values for the sample and label the outputs with the
# classifier's category names.
shap_values = explainer([sample])
shap_values.output_names = classes

# Render the per-token attribution plot once (the original cell called this
# twice back-to-back, which drew the identical plot twice).
shap.text_plot(shap_values)

Permutation explainer: 2it [00:13, 13.79s/it]               
