In [None]:
# !pip install --quiet shap==0.39

### SHAP with transformers

This is a notebook providing a quick tutorial for loading and investigating the explainability of PLMs given a classification task such as sentiment using SHAP. Any classification task can be used really and *ideally*, the model should have been trained on that particular classification task. For more details on SHAP, see the package documentation [here](httsp://shap.readthedocs.io).




In [1]:
import argparse
import os
import sys
from datetime import datetime
from pathlib import Path

# import bios
import numpy as np
import scipy as sp
import shap
import torch
import transformers
from numpy.lib.histograms import _histogram_dispatcher

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TextClassificationPipeline,
)

# from tokenizer import Tokenizer

sys.path.append("../")
# load custom roberta sequence classifier that uses the same averaging as declutr etc
from models.transformer_plms.model_utils.roberta_mean_classifier import MeanRobertaForSequenceClassification
from models.transformer_plms.model_utils.bert_mean_classifier import MeanBertForSequenceClassification

Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)


#### Using Trained Classifier from transformer AutoSequenceForClassification

In [2]:
# set ckpt path to model trained on the classification task of interest
model_dir = "/mnt/sdc/niallt/saved_models/pseudo_classification_tasks/mimic/ckpts/transformers/embedding_analysis/icd9-triage/fewshot_200/mimic-roberta-base/mlm_only/finetuned_plm/checkpoint-875/" # your directory to be put here

In [3]:
# load model
model = AutoModelForSequenceClassification.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()


In [None]:
# can print model to check its class
model

##### If using the automodelforseqeunce classification can use transformers pipeline

In [28]:
# check the model label idx
model.config.id2label

{0: 'LABEL_0', 1: 'LABEL_1'}

In [25]:
model.classifier

RobertaClassificationHead(
  (dense): Linear(in_features=768, out_features=768, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (out_proj): Linear(in_features=768, out_features=7, bias=True)
)

The transformer pipeline will only work with models with the untouched AutoModel class...

In [29]:
# # using the transformers pipeline - set device=0 if cuda is wanted
pipe = transformers.pipeline(
    "text-classification",
    model=model, tokenizer=tokenizer, 
    top_k = None, # for return all scores | top_k = 1 for only top score
    
)

In [21]:
# function


def score_and_visualize(text):

    """
    Function:
        Wrapper around the pipe class to return predictions and push through shap
        explainer
    """

    explainer = shap.Explainer(pipe)
    shap_values = explainer(text)

    shap.plots.text(shap_values)

In [30]:
# explain the model's predictions
example_texts = [
    "patient presented with high blood pressure and heart palpitations",
    "severe breathing problems",
    "infant with fever and cough",
]

explainer = shap.Explainer(pipe)

In [31]:
# test on some examples
shap_values = explainer(example_texts)

Visualize the impact on all the output classes

In the plots below, when you hover your mouse over an output class you get the explanation for that output class. When you click an output class name then that class remains the focus of the explanation visualization until you click another class.

The base value is what the model outputs when the entire input text is masked, while
is the output of the model for the full original input. The SHAP values explain in an addive way how the impact of unmasking each word changes the model output from the base value (where the entire input is masked) to the final prediction value.

In [32]:
shap.plots.text(shap_values)

In [12]:
# can also use the wrapper
score_and_visualize(example_texts)

### Below is manual to use without transformers pipeline - 
__NOTE__ it is recommended to try using the pipeline method above

In [4]:
# load model
model = MeanRobertaForSequenceClassification.from_pretrained(model_dir, output_hidden_states=True)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

In [36]:
model.classifier

MeanRobertaClassificationHead(
  (dense): Linear(in_features=768, out_features=768, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (out_proj): Linear(in_features=768, out_features=7, bias=True)
)

In [37]:
model.config

RobertaConfig {
  "_name_or_path": "/mnt/sdc/niallt/saved_models/pseudo_classification_tasks/mimic/ckpts/transformers/embedding_analysis/icd9-triage/fewshot_200/mimic-roberta-base/mlm_only/finetuned_plm/checkpoint-875/",
  "architectures": [
    "MeanRobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidde

In [4]:
tokenizer(["hello world", "one two", "three"], return_tensors="pt", padding=True)

{'input_ids': tensor([[    0, 42891,   232,     2],
        [    0,  1264,    80,     2],
        [    0,  9983,     2,     1]]), 'attention_mask': tensor([[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 0]])}

In [5]:
# get outputs of model on example text  
outputs = model(**tokenizer(["hello world", "one two", "three"], return_tensors="pt", padding=True), output_hidden_states=False)

In [6]:
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.7505, -1.0241,  0.3043,  0.7820, -0.3890, -0.1926,  0.6364],
        [ 2.1929, -1.0059, -0.3385, -0.7497, -0.1821,  0.5431, -0.5674],
        [ 0.8603, -0.3381,  0.1803, -1.1643, -0.4890,  0.9879, -0.2411]],
       grad_fn=<AddmmBackward0>), hidden_states=(tensor([[[ 0.1979, -0.0567, -0.0081,  ..., -0.0259,  0.0357,  0.0185],
         [-0.1038, -0.2010, -0.1854,  ..., -0.0110, -0.4036,  0.9629],
         [ 0.2973, -0.1139,  0.0781,  ..., -0.0094,  0.0729,  0.4541],
         [ 0.1862, -0.1866, -0.1173,  ...,  0.4430, -0.0534,  0.2372]],

        [[ 0.1979, -0.0567, -0.0081,  ..., -0.0259,  0.0357,  0.0185],
         [-0.0057, -0.2980,  0.3908,  ...,  0.0773, -0.0293,  0.3668],
         [ 0.1368, -0.1205, -0.2206,  ...,  0.1302, -0.1272,  0.6696],
         [ 0.1862, -0.1866, -0.1173,  ...,  0.4430, -0.0534,  0.2372]],

        [[ 0.1979, -0.0567, -0.0081,  ..., -0.0259,  0.0357,  0.0185],
         [ 0.0361, -0.0242,  0.0769,  ..., -0

In [5]:
def model_prediction_gpu(x):
    # shap expects this form of list comprehension - it breaks when using
    # tokenizer as normal...
    tv = torch.tensor(
        [
            tokenizer.encode(v, padding="max_length", max_length=10, truncation=True)
            for v in x
        ]
    ).cuda()
    print(f"tv: {tv} with shape: {tv.shape}")
    attention_mask = (tv != 0).type(torch.int64).cuda()
    outputs = model(tv, attention_mask = attention_mask, return_dict=True)
    logits = outputs.logits
    scores = torch.nn.Softmax(dim=-1)(logits)
    val = torch.logit(scores).detach().cpu().numpy()

    return val


def model_prediction_cpu(x):
    tv = torch.tensor(
        [
            tokenizer.encode(v, padding="max_length", max_length=10, truncation=True)
            for v in x
        ]
    )
    
    attention_mask = (tv != 0).type(torch.int64).cpu()
    outputs = model(tv, return_dict=True)
    logits = outputs.logits
    scores = torch.nn.Softmax(dim=-1)(logits)
    val = torch.logit(scores).detach().numpy()
    return val

In [6]:
model.cpu()
model_prediction_cpu(["one two three", "four five six"])

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


AttributeError: 'NoneType' object has no attribute 'unsqueeze'

In [10]:
model.cuda()
model_prediction_gpu(["one two three", "dasd"])

tv: tensor([[   0, 1264,   80,  130,    2,    1,    1,    1,    1,    1],
        [   0,  417,  281,  417,    2,    1,    1,    1,    1,    1]],
       device='cuda:0') with shape: torch.Size([2, 10])


array([[-1.6946149, -2.066955 , -1.6164997, -1.6537343, -1.8054684,
        -2.0233514, -1.7485743],
       [ 3.6026292, -6.593576 , -4.921353 , -6.396279 , -4.431014 ,
        -5.9035616, -6.3318496]], dtype=float32)

In [39]:

example_texts = [
    "patient presented with high blood pressure and heart palpitations",
    "severe breathing problems",
    "infant with fever and cough",
]


In [20]:
model.classifier

MeanRobertaClassificationHead(
  (dense): Linear(in_features=768, out_features=768, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (out_proj): Linear(in_features=768, out_features=7, bias=True)
)

In [40]:

# gpu explainer
model.cuda()
gpu_explainer = shap.Explainer(
    model_prediction_gpu, tokenizer,
)

In [13]:
example_texts

['patient was left waiting with a very high blood pressure for longer than advise',
 'Patient was left waiting for 10 minutes',
 'Nothing out of the ordinary']

In [41]:
shap_values = gpu_explainer(example_texts)

tv: tensor([[    0, 50264, 50264, 50264, 50264, 50264, 50264, 50264, 50264,     2]],
       device='cuda:0') with shape: torch.Size([1, 10])
tv: tensor([[    0, 23846,  2633,    19,   239,  1925,  1164,     8,  1144,     2]],
       device='cuda:0') with shape: torch.Size([1, 10])
tv: tensor([[    0, 23846,  2633,    19,   239,  1925,  1164, 50264, 50264,     2],
        [    0, 50264, 50264, 50264, 50264, 50264, 50264,   463,  1144,     2]],
       device='cuda:0') with shape: torch.Size([2, 10])
tv: tensor([[    0, 23846,  2633,    19, 50264, 50264, 50264, 50264, 50264,     2],
        [    0, 50264, 50264, 50264,  3530,  1925,  1164, 50264, 50264,     2],
        [    0, 50264, 50264, 50264, 50264, 50264, 50264,   463,  1144,     2],
        [    0, 50264, 50264, 50264, 50264, 50264, 50264, 50264, 50264,     2],
        [    0, 23846,  2633,    19,   239,  1925,  1164,     8,  1144,     2],
        [    0, 23846,  2633,    19,   239,  1925,  1164, 50264, 50264,     2]],
       devic

In [42]:
shap.plots.text(shap_values)

In [None]:
# [x[0] for x in sorted(model.model.config.label2id.items(), key=lambda x: x[1])]

In [7]:
# test

In [1]:
import datasets
import numpy as np
import pandas as pd
import scipy as sp
import torch
import transformers

import shap

# load the emotion dataset
dataset = datasets.load_dataset("emotion", split="train")
data = pd.DataFrame({"text": dataset["text"], "emotion": dataset["label"]})

In [2]:
# load the model and tokenizer
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "nateraw/bert-base-uncased-emotion", use_fast=True
)
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "nateraw/bert-base-uncased-emotion"
).cpu()
labels = sorted(model.config.label2id, key=model.config.label2id.get)




TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly.  To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()


In [13]:
# this defines an explicit python function that takes a list of strings and outputs scores for each class
# def f(x):
#     tv = torch.tensor(
#         [
#             tokenizer.encode(v, padding="max_length", max_length=128, truncation=True)
#             for v in x
#         ]
#     ).cpu()
    
    
#     attention_mask = (tv != 0).type(torch.int64).cpu()
#     outputs = model(tv, attention_mask=attention_mask)[0].detach().numpy()
#     scores = (np.exp(outputs).T / np.exp(outputs).sum(-1)).T
#     val = sp.special.logit(scores)
#     return val

def f(x):
    batch = tokenizer(x, padding=True, truncation=True, return_tensors="pt")
    attention_mask = batch["attention_mask"]
    input_ids = batch["input_ids"]
    outputs = model(input_ids, attention_mask=attention_mask)[0].detach().numpy()
    scores = (np.exp(outputs).T / np.exp(outputs).sum(-1)).T
    val = sp.special.logit(scores)
    return val


In [23]:
model.device

device(type='cuda', index=0)

In [11]:
labels

['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']

In [13]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [4]:
scores = f(["cheese"])

In [11]:
scores = f_cpu(["cheese"])

In [12]:
scores

array([[-2.0453725 ,  0.27473974, -3.4782152 , -1.3042594 , -2.794848  ,
        -4.104389  ]], dtype=float32)

In [14]:
# method = "custom tokenizer"
method = "transformers tokenizer"


# build an explainer by passing a transformers tokenizer
if method == "transformers tokenizer":
    explainer = shap.Explainer(f, tokenizer, output_names=labels)

# build an explainer by explicitly creating a masker
elif method == "default masker":
    masker = shap.maskers.Text(r"\W")  # this will create a basic whitespace tokenizer
    explainer = shap.Explainer(f, masker, output_names=labels)

# build a fully custom tokenizer
elif method == "custom tokenizer":
    import re

    def custom_tokenizer(s, return_offsets_mapping=True):
        """Custom tokenizers conform to a subset of the transformers API."""
        pos = 0
        offset_ranges = []
        input_ids = []
        for m in re.finditer(r"\W", s):
            start, end = m.span(0)
            offset_ranges.append((pos, start))
            input_ids.append(s[pos:start])
            pos = end
        if pos != len(s):
            offset_ranges.append((pos, len(s)))
            input_ids.append(s[pos:])
        out = {}
        out["input_ids"] = input_ids
        if return_offsets_mapping:
            out["offset_mapping"] = offset_ranges
        return out

    masker = shap.maskers.Text(custom_tokenizer)
    explainer = shap.Explainer(f, masker, output_names=labels)

In [8]:
!pip show shap

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Name: shap
Version: 0.42.1
Summary: A unified approach to explain the output of any machine learning model.
Home-page: 
Author: 
Author-email: Scott Lundberg <slund1@cs.washington.edu>
License: MIT License
Location: /mnt/sdc/niallt/venvs/39_prompt/lib/python3.9/site-packages
Requires: cloudpickle, numba, numpy, packaging, pandas, scikit-learn, scipy, slicer, tqdm
Required-by: ferret-xai


In [15]:
shap_values = explainer(data["text"][:3])

ValueError: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).