In [1]:
import torch
import numpy as np
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer,TextClassificationPipeline
import shap

In [2]:
from transformers import DistilBertForSequenceClassification, DistilBertConfig, AutoModelForTokenClassification

# retreive the saved model 
model = DistilBertForSequenceClassification.from_pretrained('C:\RMM\Medical3\distilbert-base-uncased-finetuned-sst-2-english', 
                                                        local_files_only=True)

model.to('cuda')

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [3]:
tokenizer_name = "distilbert-base-uncased"
tokenizer = transformers.DistilBertTokenizerFast.from_pretrained(tokenizer_name)
pipe=TextClassificationPipeline(tokenizer=tokenizer,model=model,device=0,return_all_scores=True)

In [5]:
cor_x = np.load("test2.npy",allow_pickle=True)
cor_x=cor_x[:200]
cor_reviews = [review[0] for review in cor_x]
cor_labels = [review[1] for review in cor_x]

In [7]:
def mask_top_k(k, pred_label_no_mask, values, returned_tokens):
    """
    masked the k tokens that have the max shap values
    :param k: specify the largest k value
    :param values: shap values
    :param returned_tokens: a list of tokens
    :return: review, which is a str constructed from a list words
    """
    shap_values_neg, shap_values_pos = zip(*values)
    values = shap_values_neg if pred_label_no_mask==0 else shap_values_pos
    # print(values)
    values = np.array(values)
    # ids_top_k = np.argpartition(values, -k)[-k:]
    ids_top_k = (-values).argsort()[:k]
    for idx in ids_top_k:
        # print(idx)
        returned_tokens[idx] = "[UNK] "
    masked_review = "".join(returned_tokens)
    # print(masked_review)
    return masked_review

In [8]:
def predict_label(pipe, masked_review):
    """
    predict the label for the masked_review
    :param pipe: pipeline
    :param masked_review: string
    :return: 0 or 1, indicating the label
    """
    prediction = pipe([masked_review])
    if prediction[0]['label'] == 'NEGATIVE':
            neg_score = prediction[0]["score"]
            pos_score=1-prediction[0]["score"]
    else:
            pos_score = prediction[0]["score"] 
            neg_score = 1-prediction[0]["score"]       
    pred_label = 0 if prediction[0]['label'] == 'NEGATIVE'  else 1
    return pred_label

In [10]:
def mask_top_k(k, pred_label_no_mask, values, returned_tokens):
    """
    masked the k tokens that have the max shap values
    :param k: specify the largest k value
    :param values: shap values
    :param returned_tokens: a list of tokens
    :return: review, which is a str constructed from a list words
    """
    shap_values_neg, shap_values_pos = zip(*values)
    values = shap_values_neg if pred_label_no_mask==0 else shap_values_pos
    # print(values)
    values = np.array(values)
    # ids_top_k = np.argpartition(values, -k)[-k:]
    ids_top_k = (-values).argsort()[:k]
    for idx in ids_top_k:
        # print(idx)
        returned_tokens[idx] = "[UNK] "
    masked_review = "".join(returned_tokens)
    # print(masked_review)
    return masked_review

def predict_label(pipe, masked_review):
    """
    predict the label for the masked_review
    :param pipe: pipeline
    :param masked_review: string
    :return: 0 or 1, indicating the label
    """
    prediction = pipe([masked_review])
    neg_prediction_score = prediction[0][0]["score"]
    pos_prediction_score = prediction[0][1]["score"]
    pred_label = 0 if neg_prediction_score > pos_prediction_score else 1
    return pred_label

In [11]:
def model_prediction_gpu(x):
    tv = torch.tensor([tokenizer.encode(v, padding='max_length', 
                                        max_length=80, truncation=True) for v in x]).cuda()
    attention_mask = (tv!=0).type(torch.int64).cuda()
    outputs = model(tv, attention_mask=attention_mask)[0]
    scores = torch.nn.Softmax(dim=-1)(outputs)
    val = torch.logit(scores).detach().cpu().numpy()

    return val

In [30]:
shap_values_list = []
token_data_list = []
top_k = [1, 3, 5, 7,9,11,13]
all_labels =[]
# use GPU
gpu_explainer = shap.Explainer(model_prediction_gpu, tokenizer)
i = 0
for review, label in zip(cor_reviews, cor_labels):
    print(f"process {i}-th review")
    i += 1
    label4review =[]
    label4review.append(label)
    # to-do: truncate review if len(review)>80
    tokens = tokenizer.tokenize(review)
    if len(tokens) > 80:
        tokens_truncated = tokens[:80]
        review = " ".join(token for token in tokens_truncated)
    pred_label_no_mask = predict_label(pipe, review) # predicted label for review without mask
    label4review.append(pred_label_no_mask)
    shap_values = gpu_explainer([review])
    values = shap_values.values[0] # 2-dim ndarray
    returned_tokens = shap_values.data[0]
    for k in top_k:
        masked_review = mask_top_k(k, pred_label_no_mask, values, returned_tokens) # mask review by the shap values
        predicted_label= predict_label(pipe, masked_review)
        label4review.append(predicted_label)
    # label4review = [True_label, pred_label_without_mask, masked_label_1, masked_label_2, masked_review_3, masked_review_4]
    all_labels.append(label4review)

process 0-th review
process 1-th review
process 2-th review
process 3-th review
process 4-th review
process 5-th review
process 6-th review
process 7-th review
process 8-th review
process 9-th review
process 10-th review
process 11-th review
process 12-th review
process 13-th review
process 14-th review
process 15-th review
process 16-th review
process 17-th review
process 18-th review
process 19-th review
process 20-th review
process 21-th review
process 22-th review
process 23-th review
process 24-th review
process 25-th review
process 26-th review
process 27-th review
process 28-th review
process 29-th review
process 30-th review
process 31-th review
process 32-th review
process 33-th review
process 34-th review
process 35-th review
process 36-th review
process 37-th review
process 38-th review
process 39-th review
process 40-th review
process 41-th review
process 42-th review
process 43-th review
process 44-th review
process 45-th review
process 46-th review
process 47-th review
pr

In [31]:
import pandas as pd
df = pd.DataFrame(np.array(all_labels))

In [32]:
filtered_df = df.loc[df[0]==df[1]]
len(filtered_df)

153

In [16]:
from sklearn.metrics import accuracy_score

In [27]:
print(accuracy_score(filtered_df[1], filtered_df[2]), accuracy_score(filtered_df[1], filtered_df[3]), accuracy_score(filtered_df[1], filtered_df[4]), accuracy_score(filtered_df[1], filtered_df[5]))

0.7777777777777778 0.46405228758169936 0.3464052287581699 0.3333333333333333


In [29]:
accuracy_masked=[accuracy_score(filtered_df[1], filtered_df[2]), accuracy_score(filtered_df[1], filtered_df[3]), accuracy_score(filtered_df[1], filtered_df[4]), accuracy_score(filtered_df[1], filtered_df[5])]

In [38]:
accuracy_masked

[0.7777777777777778,
 0.46405228758169936,
 0.3464052287581699,
 0.3333333333333333]

In [33]:
print(accuracy_score(filtered_df[1], filtered_df[2]), accuracy_score(filtered_df[1], filtered_df[3]), accuracy_score(filtered_df[1], filtered_df[4]), accuracy_score(filtered_df[1], filtered_df[5]),accuracy_score(filtered_df[1], filtered_df[6]),accuracy_score(filtered_df[1], filtered_df[7]),accuracy_score(filtered_df[1], filtered_df[8]))

0.7777777777777778 0.5816993464052288 0.46405228758169936 0.3660130718954248 0.3464052287581699 0.35947712418300654 0.3333333333333333
