In [2]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from lime.lime_text import LimeTextExplainer

In [3]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

Downloading vocab.json: 100%|██████████| 1.04M/1.04M [00:00<00:00, 1.08MB/s]
Downloading merges.txt: 100%|██████████| 456k/456k [00:00<00:00, 1.25MB/s]
Downloading tokenizer.json: 100%|██████████| 1.36M/1.36M [00:01<00:00, 1.07MB/s]
Downloading config.json: 100%|██████████| 665/665 [00:00<00:00, 3.39MB/s]
Downloading model.safetensors: 100%|██████████| 548M/548M [05:59<00:00, 1.52MB/s] 
Downloading generation_config.json: 100%|██████████| 124/124 [00:00<00:00, 900kB/s]


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [11]:
tokenizer.pad_token = tokenizer.eos_token

In [16]:
import numpy as np

In [17]:
def predict_proba(texts):
    outputs = []
    for text in texts:
        inputs = tokenizer.encode_plus(text, return_tensors='pt', truncation=True, padding='max_length', max_length=tokenizer.model_max_length)
        with torch.no_grad():
            logits = model(**inputs).logits
        last_logits = logits[:, -1, :]
        probabilities = torch.nn.functional.softmax(last_logits, dim=-1)
        outputs.append(probabilities.cpu().numpy()[0])
    return np.array(outputs)

In [6]:
text_to_explain = "How you doing today"

In [7]:
explainer = LimeTextExplainer(class_names=['token_probability'])

In [19]:
exp = explainer.explain_instance(text_to_explain, predict_proba, num_features=6,num_samples=50)

In [20]:
print(exp.as_list())

[('How', -0.003474972971109041), ('doing', 0.0015593182323584952), ('today', 0.0014845432256675422), ('you', 0.0013364085007617492)]
