# Question Answering

In [1]:
import transformers
import shap
import torch

# load the model
pmodel = transformers.pipeline('question-answering')

# define two predictions, one that outputs the logits for the range start,
# and the other for the range end
def f(questions, start):
    outs = []
    for q in questions:
        question, context = q.split("[SEP]")
        d = pmodel.tokenizer(question, context)
        out = pmodel.model.forward(**{k: torch.tensor(d[k]).reshape(1, -1) for k in d})
        logits = out.start_logits if start else out.end_logits
        outs.append(logits.reshape(-1).detach().numpy())
    return outs
def f_start(questions):
    return f(questions, True)
def f_end(questions):
    return f(questions, False)

# attach a dynamic output_names property to the models so we can plot the tokens at each output position
def out_names(inputs):
    question, context = inputs.split("[SEP]")
    d = pmodel.tokenizer(question, context)
    return [pmodel.tokenizer.decode([id]) for id in d["input_ids"]]
f_start.output_names = out_names
f_end.output_names = out_names

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [2]:
data = ["What is on the table?[SEP]When I got home today I saw my cat on the table, and my frog on the floor."]

explainer_start = shap.Explainer(f_start, pmodel.tokenizer)
shap_values_start = explainer_start(data)

shap.plots.text(shap_values_start)

  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer: 2it [00:10, 10.05s/it]               


# Text Generation

In [1]:
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import shap
import torch

# tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)
# model = AutoModelForCausalLM.from_pretrained("gpt2").cuda()


# tokenizer = AutoTokenizer.from_pretrained('nlpcloud/instruct-gpt-j-fp16', use_fast=True)
tokenizer = AutoTokenizer.from_pretrained('nlpcloud/instruct-gpt-j-fp16')

model = AutoModelForCausalLM.from_pretrained('nlpcloud/instruct-gpt-j-fp16', torch_dtype=torch.float16).cuda()

# tokenizer = AutoTokenizer.from_pretrained('openlm-research/open_llama_3b', use_fast=True, legacy=False)
# model = AutoModelForCausalLM.from_pretrained('openlm-research/open_llama_3b').cuda()

In [2]:
# set model decoder to true
model.config.is_decoder=True
# set text-generation params under task_specific_params
#  eos_token_id=50256, 
# full_text=False
# gen_dict = {
#     "do_sample": True,
#     "max_length": 50,
#     "temperature": 0.7,
#     "top_k": 50,
#     "no_repeat_ngram_size": 2
# }

gen_dict = dict(
    max_new_tokens=10, 
    num_beams=5, 
    no_repeat_ngram_size=3, 
    early_stopping=True, 
    eos_token_id=50256, 
)
model.config.task_specific_params = dict()
model.config.task_specific_params["text-generation"] = gen_dict

In [3]:
s = ['I enjoy walking with my cute dog']
import sys
sys.path.append('../')
import pandas as pd
from utils.functional import make_choice, create_choices, generate_fewshot_prompt_QA
from utils.samples import few_shot_QA_samples
exp_name = "openllama_COS-E"


df = pd.read_csv(f'../generated_nle/{exp_name}/valid_samples_1600_nle.csv')

s_range = range(1, 2)
batched_input_premise = df.question[s_range].tolist()
batched_input_choices = create_choices(df.loc[s_range], add_prefix=False)
batched_input_label = make_choice(df.loc[s_range])
batched_label_idx = [int(sample['label']) for _,sample in df.loc[s_range].iterrows()]

i = 0
prompt = generate_fewshot_prompt_QA(few_shot_QA_samples, batched_input_premise[i], batched_input_choices[i], batched_input_label[i], batched_label_idx[i])
# print(prompt)

# explainer = shap.Explainer(model, tokenizer)
# shap_values = explainer([prompt])
# shap_values = explainer(s)

# gen_dict = dict(
#     max_new_tokens=5, num_beams=3, no_repeat_ngram_size=3, eos_token_id=50256
# )

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
# # prompt = tokenizer.decode(model.generate(input_ids, max_new_tokens=5, num_beams=3, no_repeat_ngram_size=3, eos_token_id=50256)[0])
# # print(prompt)
# # prompt = print(tokenizer.decode(model(input_ids).logits.argmax(-1),))
# def f(x):

#     for x_ in x:
#         input_ids = tokenizer(x_, return_tensors="pt").input_ids.to(model.device)
#         result = model.generate()
#         out = pmodel.model.forward(**{k: torch.tensor(d[k]).reshape(1, -1) for k in d})
#         logits = out.start_logits if start else out.end_logits
#         outs.append(logits.reshape(-1).detach().numpy())

# model(input_ids).logits.argmax(-1)
# model.generate


In [4]:
# model(input_ids)
# additional_gen_dict = dict(
#     **gen_dict, return_dict_in_generate=True, output_scores=True, output_hidden_states=True
# )
additional_gen_dict =  gen_dict
results = model.generate(input_ids, **additional_gen_dict)
# results.scores[0]
# results.__dict__['scores'][0].shape
# , results.scores, print(tokenizer.decode(results.sequences[0]))
# print(tokenizer.decode(results.sequences[0]))
# results.sequences
print(results)
print(tokenizer.decode(results[0]))
print(results.shape)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


tensor([[  198, 24361,    25,  1002,   314,  2227,   284,  3650,   616, 19780,
         29649,   618,   314,  2492,   470,  1262,   340,    11,   644,   561,
           307,   257,   922,  1295,   329,   326,    30,   198, 22164,  1063,
            25, 37250,  2395,   824,   900,  3256,   705,  2536,  4338,  3256,
           705,    83,   726,  3650,  3256,   705,  2395,   824,   983,  3256,
           705, 17470,  1339, 20520,   198, 33706,    25, 19780,   900,   198,
           198, 24361,    25,  1867,  8318,  1022,   262,   966,   286,   530,
         10965,   284,  1194,    30,   198, 22164,  1063,    25, 37250,  2411,
           897,   341,  3256,   705,   417, 28361,   640,  3256,   705,   672,
           301,  6008,  3256,   705,  1726,   292,  1272,  3256,   705, 21084,
           434, 20520,   198, 33706,    25, 15477,   198, 50256]],
       device='cuda:0')

Question: If I wanted to store my chess pawn when I wasn't using it, what would be a good place for that?
Choices: ['c

In [39]:
# model(input_ids)
# additional_gen_dict = dict(
#     **gen_dict, return_dict_in_generate=True, output_scores=True, output_hidden_states=True
# )
additional_gen_dict =  gen_dict
results = model.generate(input_ids, **additional_gen_dict)
# results.scores[0]
# results.__dict__['scores'][0].shape
# , results.scores, print(tokenizer.decode(results.sequences[0]))
# print(tokenizer.decode(results.sequences[0]))
# results.sequences
print(results)
print(tokenizer.decode(results[0]))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


tensor([[  198, 24361,    25,  1002,   314,  2227,   284,  3650,   616, 19780,
         29649,   618,   314,  2492,   470,  1262,   340,    11,   644,   561,
           307,   257,   922,  1295,   329,   326,    30,   198, 22164,  1063,
            25, 37250,  2395,   824,   900,  3256,   705,  2536,  4338,  3256,
           705,    83,   726,  3650,  3256,   705,  2395,   824,   983,  3256,
           705, 17470,  1339, 20520,   198, 33706,    25, 19780,   900,   198,
           198, 24361,    25,  1867,  8318,  1022,   262,   966,   286,   530,
         10965,   284,  1194,    30,   198, 22164,  1063,    25, 37250,  2411,
           897,   341,  3256,   705,   417, 28361,   640,  3256,   705,   672,
           301,  6008,  3256,   705,  1726,   292,  1272,  3256,   705, 21084,
           434, 20520,   198, 33706,    25,  1288, 28361,   640,   198,   198,
         24361,    25,  1867,   318,   262]], device='cuda:0')

Question: If I wanted to store my chess pawn when I wasn't using it

In [5]:
from shap.utils import safe_isinstance
safe_isinstance(model, "transformers.pipelines.Pipeline")
# masker = shap.maskers.Text(tokenizer=tokenizer)
shap_model = shap.models.TeacherForcing(model, tokenizer)
# masker = shap.maskers.OutputComposite(tokenizer, shap_model.text_generate)
masker = shap.maskers.Text(tokenizer, mask_token="...", collapse_mask_token=True)

# shap_model([prompt])
explainer = shap.Explainer(shap_model, masker)
shap_values = explainer([prompt])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


  0%|          | 0/498 [00:00<?, ?it/s]

Partition explainer: 2it [00:16, 16.03s/it]               


In [23]:
torch.vstack(results.scores)

tensor([ 1288, 28361,   640,   198], device='cuda:0')

In [38]:
results.keys()

odict_keys(['sequences', 'scores', 'hidden_states'])

In [None]:
transition_scores = model.compute_transition_scores(
    results.sequences, results.scores, results.beam_indices, normalize_logits=False
)
transition_scores

In [6]:
tokenizer.decode(results.sequences[0])

"\nQuestion: If I wanted to store my chess pawn when I wasn't using it, what would be a good place for that?\nChoices: ['chess set','strategy', 'toy store', 'chess game','small case']\nAnswer: chess set\n\nQuestion: What passes between the point of one destination to another?\nChoices: ['relaxation', 'elapsed time', 'obstacle', 'uneasiness','movement']\nAnswer: elapsed"

In [36]:
direct_forward = model(input_ids)
out_ids = direct_forward.logits
out_ids

tensor([[[ -39.8042,  -36.4311,  -39.6660,  ...,  -50.0736,  -50.2993,
           -38.4816],
         [ -84.3977,  -85.3611,  -85.2696,  ...,  -94.7733,  -93.4589,
           -83.8214],
         [-135.3677, -134.3287, -135.9463,  ..., -142.6832, -145.4111,
          -131.1903],
         ...,
         [-263.0208, -260.4779, -263.8992,  ..., -284.0743, -291.1839,
          -260.3764],
         [-257.7575, -257.2354, -262.5941,  ..., -278.8386, -284.3914,
          -260.1074],
         [ -97.0955,  -99.3979, -102.7384,  ..., -103.5457,  -99.4452,
           -95.2827]]], device='cuda:0', grad_fn=<UnsafeViewBackward0>)

In [6]:
shap.plots.text(shap_values)

In [44]:
text_data == "Cho"

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False])

In [53]:
shap_values

.values =
array([[[-3.11558662e-02,  1.91829633e-01],
        [-3.11558662e-02,  1.91829633e-01],
        [-3.02129943e-01, -8.73393967e-02],
        [-3.02129943e-01, -8.73393967e-02],
        [-2.31034228e-01, -2.05158506e-01],
        [-2.31034228e-01, -2.05158506e-01],
        [-2.31034228e-01, -2.05158506e-01],
        [-2.31034228e-01, -2.05158506e-01],
        [-1.03735321e-01, -6.01655956e-02],
        [-1.03735321e-01, -6.01655956e-02],
        [-1.03735321e-01, -6.01655956e-02],
        [-1.03735321e-01, -6.01655956e-02],
        [-2.39200548e-02, -8.82488898e-02],
        [-2.39200548e-02, -8.82488898e-02],
        [-2.39200548e-02, -8.82488898e-02],
        [-2.39200548e-02, -8.82488898e-02],
        [-2.39200548e-02, -8.82488898e-02],
        [-2.37817155e-01, -1.16330269e-01],
        [ 1.12923361e-02, -1.23330763e-01],
        [ 1.12923361e-02, -1.23330763e-01],
        [ 1.84260197e-01, -7.20022683e-02],
        [ 1.84260197e-01, -7.20022683e-02],
        [ 2.70123069e-

In [54]:
text_data = shap_values.data[0]
quesiton_end = np.arange(len(text_data))[np.where(text_data == "Cho")].max()
question = text_data[63:quesiton_end-2]
print(question)
question_shap = shap_values.values[0][63:quesiton_end-2]
print(question_shap)
positive_shap = np.mean(question_shap[..., 0])
positive_shap = np.mean(question_shap>0)


[' What' ' passes' ' between' ' the' ' point' ' of' ' one' ' destination'
 ' to' ' another']
[[ 0.13453304  0.0965683 ]
 [ 0.13453304  0.0965683 ]
 [ 0.38477564 -0.38795576]
 [ 0.38477564 -0.38795576]
 [ 0.30486447 -0.35433748]
 [ 0.30486447 -0.35433748]
 [ 0.61978615 -0.0494888 ]
 [ 0.61978615 -0.0494888 ]
 [ 0.61978615 -0.0494888 ]
 [ 0.61978615 -0.0494888 ]]
