In [1]:
import os
import json
import random
from tqdm import tqdm

import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

device = "cuda"
tokenizer = AutoTokenizer.from_pretrained("/home/tanhexiang/gptj")
model = AutoModelForCausalLM.from_pretrained("/home/tanhexiang/gptj",torch_dtype=torch.float16).to(device)

model.eval()

  from .autonotebook import tqdm as notebook_tqdm


GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): Embedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (out_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): GPTJMLP(
          (fc_in): Linear(in_features=4096, out_features=16384, bias=True)
          (fc_out): Linear(in_features=16384, out_features=4096, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f)

In [2]:
with open('prompts/MeLLo-prompt.txt', 'r') as f:
    task_prompt = f.read()


In [5]:
generated_text_1 = tokenizer.decode(output[0], skip_special_tokens=False)
index = generated_text_1.find(Question)
print(index)
length = len(Question)
rest = generated_text_1[index+length:]
rf_index = rest.find('Retrieved fact:')
generate_q_a = rest[:rf_index]
print(":",generate_q_a,":")
#print(task_prompt+generate_q_a)#这后面有个换行


NameError: name 'output' is not defined

In [3]:
def call_gpt(cur_prompt, start):
    # 将输入文本编码为模型输入
    input_ids = tokenizer.encode(cur_prompt, return_tensors="pt").to(device)
    output = model.generate(input_ids, max_length=input_ids.size()[1]+100,num_return_sequences=1)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    rest = generated_text[start:]
    fa_index = rest.find('\n\nQuestion:')#找final_ans
    rf_index = rest.find('Retrieved fact:')
    
    if (fa_index > rf_index and rf_index!=-1 ) or fa_index == -1:
        index = rf_index
    else:
        index = fa_index

    generate_q_a = rest[:index]
    #print(generate_q_a)
    return generate_q_a
#==============================for contriever====================================
def mean_pooling(token_embeddings, mask):
    token_embeddings = token_embeddings.masked_fill(~mask[..., None].bool(), 0.)
    sentence_embeddings = token_embeddings.sum(dim=1) / mask.sum(dim=1)[..., None]
    return sentence_embeddings

def get_sent_embeddings(sents, contriever, tok, BSZ=32):    
    all_embs = []
    for i in tqdm(range(0, len(sents), BSZ)):
        sent_batch = sents[i:i+BSZ]
        inputs = tok(sent_batch, padding=True, truncation=True, return_tensors='pt').to("cuda")
        with torch.no_grad():
            outputs = contriever(**inputs)
            embeddings = mean_pooling(outputs[0], inputs['attention_mask'])
        all_embs.append(embeddings)
    all_embs = torch.vstack(all_embs)
    return all_embs

def retrieve_facts(query, fact_embs, contriever, tok, k=1):
    inputs = tok([query], padding=True, truncation=True, return_tensors='pt').to("cuda")
    with torch.no_grad():
        outputs = contriever(**inputs)
        query_emb = mean_pooling(outputs[0], inputs['attention_mask'])
    sim = (query_emb @ fact_embs.T)[0]
    knn = sim.topk(k, largest=True)
    return knn.indices

contriever = AutoModel.from_pretrained("/home/tanhexiang/contriever").cuda()
tokenizer_con = AutoTokenizer.from_pretrained("/home/tanhexiang/contriever")

In [4]:
with open('datasets/MQuAKE-CF-3k.json', 'r') as f:
    dataset = json.load(f)
new_facts = set()
for d in dataset:
    for r in d["requested_rewrite"]:
        new_facts.add(f'{r["prompt"].format(r["subject"])} {r["target_new"]["str"]}')
new_facts = list(new_facts)

embs = get_sent_embeddings(new_facts, contriever, tokenizer_con)

100%|██████████| 88/88 [00:02<00:00, 36.53it/s]


In [5]:
T = 10

cor = 0
tot = 0
start = len(task_prompt)

model.config.pad_token_id = model.config.eos_token_id

record_list = []
cor_list = []
for d in tqdm(dataset):
    #print(d)
    real_edit = []
    tot += 1
    hop = len(d["new_single_hops"])
    real_hop = []
    #print(hop)
    #用于记录该问题应该retrieve哪些edit fact
    for r in d["requested_rewrite"]:
        real_edit.append(f'{r["prompt"].format(r["subject"])} {r["target_new"]["str"]}')
    for h in d['new_single_hops']:
        real_hop.append(h['question'])
    cnt = 0
    for q in d["questions"]:
        cnt+=1
        retrieved_facts = []
        found_ans = False
        prompt = task_prompt + "\n\nQustion: " + q
        flag = 0
        gen_q = []
        i = 0
        for i in range(4):
            # prompt the model to generate a subquestion and a tentative answer
            start = len(prompt)
            gen = call_gpt(prompt, start)
            gen_q.append(gen)
            last_sent = gen.strip().split('\n')[-1]
            
            # if final answer is there, get the answer and exit
            if last_sent.startswith('Final answer: '):
                found_ans = True
                ans = last_sent[len("Final answer: "):]
                break
            
            # otherwise, extract the generated subquestion
            if len(gen.strip().split('\n')) < 2:
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q,'answer':"failed_1"}
                record_list.append(record)
                flag = 1
                break # failed case
            subquestion = gen.strip().split('\n')[-2]
            if not subquestion.startswith('Subquestion: '):#生成有问题
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q,'answer':"failed_2"}
                record_list.append(record)
                flag = 1
                break # failed case
            subquestion = subquestion[len("Subquestion: "):]
            
            # retrieve an edited fact using the generated subquestion
            fact_ids = retrieve_facts(subquestion, embs, contriever, tokenizer_con)
            fact_sent = new_facts[fact_ids[0]]
            retrieved_facts.append(fact_sent)
            
            # put the retrieved fact at the end of the prompt, the model self-checks if it contradicts
            prompt = prompt + gen + 'Retrieved fact: ' + fact_sent + '.'
            
        prompt = prompt + gen
        
        if not found_ans:
            if flag == 0:
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q,'answer':"no_final_ans"}
                record_list.append(record)
            continue
        # if the answer is correct
        if ans == d["new_answer"] or ans in d["new_answer_alias"]:
            cor += 1
            cor_record = {'id':tot,'hop':hop,'used_hop':i,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q}
            cor_list.append(cor_record)
            break
        else:
            record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q,'answer':"not_correct_ans"}
            record_list.append(record)
            
print(f'Multi-hop acc = {cor / tot} ({cor} / {tot})')

import numpy as np
np.savez('cor_list_3000_4',cor_list)
np.savez('record_list_3000_4',record_list)

  0%|          | 0/3000 [00:00<?, ?it/s]

  2%|▏         | 56/3000 [52:58<46:25:16, 56.77s/it]


KeyboardInterrupt: 

In [8]:
cor

348

In [11]:
cor_record

{'id': 2989,
 'hop': 4,
 'used_hop': 3,
 'question': 'What is the name of the capital city of the country where the author of "Rights of Man" was from?',
 'real_edit': ['The author of Rights of Man is Veronica Roth',
  'Divergent was created in the country of United Kingdom',
  'The capital of United Kingdom is Angri'],
 'retrieve_facts': ['The author of Rights of Man is Veronica Roth',
  'The capital of United Kingdom is Angri',
  'The capital of United Kingdom is Angri'],
 'real_hop:': ['Who is the author of Rights of Man?',
  'What is Veronica Roth famous for?',
  'Which country was Divergent created in?',
  'What is the capital of United Kingdom?'],
 'gen_q': ['\nSubquestion: What is the name of the country where the author of "Rights of Man" was from?\nGenerated answer: The author of "Rights of Man" was from United Kingdom.\n',
  '\nRetrieved fact contradicts to generated answer, so the intermediate answer is: United Kingdom\nSubquestion: What is the name of the capital city of Un

In [12]:
len(cor_list)

348

In [10]:
cnt = 0
for r in cor_list:
    if r['used_hop']==r['hop']:
        cnt+=1
        print(r['id'],":",r)
print(cnt)

6 : {'id': 6, 'hop': 4, 'used_hop': 4, 'question': "In which city is the headquarters of the manufacturer of Ford Transit's founding company located?", 'real_edit': ['The company that produced Ford Transit is Lotus Cars', 'Lotus Cars was founded by William Ruto', 'William Ruto is a citizen of South Korea', 'The capital of South Korea is Pasco'], 'retrieve_facts': ['The company that produced Ford Transit is Lotus Cars', 'Lotus Cars was founded by William Ruto', 'William Ruto is a citizen of South Korea', 'The capital of South Korea is Pasco'], 'real_hop:': ['Which company is Ford Transit produced by?', 'Who founded Lotus Cars?', 'What is the country of citizenship of William Ruto?', 'What is the capital of South Korea?'], 'gen_q': ["\nSubquestion: In which city is the headquarters of Ford Transit's founding company located?\nGenerated answer: The headquarters of Ford Transit's founding company is in the city of Detroit.\n", '\nRetrieved fact contradicts to generated answer, so the inter

In [13]:
import numpy as np
np.savez('cor_list_3000',cor_list)

In [10]:
cor/tot

0.084

In [54]:
record_list
with open("record_list_h100.json", "w") as f:
    json.dump(record_list, f, ensure_ascii=False)

In [32]:
record_list

[{'id': 1,
  'hop': 2,
  'question': 'Who is the head of state of the country where Ellie Kemper holds a citizenship?',
  'real_edit': ['Ellie Kemper is a citizen of Croatia'],
  'retrieve_facts': ['Ellie Kemper is a citizen of Croatia',
   'The name of the current head of state in Austria is Kingdom of Italy',
   'The name of the current head of state in Italy is Mamnoon Hussain',
   'The name of the current head of state in Italy is Mamnoon Hussain'],
  'real_hop:': ['What is the country of citizenship of Ellie Kemper?',
   'What is the name of the current head of state in Croatia?'],
  'gen_q': ['\nSubquestion: What is the name of the current head of state in the country where Ellie Kemper holds a citizenship?\nGenerated answer: The head of state of the country where Ellie Kemper holds a citizenship is President Barack Obama.\n',
   '\nRetrieved fact contradicts to generated answer, so the intermediate answer is: Croatia\nSubquestion: What is the name of the current head of state in

单独问retrieve_fact和generated_ans看会不会判断错。

In [20]:
with open('prompts/test_contra_prompt.txt', 'r') as f:
    task_prompt_contra = f.read()

Generated_answer = "Generated answer:The head of state of Croatia is KolindaGrabar-Kitarović.\n"
Retrieved_fact = "Retrieved fact:The name of the current head of state in Austria is Kingdomof Italy."
prompt = task_prompt+'\n\n'+Generated_answer+Retrieved_fact
start = len(prompt)
print(prompt)
gen = call_gpt(prompt, start)
print(gen)

Generated answer: Ivanka Trump's spouse is Jared Kushner.
Retrieved fact: Ivanka Biden is married to Samantha Cameron.
Retrieved fact does not contradict to generated answer, so the intermediate answer is: Jared Kushner

Generated answer: The country of citizenship of Jared Kushner is United States.
Retrieved fact: Jared Kushner is a citizen of Canada.
Retrieved fact contradicts to generated answer, so the intermediate answer is: Canada

Generated answer:  The capital city of Canada is Ottawa.
Retrieved fact: The capital city of United States of America is Seattle.
Retrieved fact does not contradict to generated answer, so the intermediate answer is: Ottawa

Generated answer: Rainn Wilson is a citizen of United States of America.
Retrieved fact: Rainn Wilson is a citizen of Croatia.
Retrieved fact contradicts to generated answer, so the intermediate answer is: Croatia

Generated answer: The head of state of Croatia is President Zoran Milanović.
Retrieved fact: The name of the current h

针对retrieve_fact影响subquestion的生成，试试换个retrieve_fact或者不给retrieve_fact

In [34]:
with open('prompts/MeLLo-prompt.txt', 'r') as f:
    task_prompt = f.read()
question = "What is the capital of the country to which Lou Pearlmanbelonged?\n"
gen_1 = "\nSubquestion: What is the name of the country to which LouPearlman belonged?\nGenerated answer: The country to which Lou Pearlmanbelonged is United States of America.\n"
retrieve_1 = "Lou Pearlman is a citizen of India."
gen_2 = "\nRetrieved fact contradicts to generated answer, so the intermediateanswer is: India\nSubquestion: What is the capital of India?\nGenerated answer:The capital of India is New Delhi.\n"
retrieve_2_none = "."
retrieve_2_subject = "The capital of China is Bengaluru."
retrieve_2_object = "The capital of Spain is Beijing."
retrieve_2_relation = "The most popular city of Spain is Bengaluru."
retrieve_2_other = "The piggpy peggy is a pink pig."
retrieve_2_real = "The capital of India is Taloga."

p = task_prompt+"\n\n"+question+gen_1+'Retrieved fact: '+retrieve_1+gen_2+'Retrieved fact: '
#print(p)
prompt_none = p + retrieve_2_none
prompt_subject = p + retrieve_2_subject
prompt_object = p + retrieve_2_object
prompt_relation = p + retrieve_2_relation
prompt_other = p + retrieve_2_other
prompt_real = p + retrieve_2_real

#start = len(prompt_none)
#gen_none = call_gpt(prompt_none, start)

#start = len(prompt_subject)
#gen_subject = call_gpt(prompt_subject, start)

#start = len(prompt_object)
#gen_object = call_gpt(prompt_object, start)

start = len(prompt_relation)
gen_relation = call_gpt(prompt_relation, start)

start = len(prompt_other)
gen_other = call_gpt(prompt_other, start)

start = len(prompt_real)
gen_real = call_gpt(prompt_real, start)

OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 15.78 GiB total capacity; 14.83 GiB already allocated; 13.69 MiB free; 15.40 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [37]:
print(gen_real)

.
Retrieved fact contradicts to generated answer, so the intermediate answer is: Taloga
Subquestion: What is the name of the country to which Taloga belongs?
Generated answer: The country to which Taloga belongs is India.



中文edit，英文CoT和prompt

In [10]:
with open ('new_facts_ch.txt','r',encoding='utf-8') as f:
    new_fact_ch = f.readlines()
new_facts_ch = []
for fact in new_fact_ch:
    fact = fact.strip('\n')
    new_facts_ch.append(fact)

embs_ch = get_sent_embeddings(new_facts_ch, contriever, tokenizer_con)

T = 10

cor = 0
tot = 0
start = len(task_prompt)

model.config.pad_token_id = model.config.eos_token_id

record_list_ch = []
for d in tqdm(dataset[:2]):
    #print(d)
    real_edit = []
    tot += 1
    hop = len(d["new_single_hops"])
    #print(hop)
    #用于记录该问题应该retrieve哪些edit fact
    for r in d["requested_rewrite"]:
        real_edit.append(f'{r["prompt"].format(r["subject"])} {r["target_new"]["str"]}')
    
    cnt = 0
    for q in d["questions"]:
        cnt+=1
        retrieved_facts = []
        found_ans = False
        prompt = task_prompt + "\n\nQustion: " + q
        flag = 0
        
        for i in range(4):
            # prompt the model to generate a subquestion and a tentative answer
            start = len(prompt)
            gen = call_gpt(prompt, start)
            last_sent = gen.strip().split('\n')[-1]
            
            # if final answer is there, get the answer and exit
            if last_sent.startswith('Final answer: '):
                found_ans = True
                ans = last_sent[len("Final answer: "):]
                break
            
            # otherwise, extract the generated subquestion
            if len(gen.strip().split('\n')) < 2:
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'answer':"failed_1"}
                record_list_ch.append(record)
                flag = 1
                break # failed case
            subquestion = gen.strip().split('\n')[-2]
            if not subquestion.startswith('Subquestion: '):
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'answer':"failed_2"}
                record_list_ch.append(record)
                flag = 1
                break # failed case
            subquestion = subquestion[len("Subquestion: "):]
            
            # retrieve an edited fact using the generated subquestion
            fact_ids = retrieve_facts(subquestion, embs_ch, contriever, tokenizer_con)
            fact_sent = new_facts_ch[fact_ids[0]]
            retrieved_facts.append(fact_sent)
            
            # put the retrieved fact at the end of the prompt, the model self-checks if it contradicts
            prompt = prompt + gen + 'Retrieved fact: ' + fact_sent + '.'
            
        prompt = prompt + gen
        
        if not found_ans:
            if flag == 0:
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'answer':"no_final_ans"}
                record_list_ch.append(record)
            continue
        # if the answer is correct
        if ans == d["new_answer"] or ans in d["new_answer_alias"]:
            cor += 1
            break
        else:
            record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'answer':"not_correct_ans"}
            record_list_ch.append(record)
            
print(f'Multi-hop acc = {cor / tot} ({cor} / {tot})')


100%|██████████| 88/88 [00:01<00:00, 49.43it/s]
 50%|█████     | 1/2 [05:00<05:00, 300.26s/it]

In [1]:
record_list_ch
with open("record_list_ch.json", "w") as f:
    json.dump(record_list_ch, f,ensure_ascii=False)

NameError: name 'record_list_ch' is not defined