In [5]:
import os
import json
import random
from tqdm import tqdm

import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM

device = "cuda:1"
tokenizer = AutoTokenizer.from_pretrained("/home/tanhexiang/gptj")
model = AutoModelForCausalLM.from_pretrained("/home/tanhexiang/gptj",torch_dtype=torch.float16).to(device)

model.eval()

GPTJForCausalLM(
  (transformer): GPTJModel(
    (wte): Embedding(50400, 4096)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-27): 28 x GPTJBlock(
        (ln_1): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attn): GPTJAttention(
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (out_proj): Linear(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): GPTJMLP(
          (fc_in): Linear(in_features=4096, out_features=16384, bias=True)
          (fc_out): Linear(in_features=16384, out_features=4096, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f)

In [20]:
#input_text = "Question: What is the capital city of the country of citizenship of Ivanka Trump's spouse?\nSubquestion: Who is Ivanka Trump's spouse?\nGenerated answer: Ivanka Trump's spouse is Jared Kushner.\nRetrieved fact: Ivanka Biden is married to Samantha Cameron.\nRetrieved fact does not contradict to generated answer, so the intermediate answer is: Jared Kushner\nSubquestion: What is the country of citizenship of Jared Kushner?\nGenerated answer: The country of citizenship of Jared Kushner is United States.\nRetrieved fact: Jared Kushner is a citizen of Canada.\nRetrieved fact contradicts to generated answer, so the intermediate answer is: Canada\nSubquestion: What is the capital city of Canada?\nGenerated answer:  The capital city of Canada is Ottawa.\nRetrieved fact: The capital city of United States of America is Seattle.\nRetrieved fact does not contradict to generated answer, so the intermediate answer is: Ottawa\nFinal answer: Ottawa\n\nQuestion: Who is the head of state of the country where Rainn Wilson holds a citizenship?\nSubquestion: What is the country of citizenship of Rainn Wilson?\nGenerated answer: Rainn Wilson is a citizen of United States of America.\nRetrieved fact: Rainn Wilson is a citizen of Croatia.\nRetrieved fact contradicts to generated answer, so the intermediate answer is: Croatia\nSubquestion: What is the name of the current head of state in Croatia?\nGenerated answer: The head of state of Croatia is President Zoran Milanović.\nRetrieved fact: The name of the current head of state in Croatia is Kolinda Grabar-Kitarović.\nRetrieved fact contradicts to generated answer, so the intermediate answer is: Kolinda Grabar-Kitarović\nFinal answer: Kolinda Grabar-Kitarović\n\nQuestion: On which continent is the country of citizenship of the founder of the manufacturer of iPhone 5 situated?"
with open('prompts/MeLLo-prompt.txt', 'r') as f:
    task_prompt = f.read()

In [22]:
def call_gpt(cur_prompt, start):
    # 将输入文本编码为模型输入
    input_ids = tokenizer.encode(cur_prompt, return_tensors="pt").to(device)
    output = model.generate(input_ids, max_length=input_ids.size()[1]+100,num_return_sequences=1)
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    rest = generated_text[start:]
    fa_index = rest.find('\n\nQuestion:')#找final_ans
    rf_index = rest.find('Retrieved fact:')
    
    if (fa_index > rf_index and rf_index!=-1 ) or fa_index == -1:
        index = rf_index
    else:
        index = fa_index

    generate_q_a = rest[:index]
    #print(generate_q_a)
    return generate_q_a
#==============================for contriever====================================
def mean_pooling(token_embeddings, mask):
    token_embeddings = token_embeddings.masked_fill(~mask[..., None].bool(), 0.)
    sentence_embeddings = token_embeddings.sum(dim=1) / mask.sum(dim=1)[..., None]
    return sentence_embeddings

def get_sent_embeddings(sents, contriever, tok, BSZ=32):    
    all_embs = []
    for i in tqdm(range(0, len(sents), BSZ)):
        sent_batch = sents[i:i+BSZ]
        inputs = tok(sent_batch, padding=True, truncation=True, return_tensors='pt').to(device)
        with torch.no_grad():
            outputs = contriever(**inputs)
            embeddings = mean_pooling(outputs[0], inputs['attention_mask'])
        all_embs.append(embeddings.cpu())
    all_embs = torch.vstack(all_embs)
    return all_embs

def retrieve_facts(query, fact_embs, contriever, tok, k=1):
    inputs = tok([query], padding=True, truncation=True, return_tensors='pt').to(device)
    with torch.no_grad():
        outputs = contriever(**inputs)
        query_emb = mean_pooling(outputs[0], inputs['attention_mask']).cpu()
    sim = (query_emb @ fact_embs.T)[0]
    knn = sim.topk(k, largest=True)
    return knn.indices

contriever = AutoModel.from_pretrained("/home/tanhexiang/contriever").cuda()
tokenizer_con = AutoTokenizer.from_pretrained("/home/tanhexiang/contriever")

In [8]:
with open('datasets/MQuAKE-CF-3k.json', 'r') as f:
    dataset = json.load(f)
new_facts = set()
for d in dataset:
    for r in d["requested_rewrite"]:
        new_facts.add(f'{r["prompt"].format(r["subject"])} {r["target_new"]["str"]}')
new_facts = list(new_facts)

embs = get_sent_embeddings(new_facts, contriever, tokenizer_con)

100%|██████████| 88/88 [00:01<00:00, 60.27it/s]


In [23]:
T = 10

cor = 0
tot = 0
start = len(task_prompt)

model.config.pad_token_id = model.config.eos_token_id

record_list = []
cor_list = []
for d in tqdm(dataset):
    #print(d)
    real_edit = []
    tot += 1
    hop = len(d["new_single_hops"])
    real_hop = []
    #print(hop)
    #用于记录该问题应该retrieve哪些edit fact
    for r in d["requested_rewrite"]:
        real_edit.append(f'{r["prompt"].format(r["subject"])} {r["target_new"]["str"]}')
    for h in d['new_single_hops']:
        real_hop.append(h['question'])
    cnt = 0
    for q in d["questions"]:
        cnt+=1
        retrieved_facts = []
        found_ans = False
        prompt = task_prompt + "\n\nQustion: " + q
        flag = 0
        gen_q = []
        i = 0
        for i in range(5):
            # prompt the model to generate a subquestion and a tentative answer
            start = len(prompt)
            gen = call_gpt(prompt, start)
            gen_q.append(gen)
            last_sent = gen.strip().split('\n')[-1]
            
            # if final answer is there, get the answer and exit
            if last_sent.startswith('Final answer: '):
                found_ans = True
                ans = last_sent[len("Final answer: "):]
                break
            
            # otherwise, extract the generated subquestion
            if len(gen.strip().split('\n')) < 2:
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q,'answer':"failed_1"}
                record_list.append(record)
                flag = 1
                break # failed case
            subquestion = gen.strip().split('\n')[-2]
            if not subquestion.startswith('Subquestion: '):#生成有问题
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q,'answer':"failed_2"}
                record_list.append(record)
                flag = 1
                break # failed case
            subquestion = subquestion[len("Subquestion: "):]
            
            # retrieve an edited fact using the generated subquestion
            fact_ids = retrieve_facts(subquestion, embs, contriever, tokenizer_con)
            fact_sent = new_facts[fact_ids[0]]
            retrieved_facts.append(fact_sent)
            
            # put the retrieved fact at the end of the prompt, the model self-checks if it contradicts
            prompt = prompt + gen + 'Retrieved fact: ' + fact_sent + '.'
            
        prompt = prompt + gen
        
        if not found_ans:
            if flag == 0:
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q,'answer':"no_final_ans"}
                record_list.append(record)
            continue
        # if the answer is correct
        if ans == d["new_answer"] or ans in d["new_answer_alias"]:
            cor += 1
            cor_record = {'id':tot,'hop':hop,'used_hop':i,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q}
            cor_list.append(cor_record)
            break
        else:
            record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'real_hop:':real_hop,'gen_q':gen_q,'answer':"not_correct_ans"}
            record_list.append(record)
            
print(f'Multi-hop acc = {cor / tot} ({cor} / {tot})')

import numpy as np
np.savez('cor_list_3000_5',cor_list)
np.savez('record_list_3000_5',record_list)

  0%|          | 0/3000 [00:05<?, ?it/s]


RuntimeError: "addmm_impl_cpu_" not implemented for 'Half'

In [1]:
cor_record


NameError: name 'cor_record' is not defined

In [15]:
cor_list


[{'id': 1,
  'hop': 2,
  'used_hop': 2,
  'question': 'What is the name of the head of state of the country that Ellie Kemper is a citizen of?',
  'real_edit': ['Ellie Kemper is a citizen of Croatia'],
  'retrieve_facts': ['Ellie Kemper is a citizen of Croatia',
   'The name of the current head of state in Austria is Kingdom of Italy'],
  'real_hop:': ['What is the country of citizenship of Ellie Kemper?',
   'What is the name of the current head of state in Croatia?'],
  'gen_q': ['\nSubquestion: What is the name of the country that Ellie Kemper is a citizen of?\nGenerated answer: The name of the head of state of the country that Ellie Kemper is a citizen of is Barack Obama.\n',
   '\nRetrieved fact contradicts to generated answer, so the intermediate answer is: Croatia\nSubquestion: What is the name of the current head of state in Croatia?\nGenerated answer: The name of the current head of state in Croatia is Kolinda Grabar-Kitarović.\n',
   '\nRetrieved fact does not contradict to g

In [2]:
cnt = 0
for r in cor_list:
    if r['used_hop']<r['hop']:
        cnt+=1
        print(r['id'],":",r)
print(cnt)

NameError: name 'cor_list' is not defined

In [10]:
cor/tot

0.084

In [8]:
record_list
with open("record_list.json", "w") as f:
    json.dump(record_list, f, ensure_ascii=False)

0.084

In [32]:
record_list

[{'id': 1,
  'hop': 2,
  'question': 'Who is the head of state of the country where Ellie Kemper holds a citizenship?',
  'real_edit': ['Ellie Kemper is a citizen of Croatia'],
  'retrieve_facts': ['Ellie Kemper is a citizen of Croatia',
   'The name of the current head of state in Austria is Kingdom of Italy',
   'The name of the current head of state in Italy is Mamnoon Hussain',
   'The name of the current head of state in Italy is Mamnoon Hussain'],
  'real_hop:': ['What is the country of citizenship of Ellie Kemper?',
   'What is the name of the current head of state in Croatia?'],
  'gen_q': ['\nSubquestion: What is the name of the current head of state in the country where Ellie Kemper holds a citizenship?\nGenerated answer: The head of state of the country where Ellie Kemper holds a citizenship is President Barack Obama.\n',
   '\nRetrieved fact contradicts to generated answer, so the intermediate answer is: Croatia\nSubquestion: What is the name of the current head of state in

中文edit，英文CoT和prompt

In [10]:
with open ('new_facts_ch.txt','r',encoding='utf-8') as f:
    new_fact_ch = f.readlines()
new_facts_ch = []
for fact in new_fact_ch:
    fact = fact.strip('\n')
    new_facts_ch.append(fact)

embs_ch = get_sent_embeddings(new_facts_ch, contriever, tokenizer_con)

T = 10

cor = 0
tot = 0
start = len(task_prompt)

model.config.pad_token_id = model.config.eos_token_id

record_list_ch = []
for d in tqdm(dataset[:2]):
    #print(d)
    real_edit = []
    tot += 1
    hop = len(d["new_single_hops"])
    #print(hop)
    #用于记录该问题应该retrieve哪些edit fact
    for r in d["requested_rewrite"]:
        real_edit.append(f'{r["prompt"].format(r["subject"])} {r["target_new"]["str"]}')
    
    cnt = 0
    for q in d["questions"]:
        cnt+=1
        retrieved_facts = []
        found_ans = False
        prompt = task_prompt + "\n\nQustion: " + q
        flag = 0
        
        for i in range(4):
            # prompt the model to generate a subquestion and a tentative answer
            start = len(prompt)
            gen = call_gpt(prompt, start)
            last_sent = gen.strip().split('\n')[-1]
            
            # if final answer is there, get the answer and exit
            if last_sent.startswith('Final answer: '):
                found_ans = True
                ans = last_sent[len("Final answer: "):]
                break
            
            # otherwise, extract the generated subquestion
            if len(gen.strip().split('\n')) < 2:
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'answer':"failed_1"}
                record_list_ch.append(record)
                flag = 1
                break # failed case
            subquestion = gen.strip().split('\n')[-2]
            if not subquestion.startswith('Subquestion: '):
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'answer':"failed_2"}
                record_list_ch.append(record)
                flag = 1
                break # failed case
            subquestion = subquestion[len("Subquestion: "):]
            
            # retrieve an edited fact using the generated subquestion
            fact_ids = retrieve_facts(subquestion, embs_ch, contriever, tokenizer_con)
            fact_sent = new_facts_ch[fact_ids[0]]
            retrieved_facts.append(fact_sent)
            
            # put the retrieved fact at the end of the prompt, the model self-checks if it contradicts
            prompt = prompt + gen + 'Retrieved fact: ' + fact_sent + '.'
            
        prompt = prompt + gen
        
        if not found_ans:
            if flag == 0:
                record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'answer':"no_final_ans"}
                record_list_ch.append(record)
            continue
        # if the answer is correct
        if ans == d["new_answer"] or ans in d["new_answer_alias"]:
            cor += 1
            break
        else:
            record = {'id':tot,'hop':hop,'question':q,'real_edit':real_edit,'retrieve_facts':retrieved_facts,'answer':"not_correct_ans"}
            record_list_ch.append(record)
            
print(f'Multi-hop acc = {cor / tot} ({cor} / {tot})')

100%|██████████| 88/88 [00:01<00:00, 49.43it/s]
 50%|█████     | 1/2 [05:00<05:00, 300.26s/it]

In [1]:
record_list_ch
with open("record_list_ch.json", "w") as f:
    json.dump(record_list_ch, f,ensure_ascii=False)

NameError: name 'record_list_ch' is not defined

In [7]:
import numpy as np
cor_list = np.load('cor_list_4_llama.npz',allow_pickle=True)
len(cor_list['arr_0'])

424