In [1]:
import json
import os
from collections import Counter

from dotenv import load_dotenv, find_dotenv
from langchain_core.outputs import LLMResult
from langchain_openai import ChatOpenAI

from agent.utils.loader import load_prompt, load_processed_data

_ = load_dotenv(find_dotenv())

In [2]:
# Experiment identity: which dataset, prompting mode and model to run.
dataset_name = 'hotpot_qa'
mode = "self-consistency"
model = "gpt-4o-mini"

# Sampling settings. The original paper used GPT-3 with temperature 0.7 and n=10.
temperature = 0.7
n = 10
top_p = 0.95
seed = 42

# Run size: how many leading rows of the dataset to keep, and how many rows go in one batch.
num_samples = 1000
batch_size = 100

# Input data and output location (the output path encodes the run settings above).
processed_data_path = f"../../data/processed_data/{dataset_name}.jsonl"
save_results_path = f"../../output/inference/{model}/{dataset_name}/{mode}/num_samples_{num_samples}_top_p_{top_p}_temperature_{temperature}_seed_{seed}.jsonl"

prompt = load_prompt(dataset_name=dataset_name, mode=mode)

dataset = load_processed_data(dataset_name=dataset_name, file_path=processed_data_path)
# A non-positive num_samples keeps the full dataset; otherwise keep rows 0..num_samples-1.
if num_samples > 0:
    dataset = dataset.select(range(num_samples))

llm = ChatOpenAI(
    model=model,
    top_p=top_p,
    n=n,
    temperature=temperature,
    base_url="https://api.chsdw.top/v1",
    seed=seed,
)

# Print the prompt and the first dataset row as a quick sanity check.
prompt.pretty_print()
print(dataset[0])




Remember your answer should follow previous pattern and format.


Q: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?


A: Let's think step by step. The eastern sector of Colorado orogeny extends into the High Plains. High Plains rise in elevation from around 1,800 to 7,000 ft. FINAL ANSWER: 1,800 to 7,000 ft.


Q: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?


A: Let's think step by step. Milhouse was named after U.S. president Richard Nixon. FINAL ANSWER: Richard Nixon.


Q: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?


A: Let's think step by step. Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Gesture. FINAL ANSWER: The Saimaa Gesture.


Q: What profession does Nicholas R

In [None]:
from tqdm.asyncio import tqdm, tqdm_asyncio
import nest_asyncio

nest_asyncio.apply()

def vote(candidate: list, split_signal: str) -> str:
    """Return the majority answer among sampled completions.

    Each message is reduced to the text that follows the last occurrence of
    ``split_signal`` (with surrounding whitespace stripped); a message that
    does not contain the signal is used whole. The answer extracted most often
    wins, and ties go to the answer that appears first in ``candidate``.

    Args:
        candidate: Completion strings sampled for one question.
        split_signal: Marker that precedes the final answer inside a completion.

    Returns:
        The most frequent extracted answer.

    Raises:
        ValueError: If ``candidate`` is empty.
    """
    if not candidate:
        raise ValueError("vote() requires at least one candidate")
    # str.split returns a list, which is unhashable and cannot be tallied;
    # keep only the trailing segment so every prediction is a plain string.
    predictions = [message.split(split_signal)[-1].strip() for message in candidate]
    tally = Counter(predictions)
    # Counter preserves first-seen order and max() returns the first maximum,
    # so ties are broken deterministically in favour of the earliest answer.
    return max(tally, key=tally.get)

async def inference(item: dict) -> dict:
    """Sample completions for one dataset row.

    Renders ``prompt`` with ``item``, sends it to ``llm`` in a single
    ``agenerate`` call, and collects the text of every returned generation.

    Args:
        item: One dataset row; passed to ``prompt.invoke`` as its input.

    Returns:
        A copy of ``item`` with a ``"candidate"`` key holding the list of
        completion texts. If anything raises, the exception is printed and a
        copy of ``item`` with ``"prediction": "ERROR"`` is returned instead.
    """
    try:
        response: LLMResult = await llm.agenerate(messages=[prompt.invoke(input=item)])
        # Only one prompt is sent, so generations[0] holds all of its sampled choices.
        candidate = [choice.message.content for choice in response.generations[0]]
    except Exception as e:
        # Best effort: report the failure and mark the row instead of aborting the batch.
        print(e)
        return {**item, "prediction": "ERROR"}
    # Every dataset stores the raw candidates the same way, so no per-dataset branching is needed.
    return {**item, "candidate": candidate}

async def self_consistency_inference() -> None:
    """Run ``inference`` over ``dataset`` in batches, checkpointing to ``save_results_path``.

    If the results file already exists, its JSON-lines records are loaded and
    processing resumes at row ``len(results)``; otherwise the file's parent
    directory is created. Each batch of up to ``batch_size`` rows is awaited
    together through ``tqdm_asyncio.gather`` and the batch's results are then
    appended to the file, one JSON object per line.

    NOTE(review): rows saved with ``"prediction": "ERROR"`` count as completed
    on resume and are not retried — confirm that this is intended.
    """
    results = []
    if os.path.exists(save_results_path):
        with open(save_results_path, 'r', encoding='utf-8') as file:
            # Skip blank lines so a stray empty line does not break resuming.
            results = [json.loads(line) for line in file if line.strip()]
    else:
        folder_path = os.path.dirname(save_results_path)
        # dirname is "" for a bare filename, and os.makedirs("") would raise.
        if folder_path:
            os.makedirs(folder_path, exist_ok=True)

    for idx in tqdm(range(len(results), dataset.num_rows, batch_size)):
        batch = dataset.select(range(idx, min(idx + batch_size, dataset.num_rows)))
        batch_results = await tqdm_asyncio.gather(*(inference(item) for item in batch))
        results.extend(batch_results)
        # Append only the new batch: the existing checkpoint is never truncated,
        # so an interrupt cannot wipe earlier results, and I/O stays linear.
        with open(save_results_path, 'a', encoding='utf-8') as file:
            file.writelines(json.dumps(result) + "\n" for result in batch_results)
