In [3]:
from tqdm import tqdm
import json
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, SystemMessage

import getpass
import os


def _set_if_undefined(var: str):
    """Ask for environment variable ``var`` unless it already has a non-empty value.

    The value is read with ``getpass.getpass`` and stored in ``os.environ``
    under the name ``var``.
    """
    if os.environ.get(var):
        # Already set to something non-empty: leave it untouched.
        return
    os.environ[var] = getpass.getpass(f"Please provide your {var}")

# Make sure LANGCHAIN_API_KEY is set, prompting for it if it is missing or empty.
_set_if_undefined("LANGCHAIN_API_KEY")

In [4]:
from utils.tools import construct_tools, get_tools_descriptions
from langgraph.prebuilt import ToolNode

# Build the tool list with the project helper; `tools` is later passed to create_react_agent.
tools = construct_tools()
# NOTE(review): tools_descriptions and tool_node are not referenced by any other
# cell visible in this notebook — confirm whether they are still needed.
tools_descriptions = get_tools_descriptions(tools)
tool_node = ToolNode(tools)
from langchain_openai import ChatOpenAI

Number of domains: 192


In [9]:
from langgraph.prebuilt import create_react_agent

# System prompt shared by every agent built in this notebook.
# NOTE(review): the adjacent literals are concatenated as-is, so there is no
# space between "...not a sentence!)" and "Follow the format:". The text is left
# unchanged here so existing results stay comparable.
system_prompt = ("Answer the following question. "
                 "Remember your FINAL ANSWER should be clear and concise.(a single number or phrases, not a sentence!)"
                 "Follow the format: \n"
                 "FINAL ANSWER: <your answer>")

# `llm` is otherwise only assigned in the per-dataset config cells further down,
# so on a fresh kernel (Restart & Run All) this cell would fail with NameError.
# Build it here with the same settings those cells use.
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini", base_url="https://api.chsdw.top/v1", max_retries=3)

# ReAct agent over the project tools; the system prompt is applied via state_modifier.
graph = create_react_agent(llm, tools=tools, state_modifier=system_prompt)

# GSM8K

In [5]:
# Run configuration for the GSM8K evaluation.
# dataset_name selects the data file and the prompt style used by react();
# mode only appears in the output file name.
dataset_name, mode = "gsm8k", "react"
# Number of leading rows to evaluate; values <= 0 keep the whole file.
num_test_sample = 200
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4o-mini",
    base_url="https://api.chsdw.top/v1",
    max_retries=3,
)

In [6]:
from datasets import load_dataset
# Load the local JSONL file for the selected dataset as a single "train" split.
dataset = load_dataset("json", data_files=f"../data/{dataset_name}.jsonl", split="train")
if num_test_sample > 0:
    # Clamp to the dataset size: asking for more rows than the file contains
    # would otherwise fail on out-of-range indices instead of using every row.
    dataset = dataset.select(range(min(num_test_sample, len(dataset))))
# Keep only the text after the last "#### " marker as the reference answer.
dataset = dataset.map(lambda example: {"question": example["question"], "answer": example["answer"].split("#### ")[-1]})
print(dataset)

Dataset({
    features: ['question', 'answer'],
    num_rows: 200
})


In [13]:
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, SystemMessage, BaseMessage

async def react(item, graph, dataset_name: str = "hotpot_qa") -> "dict | None":
    """Run the agent graph on a single dataset item.

    Args:
        item: Mapping with at least a ``"question"`` entry.
        graph: Object exposing an async ``ainvoke(input=...)`` method.
        dataset_name: ``"hotpot_qa"`` sends the question unchanged; ``"gsm8k"``
            prefixes it with an instruction to solve the problem with Python code.

    Returns:
        Whatever ``graph.ainvoke`` returns (callers index it with
        ``["messages"]``), or ``None`` when the call raised. Returning ``None``
        rather than the exception object keeps the ``if result`` /
        ``not result`` checks used by the cells below meaningful, because an
        exception instance is truthy.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset.
    """
    import logging

    question = item["question"]
    if dataset_name == "hotpot_qa":
        prompt = question
    elif dataset_name == "gsm8k":
        prompt = (
            "Use python code to solve the following problem, variable <answer> should contain the final answer. "
            "Use \"print(answer)\" to get the final answer.\n"
            f"{question}"
        )
    else:
        # Previously this fell through with the payload unbound and the
        # resulting error was swallowed by the broad except below.
        raise ValueError(f"Unsupported dataset_name: {dataset_name!r}")

    payload = {"messages": [HumanMessage(content=prompt)]}
    try:
        return await graph.ainvoke(input=payload)
    except Exception as exc:
        # Best effort: one failing item must not abort the whole run, but the
        # failure is logged so it is not silently lost.
        logging.getLogger(__name__).warning("react failed for question %r: %r", question, exc)
        return None


In [11]:
import asyncio
# Start react() for every dataset item at once and wait for all of them.
# asyncio.gather returns results in the order the awaitables were passed,
# so results[i] corresponds to dataset[i].
results = await asyncio.gather(*(react(item, graph, dataset_name) for item in dataset))

In [23]:
# Write one JSON object per line (JSON Lines content, despite the .json suffix).
# NOTE(review): the output location is an absolute, machine-specific path.
save_folder = f"/Users/ariete/Projects/self-improve/output/{dataset_name}"
os.makedirs(save_folder, exist_ok=True)
# Build the file path from save_folder so the created directory and the file location cannot drift apart.
save_path = os.path.join(save_folder, "{}_{}.json".format(num_test_sample, mode))
with open(save_path, "w") as f:
    for idx, result in enumerate(results):
        row = dataset[idx]  # fetch the row once instead of once per field
        if result and not isinstance(result, BaseException):
            # Keep only what follows the last "FINAL ANSWER:" marker in the final message.
            prediction = result["messages"][-1].content.split("FINAL ANSWER:")[-1].strip()
        else:
            # Failed run (react() gave back nothing usable or an exception object):
            # record the same "None" placeholder the HotpotQA save cell uses
            # instead of crashing while indexing the result.
            prediction = "None"
        f.write(json.dumps({"idx": idx, "question": row["question"], "answer": row["answer"], "prediction": prediction}) + "\n")

# HotpotQA

In [42]:
# Run configuration for the HotpotQA evaluation.
# dataset_name selects the data file and the prompt style used by react();
# mode only appears in the output file name.
dataset_name, mode = "hotpot_qa", "react"
# Number of leading rows to evaluate; values <= 0 keep the whole file.
num_test_sample = 200
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4o-mini",
    base_url="https://api.chsdw.top/v1",
    max_retries=3,
)

In [24]:
# Rebuild the ReAct agent with the current llm, the shared tools and the shared system prompt.
graph = create_react_agent(llm, tools=tools, state_modifier=system_prompt)

In [25]:
from datasets import load_dataset
# Load the local JSONL file for the selected dataset as a single "train" split.
dataset = load_dataset("json", data_files=f"../data/{dataset_name}.jsonl", split="train")
if num_test_sample > 0:
    # Clamp to the dataset size: asking for more rows than the file contains
    # would otherwise fail on out-of-range indices instead of using every row.
    dataset = dataset.select(range(min(num_test_sample, len(dataset))))

print(dataset)

Dataset({
    features: ['id', 'question', 'answer', 'type', 'level', 'supporting_facts', 'context'],
    num_rows: 200
})


In [None]:
from tqdm.asyncio import tqdm_asyncio
# Process the items one at a time: each react() call is awaited before the next
# one starts, and tqdm shows progress over the dataset.
results = []
for data in tqdm(dataset):
    result = await react(data, graph, dataset_name)
    results.append(result)

In [44]:
# Indices of the items whose run failed. A failed run may show up either as a
# falsy value or as an exception object handed back by react(); exception
# instances are truthy, so `not result` alone would miss them.
idx_list = [
    idx
    for idx, result in enumerate(results)
    if not result or isinstance(result, BaseException)
]
print(idx_list)

[58, 61, 101, 105, 135, 146, 159, 165, 185, 186, 195]


In [43]:
# Write one JSON object per line (JSON Lines content, despite the .json suffix).
# NOTE(review): the output location is an absolute, machine-specific path.
save_folder = f"/Users/ariete/Projects/self-improve/output/{dataset_name}"
os.makedirs(save_folder, exist_ok=True)
# Build the file path from save_folder so the created directory and the file location cannot drift apart.
save_path = os.path.join(save_folder, "{}_{}.json".format(num_test_sample, mode))
with open(save_path, "w") as f:
    for idx, result in enumerate(results):
        row = dataset[idx]  # fetch the row once instead of once per field
        # An exception object returned by react() is truthy, so it has to be
        # excluded explicitly before indexing into the result.
        if result and not isinstance(result, BaseException):
            # Keep only what follows the last "FINAL ANSWER:" marker in the final message.
            prediction = result["messages"][-1].content.split("FINAL ANSWER:")[-1].strip()
        else:
            # Failed run: keep the row so indices stay aligned, with a placeholder prediction.
            prediction = "None"
        f.write(json.dumps({"idx": idx, "question": row["question"], "answer": row["answer"], "prediction": prediction}) + "\n")