In [1]:
import asyncio
import os

import dspy
from dotenv import load_dotenv, find_dotenv
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.utilities.wikidata import WikidataAPIWrapper

from agent.utils.tools import BingSearchTool
from agent.utils.tools import WikidataTool, GoogleSearchTool, WikipediaTool, GoogleKnowledgeGraphTool, calculator, \
	python_interpreter

_ = load_dotenv(find_dotenv())
import nest_asyncio

nest_asyncio.apply()

In [2]:
from agent.utils.loader import load_processed_data

# --- Experiment configuration -------------------------------------------------
# Everything a reader might tune lives here; the sampling settings are also baked
# into the results filename so that different runs do not overwrite each other.
dataset_name = 'hotpot_qa'
mode = "self-guidance"
model = "gpt-4o-mini-2024-07-18"
num_samples = 1000
top_p = 0.95
temperature = 0
seed = 42
batch_size = 100

processed_data_path = f"../../data/processed_data/{dataset_name}.jsonl"

# Every gpt-4o-mini snapshot shares one output directory; any other model uses its full name.
save_results_path = (
	f"../../output/inference/{'gpt-4o-mini' if 'gpt-4o-mini' in model else model}/{dataset_name}/{mode}/"
	f"num_samples_{num_samples}_top_p_{top_p}_temperature_{temperature}_seed_{seed}.jsonl"
)
# Load the preprocessed examples for `dataset_name` from `processed_data_path`.
# NOTE(review): `dataset` is rebound to a HotPotQA object in the COPRO cell further down, and the
# inference cell reads `processed_dataset`, which no visible cell defines — confirm which object
# the inference loop is meant to consume before relying on Restart & Run All.
dataset = load_processed_data(dataset_name=dataset_name, file_path=processed_data_path)

In [3]:
# Planning-stage DSPy signature: question in, list of step/guidance items out.
# The class docstring is the instruction text DSPy sends to the model, so it is prompt
# content rather than ordinary documentation and is intentionally left verbatim.
class Plan(dspy.Signature):
	"""Given a question or problem, generate a solving plan, which is a list of steps.\
	It doesn't need to actually solve the problem, just generate a plan.\
	Based on the overall question and plan, identify all the error prone points in the problem solving process of each step.\
	These error-prone points are the key to guiding the selection of tools and the input of the selected tools.\
	Remember that there may be multiple error-prone areas in each step, and you should list them all."""
	# Input: the question to be solved.
	question: str = dspy.InputField()
	# Output: SelfGuidance iterates over this list and reads item["step"] and item["guidance"].
	plan_and_guidance: list[dict] = dspy.OutputField(desc="Each item contains a key 'step' and a key 'guidance'.")


# Tool-selection DSPy signature: given the steps so far and the current step/guidance item,
# pick the tools to run and the input for each one.
# The docstring is prompt content and is left verbatim.
# NOTE(review): the tool catalogue inside the docstring is hard-coded; it mirrors the
# `tool_description` text printed from `tool_set` in a later cell, so the two must be kept in sync
# by hand whenever a tool is added or removed.
# NOTE(review): "numuxpr" in the calculator entry is probably a typo for "numexpr"; the same text
# appears in the tool's own description, so confirm at the source before changing the prompt.
class Tool(dspy.Signature):
	"""Regarding current step and the guidance, choose the appropriate tools to help you complete current step.\
	 The guidance should be guiding principles to guide subsequent behavior. \
	 Before providing the selected tools and corresponding inputs, \
	 you should fully understand the functions of each tool and their specific requirements for the input content. \
	 The tool name must accurately match the existing tool (including characters such as uppercase and lowercase, spaces, etc.). \
	 Remember that there may be multiple tools that meet the requirements, and there are also some tools that are not suitable for the current step. \
	 Avoid selecting tools that are obviously unsuitable.

	You have the following tool set:
	[wiki_data] - A wrapper around Wikidata. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be the exact name of the item you want information about or a Wikidata QID.
	[google_search] - A search engine. useful for when you need to answer questions about current events. Input should be a search query.
	[calculator] - Useful when you need to calculate the value of a mathematical expression, including basic arithmetic operations. Use this tool for math operations. Input should strictly follow the numuxpr syntax.
	[python_interpreter] - Useful when you need to execute a code and get the value of the variables <answer>. Use this tool for code execution.This tool will return the code execution result and final value of the variable <answer>. Input should be pure python code string.
	[wikipedia] - A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.
	[google_knowledge_graph] - This tool searches for entities in the Google Knowledge Graph. It provides information about people, places, things, and concepts. Useful when you need to get information about a specific entity. Input should be an entity name.
	"""
	# Input: records of the steps already completed (SelfGuidance passes its `past_steps` list).
	past_steps: list[dict] = dspy.InputField(desc="The past steps and guidance, ordered by the order of the steps.")
	# Input: one item from the plan, carrying the 'step' and 'guidance' keys.
	current_step_and_guidance: dict = dspy.InputField(desc="The current step and guidance.")
	# Output: consumed by tool_invoke / tool_ainvoke through `.items()`, i.e. as a mapping of
	# tool name -> tool input. Being a dict, it can hold at most one input per tool name.
	tool: dict = dspy.OutputField(
		desc="List all the appropriate tools to help you complete current step."
		     "Each item is a dictionary, the key is the tool name, the value is the input content of the tool.")


# Per-step answering DSPy signature: turns the tool output for one plan step into the supporting
# evidence and an answer for that step. The docstring is prompt content and is left verbatim.
class Result(dspy.Signature):
	"""Based on current step, guidance and tool result, provide your answer to the current step. Remember that all the tool result are reliable. If the tool result contains the information you need, you should list the information as evidence for the current step."""

	# Inputs: SelfGuidance fills `step` / `guidance` from the current plan item.
	step: str = dspy.InputField()
	guidance: str = dspy.InputField()
	# Input: the list returned by tool_invoke / tool_ainvoke for this step; each record carries
	# the keys 'tool_name', 'tool_input' and 'tool_result'.
	tool_result: list[dict] = dspy.InputField()
	# Outputs: stored by SelfGuidance as the 'evidence' and 'result' entries of the step record.
	evidence: str = dspy.OutputField(
		desc="The evidence contains the information in the tool result which is needed for the current step answer.")
	answer: str = dspy.OutputField(desc="The answer to the current step.")


# Final-answer DSPy signature: reads the question plus every step record and produces a fused
# rationale (`fusion`) and a terse final answer. The docstring is prompt content and is left verbatim.
class FinalAnswer(dspy.Signature):
	"""Using the provided evidence, answer the question by extracting only the specific information required. \
	Your response should contains two part, the first part is the fusion of the Revising Process and the second part is the final answer.\
	In the final answer, do not include any explanations, context, or additional information. Just focus on delivering the exact answer as concisely as possible!!!\
	There is no need to answer the question in the form of a complete sentence, just provide the answer in the form of a noun, time, entity, yes or no, etc.\
	If there are multiple answers, please concatenate them with a comma, i.e. "answer1, answer2, answer3"."""

	# Input: the original question.
	question: str = dspy.InputField()
	# Input: the step records accumulated by SelfGuidance (keys 'step', 'evidence', 'result').
	steps: list[dict] = dspy.InputField(desc="Each item contains a key 'step', a key 'evidence' and a key 'result'.")
	# Output: returned by SelfGuidance as the `fusion` field of its prediction.
	fusion: str = dspy.OutputField(desc="The fusion of the Revising Process.")
	# Output: returned by SelfGuidance as the `prediction` field, which the metric scores.
	final_answer: str = dspy.OutputField(
		desc="Do not include any explanations, context, or additional information. Just focus on delivering the exact answer as concisely as possible!!! ")

In [4]:
# Instantiate each tool wrapper once, up front.
wikidata_tool = WikidataTool(api_wrapper=WikidataAPIWrapper())
google_search_tool = GoogleSearchTool()
bing_search_tool = BingSearchTool()  # created but not registered in `tool_set` below
wikipedia_tool = WikipediaTool(api_wrapper=WikipediaAPIWrapper())
google_knowledge_graph_tool = GoogleKnowledgeGraphTool()

# Registry keyed by the exact tool names the model is told to use.
tool_set = {
	"wiki_data": wikidata_tool,
	"google_search": google_search_tool,
	"calculator": calculator,
	"python_interpreter": python_interpreter,
	"wikipedia": wikipedia_tool,
	"google_knowledge_graph": google_knowledge_graph_tool,
}

# One "[name] - description" line per registered tool, in registry order.
tool_description = "\n".join(
	f"[{registered_name}] - {registered_tool.description}"
	for registered_name, registered_tool in tool_set.items()
)


async def tool_ainvoke(tool_dictionary: dict) -> list[tuple | Exception]:
	async def run_tool(tool_name, tool_input):
		if tool_name not in tool_set:
			return {"tool_name": tool_name, "tool_input": tool_input, "tool_result": "Tool not found."}
		try:
			result = await tool_set[tool_name].arun(tool_input)
			return {"tool_name": tool_name, "tool_input": tool_input, "tool_result": result}
		except Exception as e:
			return {"tool_name": tool_name, "tool_input": tool_input, "tool_result": str(e)}

	tasks = [run_tool(tool_name, tool_input) for tool_name, tool_input in tool_dictionary.items()]

	tool_result = await asyncio.gather(*tasks, return_exceptions=True)
	return tool_result


def tool_invoke(tool_dictionary: dict) -> list[tuple | Exception]:
	def run_tool(tool_name, tool_input):
		if tool_name not in tool_set:
			return {"tool_name": tool_name, "tool_input": tool_input, "tool_result": "Tool not found."}
		try:
			result = tool_set[tool_name].run(tool_input)
			return {"tool_name": tool_name, "tool_input": tool_input, "tool_result": result}
		except Exception as e:
			return {"tool_name": tool_name, "tool_input": tool_input, "tool_result": str(e)}

	tool_results = [run_tool(tool_name, tool_input) for tool_name, tool_input in tool_dictionary.items()]

	return tool_results


# Show the generated tool catalogue so it can be compared with the list embedded in the Tool signature docstring.
print(tool_description)

[wiki_data] - A wrapper around Wikidata. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be the exact name of the item you want information about or a Wikidata QID.
[google_search] - A search engine. useful for when you need to answer questions about current events. Input should be a search query. 
[calculator] - Useful when you need to calculate the value of a mathematical expression, including basic arithmetic operations. Use this tool for math operations. Input should strictly follow the numuxpr syntax.
[python_interpreter] - Useful when you need to execute a code and get the value of the variables <answer>. Use this tool for code execution.This tool will return the code execution result and final value of the variable <answer>. Input should be pure python code string.
[wikipedia] - A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, comp

In [5]:
class SelfGuidance(dspy.Module):
	"""Plan-then-execute QA pipeline: plan steps, run tools per step, then fuse a final answer.

	Stages, in order:
	1. ``plan_and_guidance`` (``Plan``) turns the question into step/guidance items.
	2. For each item, ``tool`` (``Tool``) selects tools and their inputs, the tools are run,
	   and ``cot`` (``Result``) produces the evidence and answer for that step.
	3. ``final_answer`` (``FinalAnswer``) reads the question plus all step records.

	``forward`` and ``async_forward`` implement the same flow; the latter awaits every stage
	and runs the tools through ``tool_ainvoke``.
	"""

	def __init__(self):
		super().__init__()
		self.plan_and_guidance = dspy.Predict(Plan)
		self.tool = dspy.ChainOfThought(Tool)
		# Bind both tool runners on the instance so the sync and async paths are wired the same way.
		self.tool_invoke = tool_invoke
		self.tool_ainvoke = tool_ainvoke
		self.cot = dspy.Predict(Result)
		self.final_answer = dspy.Predict(FinalAnswer)

	@staticmethod
	def _step_record(step_and_guidance: dict, cot_result) -> dict:
		"""Build the per-step record consumed by later tool selections and the final answer stage."""
		return {"step": step_and_guidance["step"], "evidence": cot_result.evidence, "result": cot_result.answer}

	@staticmethod
	def _build_prediction(question, answer, final_answer, tool_results, past_steps) -> dspy.Prediction:
		"""Bundle the pipeline outputs into the prediction returned by both forward variants."""
		return dspy.Prediction(
			question=question,
			answer=answer,
			prediction=final_answer.final_answer,
			fusion=final_answer.fusion,
			tool_results=tool_results,
			past_steps=past_steps
		)

	def forward(self, question: str, answer: str, **kwargs) -> dspy.Prediction:
		"""Run the pipeline synchronously.

		Args:
			question: The question to answer.
			answer: The reference answer; it is not shown to any stage, only echoed into the
				returned prediction.
			**kwargs: Accepted and ignored, so dataset rows with extra columns can be splatted in.

		Returns:
			A ``dspy.Prediction`` with ``question``, ``answer``, ``prediction`` (the final answer),
			``fusion``, ``tool_results`` (one list per step) and ``past_steps``.
		"""
		past_steps = []
		tool_results = []

		plan_result = self.plan_and_guidance(question=question)
		for step_and_guidance in plan_result.plan_and_guidance:
			tool_selection = self.tool(past_steps=past_steps, current_step_and_guidance=step_and_guidance)
			tool_result = self.tool_invoke(tool_selection.tool)
			tool_results.append(tool_result)
			cot_result = self.cot(step=step_and_guidance["step"], guidance=step_and_guidance["guidance"],
			                      tool_result=tool_result)
			past_steps.append(self._step_record(step_and_guidance, cot_result))
		final_answer = self.final_answer(question=question, steps=past_steps)

		return self._build_prediction(question, answer, final_answer, tool_results, past_steps)

	async def async_forward(self, question: str, answer: str, **kwargs):
		"""Async twin of ``forward``: same stages and same returned fields, with every stage awaited.

		The tools selected for a step are run concurrently by ``tool_ainvoke``; the steps
		themselves still run one after another because each one sees the previous records.
		"""
		past_steps = []
		tool_results = []

		plan_result = await self.plan_and_guidance.async_call(question=question)
		for step_and_guidance in plan_result.plan_and_guidance:
			tool_selection = await self.tool.async_call(past_steps=past_steps,
			                                            current_step_and_guidance=step_and_guidance)
			tool_result = await self.tool_ainvoke(tool_selection.tool)
			tool_results.append(tool_result)
			cot_result = await self.cot.async_call(step=step_and_guidance["step"],
			                                       guidance=step_and_guidance["guidance"], tool_result=tool_result)
			past_steps.append(self._step_record(step_and_guidance, cot_result))
		final_answer = await self.final_answer.async_call(question=question, steps=past_steps)

		return self._build_prediction(question, answer, final_answer, tool_results, past_steps)


# Configure the global DSPy language model from the experiment settings in the config cell.
# `model`, `temperature`, `top_p` and `seed` are embedded in `save_results_path`, so they are
# passed to the LM here; otherwise the results filename would claim settings that were never applied.
# NOTE(review): extra keyword arguments are expected to be forwarded to the underlying completion
# call — confirm against the installed dspy version.
dspy.configure(
	lm=dspy.LM(
		model=f"openai/{model}",
		api_base="https://api.chsdw.top/v1",
		cache=False,
		temperature=temperature,
		top_p=top_p,
		seed=seed,
	)
)
self_guidance = SelfGuidance()

# Evaluation

In [6]:
from agent.utils.qa import multi_ref_score


def validate_answer(example: dspy.Example, prediction: dspy.Prediction, trace=None):
	"""Metric for the optimizer: score the predicted answer against the reference answer.

	Args:
		example: The gold example; its ``answer`` field is the reference.
		prediction: The pipeline output; its ``prediction`` field is the answer being scored.
		trace: Unused. Accepted so the metric also works with callers that pass a third
			``trace`` argument.

	Returns:
		The second element (F1) of the 4-tuple returned by ``multi_ref_score``; the exact-match,
		precision and recall components are discarded.
	"""
	_, f1, _, _ = multi_ref_score(prediction.prediction, example.answer)
	return f1

In [None]:
from dspy.datasets import HotPotQA
from dspy.teleprompt import COPRO

# COPRO instruction optimizer, scored with `validate_answer`.
teleprompter = COPRO(metric=validate_answer, verbose=True, breadth=10, depth=3, track_stats=True)

# Used in Evaluate class in the optimization process
kwargs = {"num_threads": 1, "display_progress": True, "display_table": 0}

# Small HotPotQA split for optimization. This rebinds `dataset`, replacing the object loaded in the config cell.
dataset = HotPotQA(train_seed=1, train_size=20, eval_seed=42, dev_size=50, test_size=0)

# Both fields are marked as inputs because SelfGuidance.forward takes `question` and `answer`.
train_set = [
	dspy.Example({"question": record["question"], "answer": record["answer"]}).with_inputs("question", "answer")
	for record in dataset.train
]



In [52]:
# Location of the optimized prompt state. Set the COMPILED_PROMPT_PATH environment variable
# (e.g. in the .env file loaded in the first cell) to run on another machine; the fallback is
# the original machine-specific absolute path, so existing behavior is unchanged.
compiled_prompt_path = os.getenv(
	"COMPILED_PROMPT_PATH",
	"/Users/ariete/Projects/self-improve/agent/inference/compiled_prompt_opt.json",
)

# Fresh pipeline instance whose predictors are overwritten with the saved, optimized state.
compiled_prompt_opt = SelfGuidance()
compiled_prompt_opt.load(path=compiled_prompt_path)
print(compiled_prompt_opt)

plan_and_guidance = Predict(StringSignature(question -> plan_and_guidance
    instructions='When presented with a question or problem, provide a detailed step-by-step solving plan that outlines how to tackle the situation. For each step in your plan, identify and list all potential error-prone points that could occur during the problem-solving process. Highlighting these error-prone areas will assist in selecting the necessary tools and informing their proper usage. Ensure that you comprehensively cover all possible error-prone aspects associated with each step listed.'
    question = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Question:', 'desc': '${question}'})
    plan_and_guidance = Field(annotation=list[dict] required=True json_schema_extra={'desc': "Each item contains a key 'step' and a key 'guidance'.", '__dspy_field_type': 'output', 'prefix': 'Please specify the question or problem, and I will outline a solving plan along with 

In [54]:
from dspy import Prediction
from tqdm import tqdm
from tqdm.asyncio import tqdm_asyncio
import json

# Accumulates one JSON-serializable dict per processed example; mirrors the checkpoint file on disk.
results = []


async def self_guidance_inference(batch_size: int = 30, total_samples: int | None = None) -> None:
	"""Run the compiled pipeline over the dataset in concurrent batches, checkpointing after each batch.

	The run is resumable: rows already present in ``save_results_path`` are loaded first and
	processing continues from that offset.

	Args:
		batch_size: Number of examples submitted concurrently per batch.
		total_samples: Index at which to stop. Defaults to the ``num_samples`` setting from the
			config cell, which is also the value embedded in ``save_results_path``.

	Side effects:
		Replaces the contents of the module-level ``results`` list and rewrites
		``save_results_path`` (one JSON object per line) after every batch.
	"""
	# NOTE(review): `processed_dataset` is not defined in any visible cell; it is assumed to be an
	# indexable dataset exposing `.select(range)` whose rows are dicts of forward() kwargs — confirm.
	target = num_samples if total_samples is None else total_samples

	# Start from the checkpoint alone, so calling this again cannot append the file's rows a second time.
	results.clear()
	if os.path.exists(save_results_path):
		with open(save_results_path, 'r') as file:
			results.extend(json.loads(line) for line in file if line.strip())
	else:
		os.makedirs(os.path.dirname(save_results_path), exist_ok=True)

	for start in tqdm(range(len(results), target, batch_size)):
		# Clamp the last batch so the run never goes past `target`.
		stop = min(start + batch_size, target)
		batch = processed_dataset.select(range(start, stop))
		tasks = [compiled_prompt_opt.async_call(**batch[i]) for i in range(len(batch))]
		predictions = await tqdm_asyncio.gather(*tasks)
		# Predictions are unpacked into plain dicts so they can be written as JSON.
		results.extend({**item} if isinstance(item, Prediction) else item for item in predictions)
		with open(save_results_path, 'w') as file:
			for result in results:
				file.write(json.dumps(result) + "\n")

In [None]:
# Start (or resume) the batched inference run; top-level `await` is valid here because this is a notebook cell.
await self_guidance_inference()