## Imports

In [6]:
from langchain_community.vectorstores import FAISS
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from wikipedia_tools import tools_mapping, read_results_from_path
from wiki_agent import WikipediaAgent

## Environment Initialization

In [3]:
# Template the agent is expected to follow when it saves results to a file.
# The braces are left unformatted here; they are placeholders for the model.
file_save_format: str = (
    "\n"
    "Question: {user_question}\n"
    "Topic: {topic}\n"
    "Summary: {summary}\n"
)

# System prompt handed to WikipediaAgent below. It is an f-string: the
# {file_save_format} placeholder is filled in with the save template defined
# above, while doubled braces are escapes that render as literal single braces
# (used for the JSON examples and the user_question/topic/summary field names).
system_prompt = f"""You are an expert researcher agent that leverages Wikipedia to answer questions. 
You are not to answer questions without using a tool or using information resulting from a tool. 

Tools:
You have access to these tools:
1. search_wikipedia(query: str)  
  - Always provide a non-empty query string.

2. save_results_to_path(content: str, file_name: str, file_directory: str)  
  - Always provide non-empty content and a valid file_name.
    - Content must be the summary you generated.
  - If no file_name is given, use "summary.txt".
  - If no file_directory is given, use "." (the current working directory).
  - Never output empty strings for parameters.
  - The **content must always follow this format**:  
    {file_save_format}  
    - {{user_question}} is the original user query.
    - {{topic}} is the main subject extracted (e.g., "Sudanese Civil War").
    - {{summary}} is the most recent findings or research results (never empty).  

3. get_top_k_keywords(content: str, k: int)  
  - Always provide non-empty content and a valid integer k.

4. read_results_from_path(path: str)  
  - Always provide a valid non-empty file path.

Important:
- Do NOT output empty strings ("") for any parameter.
- You must provide the keyword arguments in the parameters json, i.e. parameters={{"query":"I am asking a question."}}

Output guidelines:
You must always respond in the following JSON format:
{{"intent": ..., "status": ..., "function": {{"name": ..., "parameters": ...}}}}

- "status" is "in_progress" if another tool call is required, otherwise "done".
- Do not omit any field.

Do not include any other text outside of the JSON.
Always respond with valid JSON using standard double quotes ".
"""


# Demo prompts; the example cells below pick them out by index.
questions: list[str] = [
    # 0: research question
    "What is the status of the Sudanese Civil War in August 2025?",
    # 1: asks the agent to save its earlier summary to a file
    "Save your sudan summary to ./results/sudan_summary.txt",
    # 2: keyword extraction over the saved file
    "Extract the top 5 keywords from the contents of ./results/sudan_summary.txt",
    # 3: read and summarize a local text file
    "Read the contents of ./random_txt.txt and summarize it.",
]

In [4]:
# Agent used by the basic examples: configured with model_name "llama3.1",
# the imported tool mapping, and the system prompt defined above.
agent = WikipediaAgent(
    model_name="llama3.1",
    system_prompt=system_prompt,
    tools=tools_mapping,
)

### Minimal agent Example

In [5]:
# Run the agent on the first demo question and keep whatever run() returns.
sudan_summary = agent.run(query=questions[0])

Step 1: intent='information_request' status='in_progress' function={'name': 'search_wikipedia', 'parameters': {'query': 'sudanese civil war current events august 2025'}} tool_result=None
Step 5: intent='inform' status='done' function={'name': 'search_wikipedia', 'parameters': {'query': 'sudanese civil war current events august 2025'}} tool_result=None


In [5]:
# Bare expression: the notebook displays the value returned by agent.run().
sudan_summary

'The current situation in the Sudanese civil war is a ongoing conflict between two rival factions of the military government of Sudan, with severe humanitarian consequences and regional implications. The conflict began on 15 April 2023 and involves the Sudanese Armed Forces (SAF) and the paramilitary Rapid Support Forces (RSF), with fighting concentrated in the capital Khartoum and Darfur region. As of February 2025, over 8.8 million people have been internally displaced, and more than 3.5 million others have fled the country as refugees. The conflict has resulted in an estimated 522,000 child deaths from famine alone, with thousands more missing or killed in targeted massacres.'

In [6]:
# Print each recorded step of the agent's most recent run, one per line.
for trace_entry in agent.most_recent_trace:
    print(trace_entry)

intent='information seeking' status='in_progress' function={'name': 'search_wikipedia', 'parameters': {'query': 'sudanese civil war current situation'}} tool_result={'success': True, 'result': "Page: First Sudanese Civil War\nSummary: The First Sudanese Civil War (also known as the Anyanya Rebellion or Anyanya I, after the name of the rebels, a term in the Madi language which means 'snake venom') was fought from 1955 to 1972 between the northern part of Sudan and the southern Sudan region which demanded representation and more regional autonomy. The war was divided into four major stages: initial guerrilla warfare, the creation of the Anyanya insurgency, political strife within the government, and establishment of the Southern Sudan Liberation Movement. Around a million people died over the course of the nearly 17-year long war. \nAlthough the Addis Ababa Agreement ended the war in 1972, it failed to completely dispel the tensions and addressed only some of the issues stated by souther

The following question requires the agent to handle context from the previous run.

In [7]:
# Second demo question: asks the agent to save its earlier Sudan summary to disk.
save_results = agent.run(query=questions[1])

Step 2: intent='save results' status='in_progress' function={'name': 'save_results_to_path', 'parameters': {'content': '', 'file_name': 'sudan_summary.txt', 'file_directory': './results'}} tool_result=None
File saved successfully at ./results/sudan_summary.txt.
Step 4: intent='' status='done' function={'name': 'save_results_to_path', 'parameters': {'content': 'the current situation in the sudanese civil war is a ongoing conflict between two rival factions of the military government of sudan, with severe humanitarian consequences and regional implications. the conflict began on 15 april 2023 and involves the sudanese armed forces (saf) and the paramilitary rapid support forces (rsf), with fighting concentrated in the capital khartoum and darfur region. as of february 2025, over 8.8 million people have been internally displaced, and more than 3.5 million others have fled the country as refugees. the conflict has resulted in an estimated 522,000 child deaths from famine alone, with thousa

In [8]:
# Bare expression: the notebook displays the agent's reply to the save request.
save_results

'The results have been successfully saved to the specified path.'

In [9]:
# Print each recorded step of the agent's most recent run, one per line.
for trace_entry in agent.most_recent_trace:
    print(trace_entry)

intent='save results' status='in_progress' function={'name': 'save_results_to_path', 'parameters': {'content': 'The current situation in the Sudanese civil war is a ongoing conflict between two rival factions of the military government of Sudan, with severe humanitarian consequences and regional implications. The conflict began on 15 April 2023 and involves the Sudanese Armed Forces (SAF) and the paramilitary Rapid Support Forces (RSF), with fighting concentrated in the capital Khartoum and Darfur region. As of February 2025, over 8.8 million people have been internally displaced, and more than 3.5 million others have fled the country as refugees. The conflict has resulted in an estimated 522,000 child deaths from famine alone, with thousands more missing or killed in targeted massacres.', 'file_name': 'sudan_summary.txt', 'file_directory': './results'}} tool_result={'success': True, 'result': 'f5c51616-2ce1-4db3-9bf1-afbe0b857a66'}
intent='' status='done' function={'name': 'save_resul

### Agent with long-term memory example

In [None]:
# Embedding model used for both the stored chunks and incoming queries.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Size the FAISS index from the model itself instead of hard-coding 384, so
# swapping the embedding model cannot silently produce a dimension mismatch.
embedding_dim = len(embedding_model.embed_query("dimension probe"))

# Empty in-memory FAISS store; documents are added in a later cell.
vector_store = FAISS(
    embedding_function=embedding_model,
    index=faiss.IndexFlatL2(embedding_dim),
    docstore=InMemoryDocstore({}),
    index_to_docstore_id={},
)

# chunk_size/chunk_overlap are measured with the splitter's length function,
# which defaults to len(), i.e. characters rather than tokens.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=250,    # maximum characters per chunk
    chunk_overlap=25,  # characters shared by adjacent chunks to keep context
)

  from .autonotebook import tqdm as notebook_tqdm


#### Creating a vector store that can retrieve chunks relevant to a query

In [9]:
# Load the summary file that the agent saved in the earlier example.
sudan_summary_from_doc = read_results_from_path("./results/sudan_summary.txt")

# Split the text into overlapping chunks and wrap each chunk as a Document.
chunks = text_splitter.split_text(sudan_summary_from_doc)
docs = [Document(page_content=chunk_text) for chunk_text in chunks]

# Last expression of the cell, so the value add_documents() returns is displayed.
vector_store.add_documents(docs)

File read successfully.


['d1e6fde5-e940-44c6-bbc4-c245affb10da',
 'fc4a5178-9f79-4e93-a717-bea32616ac06',
 '41914d10-6f62-450b-a399-f648ef3746b0']

#### RAG-assisted response

In [10]:
# A second, separately constructed agent with the same configuration as `agent`
# (presumably so it starts without the earlier conversation; confirm against
# WikipediaAgent).
agent2 = WikipediaAgent(
    model_name="llama3.1",
    system_prompt=system_prompt,
    tools=tools_mapping,
)

In [14]:
# Question to be answered with help from the chunks indexed earlier.
query = "What are the main causes of the Sudanese Civil War?"

# Fetch the two most similar stored chunks and keep only their text.
relevant_docs = [
    hit.page_content
    for hit in vector_store.similarity_search(query, k=2)
]

# Append the retrieved text (rendered as a Python list repr) to the question.
query_with_docs = (
    f"{query}. The following are documents from previous questions: {relevant_docs}"
)

In [15]:
# Bare expression: show the final query string with the retrieved chunks appended.
query_with_docs

"What are the main causes of the Sudanese Civil War?. The following are documents from previous questions: ['the current situation in the sudanese civil war is a ongoing conflict between two rival factions of the military government of sudan, with severe humanitarian consequences and regional implications. the conflict began on 15 april 2023 and involves', 'april 2023 and involves the sudanese armed forces (saf) and the paramilitary rapid support forces (rsf), with fighting concentrated in the capital khartoum and darfur region. as of february 2025, over 8.8 million people have been internally']"

In [None]:
#TODO: Fix prompt to account for RAG.