In [None]:
%pip install --quiet langchain langgraph tavily-python langchain-ollama

In [None]:
# Get research topic from user; keep asking until something non-blank is typed,
# because an empty topic would be passed straight into the prompts.
user_input = ""
while not user_input:
    user_input = input("Enter the research topic or question: ").strip()
    if not user_input:
        print("Please enter a non-empty research topic or question.")

# Ask if user wants to enable reflection
reflection_choice = input("Enable reflection? (y/n): ").strip().lower()
use_reflection = reflection_choice in ("y", "yes", "true")

# A single loop is the default; the count is only requested when reflection is on.
reflection_loops = 1
while use_reflection:
    try:
        reflection_loops = int(input("How many reflection loops?: ").strip())
    except ValueError:
        print("Please enter a valid integer.")
        continue
    if reflection_loops >= 1:
        break
    print("Please enter a number greater than or equal to one.")

In [None]:
import os
import dotenv

# Load variables from a .env file (if one is found) into the process environment,
# then read the OpenAI key from it; the value is None when the variable is unset.
dotenv.load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
import logging

# Configure logging for the notebook.
# force=True replaces any handlers already attached to the root logger; without
# it basicConfig silently does nothing when handlers exist (for example after
# re-running this cell or when the kernel pre-configures logging).
logging.basicConfig(
    level=logging.INFO,
    format='[%(levelname)s] %(message)s',
    force=True,
)

In [None]:
import operator
from typing import List, Optional

from pydantic import BaseModel
from typing_extensions import Annotated

class QueryResult(BaseModel):
    """One web search hit together with the summary written for it.

    Every field defaults to ``None``, so the annotations are ``Optional[str]``
    to match the defaults and to accept an explicit ``None``.
    """

    # Title of the search hit.
    title: Optional[str] = None
    # URL of the search hit.
    url: Optional[str] = None
    # Summary text produced for the page (or a placeholder when nothing was extracted).
    resume: Optional[str] = None

class ReportState(BaseModel):
    """State shared by the nodes of the research graph."""

    # Research topic or question supplied by the user.
    user_input: Optional[str] = None
    # Final answer text written by the last node; None until it has run.
    final_response: Optional[str] = None
    # Queries to run in the next search round.
    search_queries: List[str] = []
    # Accumulated search summaries; operator.add concatenates the lists returned
    # by the individual search tasks. Defaults to empty so the state can be
    # built before any search has run.
    queries_results: Annotated[List[QueryResult], operator.add] = []
    # Number of reflection rounds completed so far.
    research_loop_count: int = 0
    # Whether the reflection step is enabled at all.
    use_reflection: bool = True
    # Maximum number of reflection rounds.
    reflection_loops: int = 1

In [None]:
from langchain_openai import ChatOpenAI

# General-purpose chat model used for query generation, page summaries and the
# final write-up.
default_llm_openai = ChatOpenAI(
    model="gpt-4.1-mini-2025-04-14",
)

# Reasoning model used by the reflection step.
reasoning_llm_openai = ChatOpenAI(model="o4-mini-2025-04-16", reasoning_effort="medium")

In [None]:
# Shared prompt preamble: the task-specific prompts are built by appending to it.
# Contains a single str.format placeholder, {user_input}.
agent_prompt = """
You are a research planner agent.
You are working on a project that aims to answer user's question using resources found online.

Your answer should be technical, detailed and well structured using up to date information.
Cite facts, data and specific information.

Here is the user input:

<USER_INPUT>
{user_input}
</USER_INPUT>
"""

In [None]:
# Prompt template for the first planning step: the shared agent preamble followed
# by the instruction to produce the initial search queries. The only placeholder
# is {user_input}, inherited from agent_prompt.
build_queries = agent_prompt + """
Your first objective is to build a list of queries that
will be used to find answers to the user's question.

Build only 2 queries, each query should be a single sentence.
"""

In [None]:
# Prompt template for summarising one page of search output. It takes two
# str.format placeholders: {user_input} (inherited from agent_prompt) and
# {search_results}, the text to be synthesised.
resume_search = agent_prompt + """
Your objective here is to analyze the web search results and make a synthesis of it.
Emphasize the most relevant information based on user's question.

After your work, another agent will use the synthesis to build a final response to the user,
so make sure the synthesis contains only useful information.

Write about maximum of two paragraphs, and use only the information obtained from the web search results.

Here is the web search results:

<SEARCH_RESULTS>
{search_results}
</SEARCH_RESULTS>
"""

In [None]:
# Prompt template for the final answer. It takes two str.format placeholders:
# {user_input} (inherited from agent_prompt) and {search_results}, the numbered
# list of per-source summaries the answer must cite.
build_final_response = agent_prompt + """
Your objective here is to develop a final response to the user using the reports made during
the web search, with their synthesis.

The response should contain something between 3 and 5 paragraphs.

Here is the web search results:

<SEARCH_RESULTS>
{search_results}
</SEARCH_RESULTS>

You must add reference citations with the number of the citation (e.g. [1], [2], etc.) at the end of each paragraph.
"""

In [None]:
# System prompt for the reflection step. The only str.format placeholder is
# {research_topic}; the doubled braces around the JSON example are escapes so
# that str.format emits literal { and }.
reflection_instructions = """You are an expert research assistant analyzing a summary about {research_topic}.

<GOAL>
1. Review the current summary and identify knowledge gaps or aspects that need further clarification or exploration.
2. Generate 1–2 follow-up search questions that could help fill these gaps.
3. The questions should be specific enough to return relevant search results, but not overly narrow or technical.
4. Avoid overly broad or vague queries as well as too niche or academic questions unlikely to return good results on the web.
</GOAL>

<REQUIREMENTS>
- Each question should be short, clear, and self-contained.
- Prefer general-purpose terminology that search engines can easily understand.
</REQUIREMENTS>

<FORMAT>
Format your response as a JSON object with these exact keys:
- knowledge_gaps: List of descriptions of what information is missing or needs clarification
- follow_up_queries: List of 1-2 specific questions to address these gaps
</FORMAT>

<TASK>
Reflect carefully on the Summary to identify knowledge gaps and produce 1-2 follow-up queries. Then, produce your output following this JSON format:
{{
    "knowledge_gaps": [
        "The summary lacks information about performance metrics and benchmarks",
        "Missing details about implementation challenges"
    ],
    "follow_up_queries": [
        "What are typical performance benchmarks and metrics used to evaluate [specific technology]?",
        "What are the main implementation challenges when deploying [specific technology]?"
    ]
}}
</TASK>

Provide your analysis in JSON format:"""

In [None]:
def build_first_queries(state: ReportState):
    """Ask the default LLM for the initial list of search queries.

    The model is bound to a structured-output schema with a single ``queries``
    list, and that list is returned as the ``search_queries`` state update.
    """
    class QueryList(BaseModel):
        queries: List[str]

    structured_llm = default_llm_openai.with_structured_output(QueryList)
    generated = structured_llm.invoke(
        build_queries.format(user_input=state.user_input)
    )

    for log_entry in ("Generated search queries:", generated, "End of query generation."):
        logging.info(log_entry)

    return {"search_queries": generated.queries}

In [None]:
from langgraph.types import Send

def spawn_researchers(state: ReportState):
    """Create one ``single_search`` task per query in ``state.search_queries``.

    Each task receives the bare query string as its payload.
    """
    logging.info("Spawning parallel researcher agents.")
    search_tasks = []
    for pending_query in state.search_queries:
        search_tasks.append(Send("single_search", pending_query))
    return search_tasks

In [None]:
from tavily import TavilyClient

def single_search(query: str):
    """Run one web search for ``query`` and summarise the top hit.

    Args:
        query: The search query string delivered as this task's payload.

    Returns:
        A state update ``{"queries_results": [...]}``. The list holds one
        ``QueryResult`` for the top hit, or is empty when the search returned
        no hits at all.
    """
    tavily_client = TavilyClient()
    results = tavily_client.search(query,
                                   max_results=1,
                                   include_raw_content=False)

    hits = results["results"]
    if not hits:
        # Nothing to summarise; contribute no result instead of failing on hits[0].
        logging.warning(f"No search results returned for query: {query}")
        return {"queries_results": []}

    top_hit = hits[0]
    url = top_hit["url"]
    title = top_hit["title"]

    extracted_pages = tavily_client.extract(url)["results"]
    raw_content = extracted_pages[0].get("raw_content") if extracted_pages else None

    if raw_content:
        # NOTE(review): `user_input` is the notebook-level variable, not graph
        # state; this task's payload is only the query string.
        prompt = resume_search.format(user_input=user_input, search_results=raw_content)
        llm_result = default_llm_openai.invoke(prompt)
        query_results = QueryResult(
            title=title,
            url=url,
            resume=llm_result.content
        )
        logging.info(f"Query: {query}")
        logging.info("Search result summary:")
        logging.info(query_results)
    else:
        query_results = QueryResult(
            title=title,
            url=url,
            resume="No content could be extracted from the provided URL."
        )
        logging.warning(f"No content extracted for query: {query} (URL: {url})")
    return {"queries_results": [query_results]}


In [None]:
def final_writer(state: ReportState):
    """Write the final answer from the accumulated search summaries.

    Builds a numbered listing of every entry in ``state.queries_results`` for
    the prompt and a matching reference list, asks the default LLM for the
    answer, and appends the references to the answer text.

    Returns:
        A state update ``{"final_response": <str>}``: the model's answer text
        followed by a "References:" section.
    """
    result_sections = []
    reference_lines = []
    for number, result in enumerate(state.queries_results, start=1):
        result_sections.append(
            f"[{number}]\n\n"
            f"Title: {result.title}\n"
            f"URL: {result.url}\n"
            f"Content: {result.resume}\n\n"
        )
        reference_lines.append(f"[{number}] - {result.title} ({result.url})\n")

    search_results = "".join(result_sections)
    reference = "".join(reference_lines)

    prompt = build_final_response.format(user_input=state.user_input, search_results=search_results)

    logging.info("Compiled search results for final synthesis:")
    logging.info(search_results)
    logging.info("End of search results compilation.\n")

    llm_result = default_llm_openai.invoke(prompt)

    # Concatenate with the message *text*: adding a string to the message object
    # itself does not produce a string, while final_response is declared as str.
    final_response = llm_result.content + "\n\nReferences:\n" + reference

    return {"final_response": final_response}

In [None]:
from langchain_core.messages import SystemMessage, HumanMessage
import json

# Updated reflect_on_summary function to handle multiple queries
def reflect_on_summary(state: ReportState):
    """Ask the reasoning LLM for follow-up queries based on the results so far.

    The reply is expected to be a JSON object with ``knowledge_gaps`` and
    ``follow_up_queries``. A reply wrapped in a Markdown code fence is
    unwrapped before parsing. If the reply cannot be parsed, or yields no
    usable query, a generic "Tell me more about ..." query is used instead.

    Returns:
        A state update with the new ``search_queries`` (a non-empty list of
        strings) and the incremented ``research_loop_count``.
    """
    new_loop_count = state.research_loop_count + 1
    fallback_query = f"Tell me more about {state.user_input}"

    result = reasoning_llm_openai.invoke(
        [SystemMessage(content=reflection_instructions.format(research_topic=state.user_input)),
        HumanMessage(content=f"Reflect on our existing knowledge: \n === \n {state.queries_results}, \n === \n And now identify knowledge gaps and generate 1-2 follow-up web search queries:")]
    )

    try:
        raw_reply = result.content.strip()
        if raw_reply.startswith("```"):
            # Drop the opening fence line (``` or ```json) and the closing fence.
            raw_reply = raw_reply.split("\n", 1)[-1]
            raw_reply = raw_reply.rsplit("```", 1)[0]

        reflection_content = json.loads(raw_reply)

        logging.info("Reflection analysis:")
        logging.info(f"Knowledge gaps: {reflection_content.get('knowledge_gaps', [])}")
        logging.info(f"Follow-up queries: {reflection_content.get('follow_up_queries', [])}")
        logging.info("End of reflection analysis.\n")

        # Get the follow-up queries
        queries = reflection_content.get('follow_up_queries', [])

        # A bare string would otherwise be iterated character by character
        # when the queries are fanned out.
        if isinstance(queries, str):
            queries = [queries]

        # Keep only non-blank string queries.
        queries = [q.strip() for q in (queries or []) if isinstance(q, str) and q.strip()]
    except (json.JSONDecodeError, KeyError, AttributeError, TypeError) as exc:
        logging.warning(f"Could not parse reflection output ({exc}); using fallback query.")
        queries = []

    if not queries:
        queries = [fallback_query]

    return {
        "search_queries": queries,
        "research_loop_count": new_loop_count
    }


In [None]:
def route_after_single_search(state: ReportState):
    """Pick the node that follows a search round.

    Returns ``"reflect_on_summary"`` while reflection is enabled and fewer than
    ``reflection_loops`` rounds have been completed; otherwise ``"final_writer"``.
    """
    keep_researching = (
        state.use_reflection
        and state.research_loop_count < state.reflection_loops
    )
    return "reflect_on_summary" if keep_researching else "final_writer"

In [None]:
from langgraph.graph import START, END, StateGraph

builder = StateGraph(ReportState)

# Register each node under the name used by the edges and routing functions.
for node_name, node_fn in (
    ("build_first_queries", build_first_queries),
    ("single_search", single_search),
    ("reflect_on_summary", reflect_on_summary),
    ("final_writer", final_writer),
):
    builder.add_node(node_name, node_fn)

# Entry point: plan the first queries, then fan out one search per query.
builder.add_edge(START, "build_first_queries")
builder.add_conditional_edges(
    "build_first_queries",
    spawn_researchers,
    ["single_search"],
)
# After searching, either reflect for another round or write the final answer.
builder.add_conditional_edges(
    "single_search",
    route_after_single_search,
    ["reflect_on_summary", "final_writer"],
)
# Reflection produces new queries, which fan out to searches again.
builder.add_conditional_edges(
    "reflect_on_summary",
    spawn_researchers,
    ["single_search"],
)
builder.add_edge("final_writer", END)

graph = builder.compile()

In [None]:
from IPython.display import Image

# Render the compiled graph as a Mermaid PNG; the trailing expression is what
# the cell displays.
graph_png = graph.get_graph().draw_mermaid_png()
Image(graph_png)

In [None]:
# Seed the graph with the values collected from the user, then run it to completion.
initial_state = dict(
    user_input=user_input,
    use_reflection=use_reflection,
    reflection_loops=reflection_loops,
)
response = graph.invoke(initial_state)

In [None]:
# ReportState declares final_response as a string, so print it directly when it
# is one; otherwise fall back to the content of the first message it wraps.
final_response = response["final_response"]
if not isinstance(final_response, str):
    final_response = final_response.messages[0].content
print(final_response)