In [None]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
import json
from langchain_core.messages import HumanMessage, SystemMessage
import requests
from langchain_community.tools.tavily_search import TavilySearchResults
from V1_RAG_classes import DataLoader, Chunks, EmbeddingManager, VectorStore, RAGRetriever

load_dotenv(".env")
GROQ_TOKEN = os.environ['GROQ_TOKEN']

  from .autonotebook import tqdm as notebook_tqdm


# ***UNDERSTANDING TASK***

In [2]:
llm = ChatGroq(
    model = 'llama-3.3-70b-versatile',
    api_key=GROQ_TOKEN,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)
system_prompt = """
    You are a helpful assistant. Your task is to analyze the user's question and produce a structured classification.

    Steps:
    1. Determine the task type(s).
    Possible categories:
    - RESEARCH
    - SURVEY
    - COMPARISON
    If multiple categories apply, include all of them.

    2. Extract key information from the question, including (when applicable):
    - topic
    - time_range
    - methods / approaches
    - constraints or assumptions

    Output format:
    - Return ONLY a valid JSON object.
    - Do NOT include explanations, markdown, or extra text.
    - Use lowercase values for task_type.

    Example:

    User question:
    "Compare Transformers and LSTMs for time-series forecasting"

    Output:
    {
    "user_prompt" : ["Compare Transformers and LSTMs for time-series forecasting"]
    "task": {
    "task_type": ["comparison"],
    "topic": "time-series forecasting",
    "methods": ["transformers", "lstms"]}
    }

    Some Rules:
    1. if you are saying it is classification task type you should provide the atleast 2 diffrent methods in comparision from the user input
    you cant just provide one method in comparison and say it is a comparision task.
    """


user_input = input("\nEnter the Prompt:\n")
messages = [
    SystemMessage(content = system_prompt),

    HumanMessage(content=f'{user_input}')
]

response = llm.invoke(messages)

task_n_prompt = response.content
print(task_n_prompt)

{
"user_prompt" : ["poverty in bihar as compared to poverty in india (overall)"],
"task": {
"task_type": ["comparison"],
"topic": "poverty",
"methods": ["bihar", "india overall"]
}
}


# ***PLANNING***

In [3]:
agent_state = {
    "task": None,  
    "plan": None,    
    "current_step": 0,
    "notes": [],
    "sources": [],
    "status": "idle"
}


agent_state["task"] = json.loads(response.content)['task']

In [4]:
#system prompt for planner
planner_system_prompt = """
You are an expert research planner.

Given a structured research task, generate a step-by-step execution plan.

Rules:
- Return ONLY a valid JSON object.
- No explanations
- Steps must be from this allowed list:
  - search_sources
  - read_documents
  - extract_key_points
  - compare_methods
  - analyze_trends
  - write_report

Return format:
{
  "steps": ["step1", "step2", ...]
}
"""


planner_messages = [
    SystemMessage(content=planner_system_prompt),
    HumanMessage(content=task_n_prompt)
]
planner_response = llm.invoke(planner_messages)
print(planner_response.content)
agent_state['plan'] = json.loads(planner_response.content)['steps']
agent_state["status"] = "planned"

{
  "steps": [
    "search_sources",
    "read_documents",
    "extract_key_points",
    "compare_methods",
    "analyze_trends",
    "write_report"
  ]
}


# ***WEB SEARCHING***

In [5]:
# Generate Search queries for web searching
query_system_prompt = """
You are a search query generator.

Your task is to generate effective ACADEMIC search queries
based on the user's original question and a structured Python dictionary.

Rules:
- Generate 3 to 5 distinct academic search queries
- Queries should be suitable for Google Scholar / arXiv
- Return ONLY a valid JSON object
- No explanations, no markdown

Sample INPUT:
{
  "user_prompt": "Compare Transformers and LSTMs for time-series forecasting",
  "task": {
    "task_type": ["comparison"],
    "topic": "time-series forecasting",
    "methods": ["transformers", "lstms"]
  }
}

Sample OUTPUT:
{
  "search_queries": [
    "transformer vs lstm time series forecasting",
    "benchmark transformer lstm time series forecasting",
    "deep learning time series forecasting comparison paper"
  ]
}
"""

# LLM for Query Generation
query_llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    api_key=GROQ_TOKEN,
    temperature=0.2,       
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

query_messages = [
    SystemMessage(content=query_system_prompt),
    HumanMessage(content=task_n_prompt)
]

query_response = query_llm.invoke(query_messages)

# Parse & Validate Output
try:
    parsed_output = json.loads(query_response.content)
    search_queries = parsed_output["search_queries"]

    if not (3 <= len(search_queries) <= 5):
        raise ValueError("Expected 3–5 search queries")

except Exception as e:
    raise ValueError(f"Invalid query generator output: {e}")


print("\nGenerated Search Queries:")
search_queries


Generated Search Queries:


['poverty in bihar vs india: a comparative analysis',
 'bihar poverty rates compared to national average in india',
 'regional poverty disparities: bihar and india',
 'comparing poverty levels in bihar and all-india',
 'poverty in bihar: a comparative study with national trends']

In [6]:
#TAVILY
TAVILY_TOKEN = os.environ['TAVILY_API_KEY']

def search_infos(queries,max_results=3):
    print('Searching Internet for relevant information....\n')
    search = TavilySearchResults(max_results=max_results)
    all_results = []

    for query in queries:
        search_query = f"{query} filetype:pdf"
        results = search.invoke(search_query)
        all_results.extend(results)

    seen_urls = set()
    unique_results = []

    for item in all_results:
        if item['url'] not in seen_urls:
            unique_results.append(item)
            seen_urls.add(item['url'])

    display(seen_urls)
    print("Information Aquired from Internet//\n")
    return unique_results


#download all the pdfs
def download_files(links:list, download_folder="data/downloaded_pdfs"):
    if not os.path.exists(download_folder):
        os.makedirs(download_folder)

    downloaded_files = {}

    # print("\n".join([res['url'] for res in all_results]))  #used for debugging
    for i, res in enumerate(links):
        url = res['url']
        # title = (res['title']).strip()

        if "pdf" in str(url):
            try:
                print(f"Downloading: {url}")
                response = requests.get(url=url, timeout=10)

                if response.status_code == 200:
                    filepath = os.path.join(download_folder, f"paper_{i}.pdf")
                    with open(filepath, "wb") as f:
                        f.write(response.content)
                    downloaded_files[f"paper_{i}.pdf"] = url

                    print(f"Download complete Saved as paper{i}.pdf")

            except Exception as e:
                print(f'failed to download {url}:\n{e}')
        
    return downloaded_files


urls = search_infos(search_queries)
downloaded_files = download_files(urls,download_folder='data/new_downloads')

Searching Internet for relevant information....



  search = TavilySearchResults(max_results=max_results)


{'https://ageconsearch.umn.edu/record/290427/files/pre_session10_M.S.Meena_India.pdf',
 'https://ewsdata.rightsindevelopment.org/files/documents/01/ADB-51180-001_5uWYO3b.pdf',
 'https://nirdprojms.in/index.php/jrd/article/download/100053/72188',
 'https://vdsa.icrisat.org/Include/reports/DRPBVLS.pdf',
 'https://www.econstor.eu/bitstream/10419/63315/1/477325734.pdf',
 'https://www.ihdindia.org/pdf/Poverty_Seminar_Report.pdf',
 'https://www.ihdindia.org/rpb/pdf/study/Chapter-3.pdf',
 'https://www.podarprabodhan.in/journals/2018/Podar%20Prabodhan%202018_06%20Poverty%20&%20HDI.pdf'}

Information Aquired from Internet//

Downloading: https://www.podarprabodhan.in/journals/2018/Podar%20Prabodhan%202018_06%20Poverty%20&%20HDI.pdf
failed to download https://www.podarprabodhan.in/journals/2018/Podar%20Prabodhan%202018_06%20Poverty%20&%20HDI.pdf:
HTTPSConnectionPool(host='www.podarprabodhan.in', port=443): Max retries exceeded with url: /journals/2018/Podar%20Prabodhan%202018_06%20Poverty%20&%20HDI.pdf (Caused by NameResolutionError("HTTPSConnection(host='www.podarprabodhan.in', port=443): Failed to resolve 'www.podarprabodhan.in' ([Errno 11002] getaddrinfo failed)"))
Downloading: https://vdsa.icrisat.org/Include/reports/DRPBVLS.pdf
Download complete Saved as paper1.pdf
Downloading: https://www.ihdindia.org/rpb/pdf/study/Chapter-3.pdf
Download complete Saved as paper2.pdf
Downloading: https://ewsdata.rightsindevelopment.org/files/documents/01/ADB-51180-001_5uWYO3b.pdf
Download complete Saved as paper3.pdf
Downloading: https://ageconsearch.umn.edu/record/290427/files/pre_

# ***READ AND RETRIEVE INFORMATIONS***

In [7]:
pdf_dir = r'data\new_downloads'

def read_documents(pdf_dir):

    print('Loading Data............')
    dataloader = DataLoader(pdf_directory=pdf_dir)
    all_pdf_docs = dataloader.process_all_pdfs()
    print('Data Loading Completed!\n')

    print('Chunking the loaded data...............')
    chunker = Chunks(all_pdf_docs)
    chunks = chunker.split_documents()
    print('chunking completed\n')

    print('Embedding chunks................')
    embed_manager = EmbeddingManager()
    embeddings = embed_manager.generate_embeddings(texts=chunks)
    print('Embedding completed\n')

    print("Storing Embedding vector.............")
    vectorstore=VectorStore(persist_directory='data/new_vector_store')
    vectorstore.add_documents(documents=chunks, embeddings=embeddings)
    print("All embeddings Stored\n")

    return embed_manager, vectorstore


embed_manager, vectorstore = read_documents(pdf_dir) 

Loading Data............
Found 6 PDF files to process

Processing paper_1.pdf ...
Loaded 12 pages from paper_1.pdf

Processing paper_2.pdf ...
Loaded 24 pages from paper_2.pdf

Processing paper_3.pdf ...
Loaded 3 pages from paper_3.pdf

Processing paper_4.pdf ...
Loaded 17 pages from paper_4.pdf

Processing paper_6.pdf ...
Loaded 70 pages from paper_6.pdf

Processing paper_7.pdf ...
Loaded 17 pages from paper_7.pdf


Total 143 Documents loaded
Data Loading Completed!

Chunking the loaded data...............
Split 143 documents into 463 chunks
chunking completed

Embedding chunks................
Loading embedding model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimension: 384
Generating embeddings for 463 texts...


Batches: 100%|██████████| 15/15 [00:09<00:00,  1.65it/s]


Generated embeddings with shape: (463, 384)
Embedding completed

Storing Embedding vector.............
Vector store initialized. Collection: pdf_documents
Existing documents in collection: 0
Adding 463 documents to vector store...
Successfully added 463 documents to vector store
Total documents in collection: 463
All embeddings Stored



In [8]:
#a llm for 
RETRIEVAL_QUERY_PROMPT = """
You are a retrieval query generator for a research assistant.

Your task is to generate 5 to 7 focused retrieval queries
that help retrieve information from research papers.

Rules:
- DO NOT answer the question
- DO NOT explain anything
- DO NOT include markdown
- DO NOT include bullet points
- Output ONLY a valid JSON object
- The response must start with '{' and end with '}'

Output format:
{
  "queries": [
    "query 1",
    "query 2",
    "query 3",
    "query 4",
    "query 5"
  ]
}
"""

import json
from langchain_groq import ChatGroq
from langchain_core.messages import SystemMessage, HumanMessage


def generate_retrieval_queries(user_query: str, api_key: str) -> list[str]:
    query_expansion_llm = ChatGroq(
        model="llama-3.3-70b-versatile",
        api_key=api_key,
        temperature=0.2,   # low = stable retrieval
    )

    messages = [
        SystemMessage(content=RETRIEVAL_QUERY_PROMPT),
        HumanMessage(content=user_query)
    ]

    response = query_expansion_llm.invoke(messages)
    print("RAW LLM OUTPUT:")
    print(response.content)
    print("------")
    try:
        parsed = json.loads(response.content)
        queries = parsed["queries"]

        if not (5 <= len(queries) <= 7):
            raise ValueError("Expected 5–7 retrieval queries")

        return queries

    except Exception as e:
        raise RuntimeError(f"Invalid retrieval-query output: {e}")


def multi_query_retrieve(
    retriever,
    queries: list[str],
    top_k: int = 10
) -> list[dict]:
    """
    Runs RAG retrieval for each query and merges results.
    """
    all_chunks = []

    for q in queries:
        chunks = retriever.retrieve(q, top_k=top_k)
        all_chunks.extend(chunks)

    return all_chunks


def deduplicate_chunks(chunks: list[dict]) -> list[dict]:
    seen = set()
    deduped = []

    for chunk in chunks:
        key = (
            chunk["content"][:200],  # content fingerprint
            chunk["metadata"].get("source_file")
        )

        if key not in seen:
            seen.add(key)
            deduped.append(chunk)

    return deduped

user_prompt = json.loads(task_n_prompt)['user_prompt'][0]
ragretriver = RAGRetriever(vector_store=vectorstore, embedding_manager=embed_manager)

# Step 1: expand query
retrieval_queries = generate_retrieval_queries(
    user_query=user_prompt,
    api_key=GROQ_TOKEN
)

# Step 2: retrieve per query
raw_chunks = multi_query_retrieve(
    retriever=ragretriver,
    queries=retrieval_queries,
    top_k=10
)

# Step 3: deduplicate
clean_chunks = deduplicate_chunks(raw_chunks)

# Step 4: promote to papers (you already implemented this)
papers = ragretriver.group_chunks_by_paper(clean_chunks)

RAW LLM OUTPUT:
{
  "queries": [
    "poverty rates in Bihar vs India",
    "comparative analysis of poverty in Bihar and India",
    "Bihar poverty levels compared to national average",
    "regional disparities in poverty between Bihar and India",
    "socioeconomic factors contributing to poverty in Bihar and India",
    "poverty reduction strategies in Bihar vs national initiatives",
    "Bihar's poverty trends in relation to India's overall poverty landscape"
  ]
}
------
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 74.64it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 123.07it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 125.98it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 103.25it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 111.07it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 109.89it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 100.16it/s]

Generated embeddings with shape: (1, 384)





In [9]:
papers

[{'source': 'paper_1.pdf',
  'chunks': [{'content': 'Bihar is the second poorest state after Odisha in India. The overall incidence of rural \npoverty was 62.3 per cent, much above the all India level of 37.3  per cent. Incidence of rural \npoverty in Bihar declined from 62.3 per cent in 1993 -94 to 55.7 per cent in 2004 -05 and \nfurther declined to 55.3 per cent in 2009 -10(Table 2) as against national level poverty levels \nof 37.3 per cent, 28.3  per cent and 33.8 per cent, r espectively. The rural poverty gap in Bihar \nviz-a-viz all India level, has increased from 25 per cent in 1993 to 27.3 per cent in 2004 -05 \nbut declined to 21.5 per cent in 2009 -10. Incidence of poverty has continuously declined in \nBihar during last 20 years  but number of rural poor persons increased from 31 million in \n1993-94 to 45 million in 2004-05 to 51 million in 2009-10. \nTable 2: Population Below Poverty Line In Rural Bihar (%) \nYear Incidence Or Poverty \n1993-94 62.3 \n2004-05 55.7 \n2009-1

In [10]:
verify_llm = ChatGroq(
    model = 'meta-llama/llama-4-scout-17b-16e-instruct',
    # model = 'llama-3.3-70b-versatile',
    api_key=GROQ_TOKEN,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

verify_system_prompt = """
You are an academic paper relevance evaluator.

Your task is to evaluate a SINGLE paper context against a user query.

You must decide:
1. Whether this paper is relevant enough to help answer the user query.
2. If relevant, classify the paper’s role.

Definitions:
- A paper is RELEVANT if it:
  • directly compares methods mentioned in the query, OR
  • provides a survey/review of the topic, OR
  • provides guidelines, benchmarks, or empirical insights related to the query.

- A paper is NOT RELEVANT if it:
  • only briefly mentions the topic without analysis, OR
  • focuses on a single narrow method without comparison or broader insight, OR
  • is unrelated to answering the user query.

If the paper is NOT relevant:
→ Output exactly:
NO

If the paper IS relevant:
→ Classify it into ONE of the following categories:
survey
comparison
method-specific
tutorial

Output rules:
- Output ONLY one token.
- Do NOT include explanations.
- Do NOT include punctuation or formatting.

"""
verified_papers = []

for paper in papers:
    for chunks in paper['chunks']:        
        verify_human_prompt = f"""
          here is the User's query:
            {task_n_prompt}

          here is the Context:
            {chunks['content']}
        """
        verify_messages = [
            SystemMessage(content=verify_system_prompt),
            HumanMessage(content=verify_human_prompt)
        ]

        response = verify_llm.invoke(verify_messages)
        if response.content in ['survey','comparison','method-specific','tutorial']:
            verified_papers.append(paper)
            is_verified = True
            break
    if is_verified:
        print(f"{paper['source']} got verified")

paper_1.pdf got verified
paper_4.pdf got verified
paper_6.pdf got verified
paper_3.pdf got verified
paper_2.pdf got verified


In [11]:
verified_papers

[{'source': 'paper_1.pdf',
  'chunks': [{'content': 'Bihar is the second poorest state after Odisha in India. The overall incidence of rural \npoverty was 62.3 per cent, much above the all India level of 37.3  per cent. Incidence of rural \npoverty in Bihar declined from 62.3 per cent in 1993 -94 to 55.7 per cent in 2004 -05 and \nfurther declined to 55.3 per cent in 2009 -10(Table 2) as against national level poverty levels \nof 37.3 per cent, 28.3  per cent and 33.8 per cent, r espectively. The rural poverty gap in Bihar \nviz-a-viz all India level, has increased from 25 per cent in 1993 to 27.3 per cent in 2004 -05 \nbut declined to 21.5 per cent in 2009 -10. Incidence of poverty has continuously declined in \nBihar during last 20 years  but number of rural poor persons increased from 31 million in \n1993-94 to 45 million in 2004-05 to 51 million in 2009-10. \nTable 2: Population Below Poverty Line In Rural Bihar (%) \nYear Incidence Or Poverty \n1993-94 62.3 \n2004-05 55.7 \n2009-1

In [12]:
#this is what that will be feeded into the the write llm.
from typing import List, Dict, Any

verified_papers
req_info = []


for papera in verified_papers:
    source = papera['source']
    for chunka in papera['chunks']:
        chunkas = chunka['content']
        # display(chunkas)
        req_info.append({
            "content":chunkas,
            "url": downloaded_files[source]
        })

In [13]:
req_info

[{'content': 'Bihar is the second poorest state after Odisha in India. The overall incidence of rural \npoverty was 62.3 per cent, much above the all India level of 37.3  per cent. Incidence of rural \npoverty in Bihar declined from 62.3 per cent in 1993 -94 to 55.7 per cent in 2004 -05 and \nfurther declined to 55.3 per cent in 2009 -10(Table 2) as against national level poverty levels \nof 37.3 per cent, 28.3  per cent and 33.8 per cent, r espectively. The rural poverty gap in Bihar \nviz-a-viz all India level, has increased from 25 per cent in 1993 to 27.3 per cent in 2004 -05 \nbut declined to 21.5 per cent in 2009 -10. Incidence of poverty has continuously declined in \nBihar during last 20 years  but number of rural poor persons increased from 31 million in \n1993-94 to 45 million in 2004-05 to 51 million in 2009-10. \nTable 2: Population Below Poverty Line In Rural Bihar (%) \nYear Incidence Or Poverty \n1993-94 62.3 \n2004-05 55.7 \n2009-10 55.3 \nDecline In Rural Poverty    \n

In [14]:
writer_llm = ChatGroq(
    model='openai/gpt-oss-20b',
    # model='llama-3.3-70b-versatile',
    api_key=GROQ_TOKEN,
    temperature=0.3,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

writer_system_prompt = """
    Based on provided verified information in this format:
    [{
        "content":content,
        "url": url
    }]

    against a given user's QUERY

    Using ONLY the provided verified information, generate a structured
    and Detailed research-style report answering the user query.

    Include the urls for refrences

    If the provided information is insufficient to confidently answer
    any part of the query, explicitly state the missing information
    instead of guessing.

    Output format:
        {"Title":small but concise title for the report,
        "Report":"Report content in proper formatting"}
    Output rules:
    - Report should start with a title
    - Output ONLY the final report in proper docs formatting so that later it can be saved in form PDF.
    - Do NOT include extra tokens other than the report.
    
"""
x = json.loads(task_n_prompt)['user_prompt'][0]
trimmed_info = str(req_info)[:25000]
def write_report():
    writer_human_prompt = f"""
    Here is the 
        provided verified information: {trimmed_info},
        User's Query: {x}
    """

    writer_messages = [
        SystemMessage(writer_system_prompt),
        HumanMessage(writer_human_prompt)
    ]

    response = writer_llm.invoke(writer_messages)
    return response.content


final_report = write_report()

In [15]:
fr = json.loads(final_report)
print(fr['Report'])

# Poverty in Bihar Relative to National Levels: Trends and Disparities

## 1. Introduction
The following report synthesises verified data on poverty in Bihar and compares it with overall national figures for India. All information is drawn exclusively from the provided sources; no external data or assumptions are introduced.

## 2. Methodology
Data were extracted from the documents supplied in the user prompt. The key indicators used are:
- **Incidence of poverty** (percentage of population below the poverty line – BPL)
- **Poverty gap** (difference between BPL incidence in Bihar and national average)
- **Number of poor persons** (in millions)
- **Multi‑Dimensional Poverty Index (MPI)** where available

Time‑series comparisons focus on the periods 1993‑94, 2004‑05, 2009‑10, and 2011‑12 where data are available.

## 3. Findings

| Indicator | Bihar | India (overall) | Source |
|-----------|-------|-----------------|--------|
| **Rural poverty incidence** | 62.3 % (1993‑94) | 37.3 % | [1

In [16]:
from markdown_pdf import MarkdownPdf, Section

# 1. Create the PDF object
pdf = MarkdownPdf(toc_level=2)

# 2. Add your LLM report as a section
# This will automatically turn # into headers, ** into bold, etc.
pdf.add_section(Section(fr['Report']))

# 3. Save it
pdf.save(f"final_report_111.pdf")

print("Report saved with proper formatting!")

Report saved with proper formatting!


In [17]:
while agent_state["current_step"] < len(agent_state["plan"]):
    step = agent_state["plan"][agent_state["current_step"]]
    print(f"Executing step: {step}")

    if step == "search_sources":
        agent_state["notes"].append("searched sources")

    elif step == "read_documents":
        agent_state["notes"].append("read documents")

    elif step == "extract_key_points":
        agent_state["notes"].append("extracted key points")

    elif step == "compare_methods":
        agent_state["notes"].append("compared methods")

    elif step == "analyze_trends":
        agent_state["notes"].append("analyzed trends")

    elif step == "write_report":
        agent_state["notes"].append("final report written")
        break

    agent_state["current_step"] += 1

Executing step: search_sources
Executing step: read_documents
Executing step: extract_key_points
Executing step: compare_methods
Executing step: analyze_trends
Executing step: write_report


In [18]:
agent_state

{'task': {'task_type': ['comparison'],
  'topic': 'poverty',
  'methods': ['bihar', 'india overall']},
 'plan': ['search_sources',
  'read_documents',
  'extract_key_points',
  'compare_methods',
  'analyze_trends',
  'write_report'],
 'current_step': 5,
 'notes': ['searched sources',
  'read documents',
  'extracted key points',
  'compared methods',
  'analyzed trends',
  'final report written'],
 'sources': [],
 'status': 'planned'}