# Generate Reports

1. Vanilla LLM
2. Naive RAG
3. Kruppe Model

In [1]:
import pandas as pd
import asyncio
import pathlib
import json
import traceback


from kruppe.llm import OpenAILLM 

In [2]:
# Shared LLM client: the retriever, query engines, FinHub, Librarian and
# Coordinator below all receive this same instance, and llm.price() is later
# used as the spending check for the whole run.
llm = OpenAILLM(model="gpt-4.1-mini") # using a single llm client
# Root folder under which the report sub-folders (kruppe_report/, vanilla_report/) are created.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
experiment_dir = pathlib.Path("/Users/danielliu/Workspace/fin-rag/experiments")

In [3]:
# Load the question table (columns shown in the output: category,
# human_report_loc, question). index_col=False tells pandas not to use the
# first CSV column as the index, so rows get a default 0..n-1 index.
df = pd.read_csv("./reports.csv", index_col=False)
df

Unnamed: 0,category,human_report_loc,question
0,Energy (Oil),/Users/danielliu/Workspace/fin-rag/experiments...,How is ConocoPhillips positioned to deliver su...
1,Energy (Oil),/Users/danielliu/Workspace/fin-rag/experiments...,What is the outlook for Chevron Corporation’s ...
2,Energy (Oil),/Users/danielliu/Workspace/fin-rag/experiments...,What are the updated financial prospects and i...
3,Energy (Oil),/Users/danielliu/Workspace/fin-rag/experiments...,What is the investment outlook for Exxon Mobil...
4,Energy (Oil),/Users/danielliu/Workspace/fin-rag/experiments...,What is the investment outlook for ConocoPhill...
...,...,...,...
68,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,"What are the key expectations, product announc..."
69,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,"What are the current trends, risks, and invest..."
70,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,What is the current and projected financial an...
71,NVDA,/Users/danielliu/Workspace/fin-rag/experiments...,"What are the current trends, challenges, and o..."


## Kruppe

### Initialization

In [4]:
# logging imports
import logging
from logging import StreamHandler

# toolkit import
from kruppe.llm import OpenAIEmbeddingModel
from kruppe.functional.docstore.mongo_store import MongoDBStore
from kruppe.functional.rag.vectorstore.chroma import ChromaVectorStore
from kruppe.functional.rag.index.vectorstore_index import VectorStoreIndex
from kruppe.functional.rag.retriever.simple_retriever import SimpleRetriever
from kruppe.functional.rag.retriever.fusion_retriever import QueryFusionRetriever
from kruppe.functional.ragquery import RagQuery
from kruppe.functional.llmquery import LLMQuery
from kruppe.functional.newshub import NewsHub
from kruppe.functional.finhub import FinHub
from kruppe.data_source.news.nyt import NewYorkTimesData
from kruppe.data_source.news.ft import FinancialTimesData
from kruppe.data_source.news.newsapi import NewsAPIData
from kruppe.data_source.finance.yfin import YFinanceData

# researcher import
from kruppe.algorithm.librarian import Librarian
from kruppe.algorithm.coordinator import Coordinator

In [5]:
# set up logging

# formatters: the long one records the source file path, the short one the logger name
formatter = logging.Formatter('%(asctime)s - %(pathname)s - %(levelname)s - %(message)s')
short_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# console handler for the jupyter notebook (INFO and above)
ch = StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.INFO)

# file handler for the root logger; mode='w' truncates the file when the
# handler opens it, so each run of this cell starts with an empty log
log_file_path = "/Users/danielliu/Workspace/fin-rag/logs/everything.log"
fh_all = logging.FileHandler(log_file_path, mode='w')
fh_all.setFormatter(formatter)
fh_all.setLevel(logging.DEBUG)

# file handler for the "kruppe" logger hierarchy (also truncated on each run)
log_file_path = "/Users/danielliu/Workspace/fin-rag/logs/kruppe.log"
fh_kruppe = logging.FileHandler(log_file_path, mode='w')
fh_kruppe.setFormatter(short_formatter)
fh_kruppe.setLevel(logging.DEBUG)

# set up loggers
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)
root_logger.handlers.clear()
root_logger.addHandler(fh_all) # records that reach the root logger go to everything.log

kruppe_logger = logging.getLogger("kruppe")
# The logger level (INFO) is what filters here; the handler itself accepts DEBUG.
kruppe_logger.setLevel(logging.INFO)
kruppe_logger.handlers.clear()
kruppe_logger.addHandler(fh_kruppe) # kruppe.* records go to kruppe.log
# With propagation off, kruppe.* records never reach the root logger's handlers,
# so they are NOT written to everything.log.
kruppe_logger.propagate = False

algo_logger = logging.getLogger("kruppe.algorithm")
algo_logger.setLevel(logging.INFO)
# Clear first, like the two loggers above: otherwise every re-run of this cell
# stacks one more console handler and each message is printed multiple times.
algo_logger.handlers.clear()
algo_logger.addHandler(ch) # log to console


In [6]:
# Build the document store, the vector store and the retriever used by the RAG tools.
# reset_db is forwarded to the doc store and, when True, also clears the vector store below.
reset_db=False

# The same collection name is used for the MongoDB collection and the Chroma collection.
db_name = "kruppe_librarian"
collection_name = "general_news_04_20_2025"

# Create doc store
unique_indices = [['title', 'datasource']] # NOTE: this is important to avoid duplicates
docstore = await MongoDBStore.acreate_db(
    db_name=db_name,
    collection_name=collection_name,
    unique_indices=unique_indices,
    reset_db=reset_db
)

# Create vectorstore index
# NOTE(review): persist_path points at an external volume on the author's
# machine — it must be mounted (or the path changed) for this cell to run.
embedding_model = OpenAIEmbeddingModel()
vectorstore = ChromaVectorStore(
    embedding_model=embedding_model,
    collection_name=collection_name,
    persist_path='/Volumes/Lexar/Daniel Liu/vectorstores/kruppe_librarian'
)

# The fusion retriever wraps a single simple retriever and is given the shared
# llm plus num_queries=3.
# NOTE(review): presumably the llm generates 3 query variants that are merged
# with reciprocal-rank fusion ('rrf') — confirm against QueryFusionRetriever.
index = VectorStoreIndex(vectorstore=vectorstore)
simple_retriever = SimpleRetriever(index=index)
retriever = QueryFusionRetriever(
    retrievers=[simple_retriever],
    mode='rrf',
    llm=llm,
    num_queries=3
)


# Keep the vector store in step with the doc store when a reset was requested.
if reset_db:
    vectorstore.clear()

# Disabled: re-index every stored document into the vector store.
# docs = await docstore.aget_all_documents()
# print(len(docs))
# await index.async_add_documents(docs)

# Sanity check of what is currently stored.
print("Number of documents:", docstore.size())
print("Number of chunks:", vectorstore.size())


Number of documents: 3590
Number of chunks: 29735


In [7]:
# Tool providers. Their bound methods are collected into the agent toolkits in
# the next cell.

# Retrieval-augmented querying: uses the fusion retriever and the shared llm.
rag_query_engine = RagQuery(
    retriever = retriever,
    llm = llm
)

# Querying the shared llm directly, without a retriever.
llm_query_engine = LLMQuery(
    llm = llm
)

# News search. Only the Financial Times source is enabled; the other two
# sources are left commented out.
# NOTE(review): headers_path is an absolute path on the author's machine.
news_hub = NewsHub(news_sources=[
    # NewYorkTimesData(headers_path="/Users/danielliu/Workspace/fin-rag/.nyt-headers.json"),
    FinancialTimesData(headers_path="/Users/danielliu/Workspace/fin-rag/.ft-headers.json"),
    # NewsAPIData()
])

# Company financial data, sourced from YFinanceData and given the shared llm.
fin_hub = FinHub(
    fin_source = YFinanceData(),
    llm = llm
)

In [8]:
# Tools handed to the agents. The librarian and the researchers currently get
# exactly the same seven tools, so the list is written once and copied; each
# name still refers to its own list object.
toolkit_librarian = [
    rag_query_engine.rag_query,
    llm_query_engine.llm_query,
    news_hub.news_search,
    # news_hub.news_recent,
    # news_hub.news_archive,
    fin_hub.get_company_background,
    fin_hub.get_company_income_stmt,
    fin_hub.get_company_balance_sheet,
    fin_hub.analyze_company_financial_stmts,
]

# Independent copy: edit this list if the researchers should diverge from the librarian.
toolkit_researcher = toolkit_librarian.copy()

In [9]:
# Librarian agent, built with the shared llm, its toolkit, and the doc store /
# index created above. It is passed to every Coordinator in the execution cell.
# NOTE(review): max_steps=20 presumably caps the agent's reasoning steps — confirm in Librarian.
librarian = Librarian(
    llm=llm,
    toolkit=toolkit_librarian,
    docstore=docstore,
    index=index,
    max_steps=20,
    verbose=False
)

In [10]:
# Settings handed to each Coordinator as `tree_configs`.
# NOTE(review): the key here is `max_step`, while Librarian is configured with
# `max_steps` — confirm this is the name the research tree actually expects.
tree_configs = dict(
    llm=llm,
    toolkit=toolkit_researcher,
    docstore=docstore,
    index=index,
    max_step=15,
    max_degree=2,
    verbose=False,
)

In [11]:
# Passed as `n_experts` to Coordinator.execute for every question.
n_experts = 3

### Execution

In [12]:
# coordinator = Coordinator(
#             llm=llm,
#             tree_configs = tree_configs,
#             librarian = librarian,
#         )
# reports = await coordinator.execute("How is ConocoPhillips positioned to deliver sustained free cash flow growth and shareholder returns in the coming years amid its current investment cycle and market conditions?",
#                           n_experts=n_experts)


In [13]:
# kruppe_dir = experiment_dir / "kruppe_report"
# kruppe_report_path = kruppe_dir / f"report_{0}.json"
# if coordinator.research_reports:
#     # report_dicts = coordinator.reports_to_dict()
#     research_report_dicts = [
#             report.model_dump(mode='json') for report in coordinator.research_reports
#         ]

#     report_dicts = {
#             "research_reports": research_report_dicts,
#             "background_report": coordinator._background_report.model_dump(mode='json') if coordinator._background_report else None,
#         }

#     with open(kruppe_report_path, "w") as f:
#         json.dump(report_dicts, f, indent=4)
    
#     with open(kruppe_dir / f"report_{0}_forest.html", "w") as f:
#         f.write(coordinator.visualize_research_forest())
    
#     print(f"LLM current cost: {llm.price()}")

In [None]:
# Output folder for the Kruppe reports. mkdir has no parents=True, so
# experiment_dir itself must already exist.
kruppe_dir = experiment_dir / "kruppe_report"
kruppe_dir.mkdir(exist_ok=True)

# Shared by all tasks below: at most 5 coordinators run at the same time.
semaphore = asyncio.Semaphore(5)  # limit to 5 concurrent tasks

async def execute_with_semaphore(question: str, i: int, semaphore: asyncio.Semaphore):
    kruppe_report_path = kruppe_dir / f"report_{i}.json"

    if (kruppe_report_path).exists():
        print(f"Report {i} already exists for question: {question}. Skipping.")
        return False
    
    if llm.price() > 200:
        print("LLM has exceeded the budget. Ending the experiment.")
        return False
    
    # 3 retries
    async with semaphore:
        coordinator = Coordinator(
            llm=llm,
            tree_configs = tree_configs,
            librarian = librarian,
        )

        try:
            print(f"Attempt to answer question: {question}")
            reports = await coordinator.execute(query=question, n_experts=n_experts)
        except* Exception as eg:  # catch *all* sub-exceptions
            print(f"Error when generating report for question: {question}")
            print("Caught ExceptionGroup:")
            for sub in eg.exceptions:
                print("--- sub-exception:")
                traceback.print_exception(type(sub), sub, sub.__traceback__)

            if coordinator.research_reports:
                print("Partial report generated. Saving it.")
            else:
                print("No report generated. Skipping.")
    
    if coordinator.research_reports:
        report_dicts = coordinator.reports_to_dict()

        with open(kruppe_report_path, "w") as f:
            json.dump(report_dicts, f, indent=4)
        
        with open(kruppe_dir / f"report_{i}_forest.html", "w") as f:
            f.write(coordinator.visualize_research_forest())
        
        print(f"Report saved for question: {question} at {kruppe_report_path}")
        print(f"LLM current cost: {llm.price()}")

        return True
    
    return False

async with asyncio.TaskGroup() as tg:
    tasks = []
    for i, row in df.iterrows():
        question = row["question"]
        task = tg.create_task(execute_with_semaphore(question, i, semaphore))
        tasks.append(task)
    
results = [task.result() for task in tasks]

# Identify rows where the report was not generated successfully
failed_indices = [i for i, success in enumerate(results) if not success]
print("Rows that did not generate a report:", failed_indices)

# Optionally, inspect the corresponding rows from the DataFrame
df.loc[failed_indices]

Report 0 already exists for question: How is ConocoPhillips positioned to deliver sustained free cash flow growth and shareholder returns in the coming years amid its current investment cycle and market conditions?. Skipping.
Report 1 already exists for question: What is the outlook for Chevron Corporation’s financial performance and shareholder returns over the next several years, and what are the key factors and risks influencing its ability to deliver free cash flow growth and competitive cash returns to investors?. Skipping.
Report 2 already exists for question: What are the updated financial prospects and investment outlook for Exxon Mobil Corporation, and how do they compare to market expectations?. Skipping.
Report 3 already exists for question: What is the investment outlook for Exxon Mobil in 2025 and beyond, considering its project startup pipeline, financial performance, and industry environment?. Skipping.
Report 4 already exists for question: What is the investment outlook

2025-04-25 12:09:24,257 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/agents.py - ERROR - LLM did not return a valid response for the reasoning step. Expected format: Thought 2: [thought] Action 2: [action]. Received Thought 2: I have the company background information for Bank of America, including its segments, services, and general description. Next, I will retrieve background information for JPMorgan Chase, a major large-cap U.S. bank that will be useful for comparative purposes regarding investment potential..
2025-04-25 12:10:12,624 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/agents.py - ERROR - LLM did not return a valid response for the reasoning step. Expected format: Thought 8: [thought] Action 8: [action]. Received Action 8: Retrieve company background information for Goldman Sachs Group, Inc..


Background report generated for query: What is the current financial and regulatory position of Wells Fargo, and what are the primary factors and potential risks affecting its outlook and investment attractiveness?
Background report generated for query: What are the key factors driving the outlook and valuation for EOG Resources, and how do these factors impact its expected financial performance and investment attractiveness for 2025?
Domain experts generated: 3 experts found from 7 generated.
Background report generated for query: What is the investment outlook for Chevron Corporation, including its expected financial performance, key risks, and potential for shareholder returns?
Background report generated for query: What is the investment outlook for Citigroup Inc., and does the company have the potential to transition from value destruction to value creation over the next few years?
Background report generated for query: What is the investment outlook for J.P. Morgan Chase & Co., i

2025-04-25 12:12:24,607 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/agents.py - ERROR - LLM did not return a valid response for the reasoning step. Expected format: Thought 19: [thought] Action 19: [action]. Received Thought 19: I have balance sheet data for China Shenhua Energy. I now have all the necessary background, news, and financial data for ConocoPhillips and its key peers. The final step is to summarize the information into a background report addressing the outlook for ConocoPhillips' free cash flow growth and financial performance, and its competitive positioning relative to peers based on the data and context gathered.

FINISH[success].


Domain experts generated: 3 experts found from 10 generated.
Domain experts generated: 3 experts found from 8 generated.




Background report generated for query: How is J.P. Morgan Chase leveraging technology—particularly artificial intelligence—to sustain and enhance its competitive position, operational efficiency, and growth prospects relative to other large-cap banks?
Domain experts generated: 3 experts found from 8 generated.




Background report generated for query: What is the current financial and operational outlook for EOG Resources, Inc., and how is the company positioned for future performance relative to market expectations and industry peers?




Background report generated for query: What are the key financial, regulatory, and business factors affecting Bank of America's performance and outlook, and how do these influence its investment potential relative to other large-cap U.S. banks?


2025-04-25 12:13:20,231 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/hypothesis.py - ERROR - LLM did not return a valid response for the reasoning step. Missing fields: hypothesis
2025-04-25 12:13:22,596 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/hypothesis.py - ERROR - LLM did not return a valid response for the reasoning step. Missing fields: hypothesis, research_direction, action


Background report generated for query: What is the outlook for ConocoPhillips’ free cash flow growth and overall financial performance over the next several years, and how does this position the company relative to its industry peers?
Domain experts generated: 3 experts found from 8 generated.
Domain experts generated: 3 experts found from 8 generated.
Domain experts generated: 3 experts found from 6 generated.
Domain experts generated: 3 experts found from 8 generated.
Background report generated for query: What is the current investment outlook for large-cap U.S. banks, particularly Citigroup, amidst prevailing policy risks and market uncertainty?


2025-04-25 12:15:55,999 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/hypothesis.py - ERROR - LLM did not return a valid response for the reasoning step. Missing fields: hypothesis, research_direction
2025-04-25 12:15:56,540 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/hypothesis.py - ERROR - LLM did not return a valid response for the reasoning step. Missing fields: hypothesis, research_direction, action
2025-04-25 12:16:16,167 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/hypothesis.py - ERROR - LLM did not return a valid response for the reasoning step. Missing fields: hypothesis, research_direction
2025-04-25 12:17:38,834 - /Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/hypothesis.py - ERROR - LLM did not return a valid response for the reasoning step. Missing fields: hypothesis, research_direction


Domain experts generated: 3 experts found from 8 generated.
Error when generating report for question: What are the key factors driving the outlook and valuation for EOG Resources, and how do these factors impact its expected financial performance and investment attractiveness for 2025?
Caught ExceptionGroup:
--- sub-exception:
No report generated. Skipping.
Attempt to answer question: How should investors assess Citigroup's current risks, progress on regulatory and operational improvements, and future outlook in order to make informed investment decisions regarding its stock?


  + Exception Group Traceback (most recent call last):
  |   File "/Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/coordinator.py", line 209, in execute_hypothesis_researcher
  |     responses = await hyp_researcher.execute(query=query)
  |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/hypothesis.py", line 302, in execute
  |     async with asyncio.TaskGroup() as tg:
  |                ~~~~~~~~~~~~~~~~~^^
  |   File "/opt/homebrew/Cellar/python@3.13/3.13.2/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/taskgroups.py", line 71, in __aexit__
  |     return await self._aexit(et, exc)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/opt/homebrew/Cellar/python@3.13/3.13.2/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/taskgroups.py", line 173, in _aexit
  |     raise BaseExceptionGroup(
  |     ...<2 lines>...
  |     ) from None
  | ExceptionGroup: unhandled err

Error when generating report for question: What is the investment outlook for Chevron Corporation, including its expected financial performance, key risks, and potential for shareholder returns?
Caught ExceptionGroup:
--- sub-exception:
No report generated. Skipping.
Attempt to answer question: What is the outlook for PNC Financial’s Treasury Management business, and what factors are expected to drive its future growth and competitive positioning within the industry?


  + Exception Group Traceback (most recent call last):
  |   File "/Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/coordinator.py", line 209, in execute_hypothesis_researcher
  |     responses = await hyp_researcher.execute(query=query)
  |                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/Users/danielliu/Workspace/fin-rag/src/kruppe/algorithm/hypothesis.py", line 302, in execute
  |     async with asyncio.TaskGroup() as tg:
  |                ~~~~~~~~~~~~~~~~~^^
  |   File "/opt/homebrew/Cellar/python@3.13/3.13.2/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/taskgroups.py", line 71, in __aexit__
  |     return await self._aexit(et, exc)
  |            ^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "/opt/homebrew/Cellar/python@3.13/3.13.2/Frameworks/Python.framework/Versions/3.13/lib/python3.13/asyncio/taskgroups.py", line 173, in _aexit
  |     raise BaseExceptionGroup(
  |     ...<2 lines>...
  |     ) from None
  | ExceptionGroup: unhandled err

## Vanilla LLM

In [None]:
from kruppe.prompts.experiments import VANILLA_QA_SYSTEM
async def vanilla_llm_response(question: str):
    """Answer ``question`` with the bare LLM: system prompt plus the question, no tools.

    Returns the ``text`` attribute of the response from ``llm.async_generate``.
    """
    system_turn = {"role": "system", "content": VANILLA_QA_SYSTEM}
    user_turn = {"role": "user", "content": question}

    reply = await llm.async_generate([system_turn, user_turn])
    return reply.text

# Example: answer only the question in the row labelled 0 as a quick check
# before running all questions.
await vanilla_llm_response(df.question[0])

In [None]:
# Ask the plain LLM every question concurrently. asyncio.gather returns the
# answers in the same order as df.question, so the k-th report belongs to the
# k-th row. No concurrency limit is applied here.
vanilla_reports = await asyncio.gather(*[vanilla_llm_response(question) for question in df.question])
vanilla_reports

In [None]:
# Produce one title per vanilla report, in the same order as vanilla_reports.
# The titles become the file names when the reports are saved.
# NOTE(review): generate_title is not defined or imported in the visible part
# of this notebook — confirm it exists before this cell, otherwise a fresh
# "Restart & Run All" raises NameError here.
vanilla_titles = await asyncio.gather(*[generate_title(report) for report in vanilla_reports])
vanilla_titles

In [None]:
# Write each vanilla report to <experiment_dir>/vanilla_report/<title>.txt and
# record the resulting paths in the DataFrame.
vanilla_report_dir = experiment_dir / "vanilla_report"
vanilla_report_dir.mkdir(exist_ok=True)

file_paths = []
# strict=True: fail loudly if the number of titles and reports ever differs,
# instead of silently pairing up a truncated list.
for title, report in zip(vanilla_titles, vanilla_reports, strict=True):
    # Titles are generated text and are not sanitized anywhere in this notebook.
    # A "/" in a title would be read by pathlib as a sub-directory and make the
    # write fail, so replace it.
    # NOTE: two reports with the same title still overwrite each other.
    safe_title = title.replace("/", "-")
    file_path = vanilla_report_dir / f"{safe_title}.txt"
    # Explicit encoding: reports can contain non-ASCII characters and the
    # platform default encoding is not guaranteed to be UTF-8.
    file_path.write_text(report, encoding="utf-8")
    file_paths.append(file_path.as_posix())
    print(f"Saved report {title}")

# One path per row, in row order.
df["vanilla_report_loc"] = file_paths
df

In [None]:
# Persist the table (including vanilla_report_loc once the save cell above has run).
# NOTE: this overwrites the same ./reports.csv that is loaded at the top of the notebook.
df.to_csv("./reports.csv", index=False)