# Testing Coordinator

In [1]:
# logging imports
import logging
from logging import StreamHandler

# essential kruppe imports
from kruppe.llm import OpenAILLM

# toolkit import
from kruppe.llm import OpenAIEmbeddingModel
from kruppe.functional.docstore.mongo_store import MongoDBStore
from kruppe.functional.rag.vectorstore.chroma import ChromaVectorStore
from kruppe.functional.rag.index.vectorstore_index import VectorStoreIndex
from kruppe.functional.rag.retriever.simple_retriever import SimpleRetriever
from kruppe.functional.rag.retriever.fusion_retriever import QueryFusionRetriever
from kruppe.functional.ragquery import RagQuery
from kruppe.functional.llmquery import LLMQuery
from kruppe.functional.newshub import NewsHub
from kruppe.functional.finhub import FinHub
from kruppe.data_source.news.nyt import NewYorkTimesData
from kruppe.data_source.news.ft import FinancialTimesData
from kruppe.data_source.news.newsapi import NewsAPIData
from kruppe.data_source.finance.yfin import YFinanceData

# researcher import
from kruppe.algorithm.librarian import Librarian
from kruppe.algorithm.coordinator import Coordinator

In [2]:
# Set up logging.
#
# Routing:
#   * root logger                 -> everything.log
#   * kruppe.algorithm            -> console (INFO+), and everything.log via propagation
#   * kruppe.llm                  -> llm.log only (propagation disabled)
#   * kruppe.data_source.scraper  -> scraper.log only (propagation disabled)
#
# The cell is safe to re-run: handlers left over from a previous execution are
# detached and closed first, so messages are not emitted twice and old file
# handles are not leaked.

# NOTE: machine-specific directory; it must already exist. Adjust when running elsewhere.
LOG_DIR = "/Users/danielliu/Workspace/fin-rag/logs"

formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')


def _make_file_handler(file_name, level=logging.DEBUG):
    """Return a handler writing to LOG_DIR/file_name, truncating any previous contents."""
    # mode='w' truncates the file on open, replacing the separate open(..., 'w') step.
    handler = logging.FileHandler(f"{LOG_DIR}/{file_name}", mode='w')
    handler.setFormatter(formatter)
    handler.setLevel(level)
    return handler


def _reset_handlers(target_logger):
    """Detach and close every handler currently attached to target_logger."""
    for handler in list(target_logger.handlers):
        target_logger.removeHandler(handler)
        handler.close()


# loggers configured by this cell
root_logger = logging.getLogger()
kruppe_logger = logging.getLogger("kruppe.algorithm")
logger = logging.getLogger("kruppe.llm")
logger_scraper = logging.getLogger("kruppe.data_source.scraper")

for _configured_logger in (root_logger, kruppe_logger, logger, logger_scraper):
    _reset_handlers(_configured_logger)

# handlers

# console handler for the jupyter notebook
ch = StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.INFO)

# file handlers (each file is truncated when the cell runs)
fh_all = _make_file_handler("everything.log")
fh_llm = _make_file_handler("llm.log")
fh_scraper = _make_file_handler("scraper.log")

# Root level is ERROR, so loggers without their own level only reach
# everything.log at ERROR and above; records from kruppe.algorithm (INFO+)
# still arrive here because they propagate to the root handlers.
root_logger.setLevel(logging.ERROR)
root_logger.addHandler(fh_all) # log to the catch-all file

kruppe_logger.setLevel(logging.INFO)
kruppe_logger.addHandler(ch) # log to console

logger.setLevel(logging.DEBUG)
logger.addHandler(fh_llm) # log llm output to a file instead of console.
logger.propagate = False # prevent logging from propagating to the root logger

logger_scraper.setLevel(logging.DEBUG)
logger_scraper.addHandler(fh_scraper) # log scraper output to a file instead of console.
logger_scraper.propagate = False # prevent logging from propagating to the root logger

In [3]:
# Build the document store, the vector store and the retriever used by the
# rest of the notebook.
#
# reset_db is forwarded to MongoDBStore.acreate_db and, when True, also
# triggers vectorstore.clear() further down.
reset_db=False

# The same collection name is used for the MongoDB collection and the Chroma collection.
db_name = "kruppe_librarian"
collection_name = "general_news_04_20_2025"

# Create doc store
unique_indices = [['title', 'datasource']] # NOTE: this is important to avoid duplicates
docstore = await MongoDBStore.acreate_db(
    db_name=db_name,
    collection_name=collection_name,
    unique_indices=unique_indices,
    reset_db=reset_db
)

# Create vectorstore index
# NOTE(review): persist_path is an absolute path on an external volume, so this
# cell only runs on a machine where that volume is mounted.
embedding_model = OpenAIEmbeddingModel()
vectorstore = ChromaVectorStore(
    embedding_model=embedding_model,
    collection_name=collection_name,
    persist_path='/Volumes/Lexar/Daniel Liu/vectorstores/kruppe_librarian'
)

# The fusion retriever wraps a single SimpleRetriever over the index and is
# given its own LLM instance; mode='rrf' is presumably reciprocal rank fusion
# over num_queries generated queries -- TODO confirm against QueryFusionRetriever.
index = VectorStoreIndex(vectorstore=vectorstore)
simple_retriever = SimpleRetriever(index=index)
retriever = QueryFusionRetriever(
    retrievers=[simple_retriever],
    mode='rrf',
    llm=OpenAILLM(),
    num_queries=3
)


# Keep the vector store in step with the doc store when starting from scratch.
if reset_db:
    vectorstore.clear()

# Manual re-index recipe (kept disabled): clear the vector store and re-add
# every document from the doc store.
# vectorstore.clear()
# docs = await docstore.aget_all_documents()
# print(len(docs))
# await index.async_add_documents(docs)


In [4]:
# Report how many documents are stored and how many chunks are indexed.
document_count = docstore.size()
print("Number of documents:", document_count)
chunk_count = vectorstore.size()
print("Number of chunks:", chunk_count)

Number of documents: 1718
Number of chunks: 16797


In [5]:
# Tool providers whose bound methods are later collected into the agent toolkits.

# Retrieval-augmented querying over the fusion retriever built earlier.
rag_query_engine = RagQuery(
    retriever = retriever,
    llm = OpenAILLM()
)

# Plain LLM querying (no retriever is passed).
llm_query_engine = LLMQuery(
    llm = OpenAILLM()
)

# News sources: NYT and FT are configured from header files given as relative
# paths, so the notebook's working directory matters; NewsAPIData takes no
# arguments here.
news_hub = NewsHub(news_sources=[
    NewYorkTimesData(headers_path="../../.nyt-headers.json"),
    FinancialTimesData(headers_path="../../.ft-headers.json"),
    NewsAPIData()
])

# Company financial data backed by YFinanceData, with its own LLM instance.
fin_hub = FinHub(
    fin_source = YFinanceData(),
    llm = OpenAILLM()
)

In [6]:
# Tools available to both agents, split around the point where the
# librarian-only news tools are inserted so the original ordering is kept.
shared_query_tools = [
    rag_query_engine.rag_query,
    llm_query_engine.llm_query,
    news_hub.news_search,
]
shared_company_tools = [
    fin_hub.get_company_background,
    fin_hub.get_company_income_stmt,
    fin_hub.get_company_balance_sheet,
    fin_hub.analyze_company_financial_stmts,
]

# The librarian additionally gets the recent/archive news tools.
toolkit_librarian = [
    *shared_query_tools,
    news_hub.news_recent,
    news_hub.news_archive,
    *shared_company_tools,
]

# The researcher toolkit omits news_hub.news_recent and news_hub.news_archive.
toolkit_researcher = [*shared_query_tools, *shared_company_tools]

# Coordinator

In [7]:
# Research question passed to coordinator.execute() in the Execute section.
query = "Should I invest in NVIDIA right now?"

In [8]:
# `llm` (gpt-4.1-mini) is used by the librarian here and is also passed to the
# Coordinator in a later cell.
llm = OpenAILLM(model="gpt-4.1-mini")
# The librarian gets the full librarian toolkit and the same docstore/index
# that back the retriever; max_steps=20 is presumably the cap on its
# tool-calling loop -- TODO confirm against Librarian.
librarian = Librarian(
    llm=llm,
    toolkit=toolkit_librarian,
    docstore=docstore,
    index=index,
    max_steps=20
)

In [9]:
# Settings handed to the Coordinator as `tree_configs`.
# NOTE(review): the key here is "max_step" while Librarian is constructed with
# `max_steps` -- confirm which spelling the consumer of tree_configs expects.
tree_configs = dict(
    llm=OpenAILLM(),
    toolkit=toolkit_researcher,
    docstore=docstore,
    index=index,
    max_step=15,
    max_degree=2,
)

In [10]:
# The coordinator reuses the librarian's `llm` instance and receives the
# librarian plus the tree settings defined above.
coordinator = Coordinator(
    llm=llm,
    tree_configs = tree_configs,
    librarian=librarian,
)

### Generate Domain Experts

In [11]:
# experts = await coordinator.generate_domain_experts(query)
# experts

In [12]:
# filtered_experts = await coordinator.filter_domain_experts(query, experts, 3)
# filtered_experts

### Generate Background

In [13]:
# bkg_report = await coordinator.generate_background(query)

In [14]:
# print(bkg_report.text)

### Execute

In [15]:
# Run the coordinator on `query` with two experts; the result is iterated as
# per-expert reports further down. This is the long-running cell of the notebook.
reports = await coordinator.execute(query, n_experts=2)

Thinking (step 1)
Tool call: get_company_background ({"ticker":"NVDA"})
Thinking (step 2)
Tool call: news_archive ({"start_date":"2024-01-23","end_date":"2024-04-23","max_results":10,"keywords":"NVIDIA"})
Added 2 documents to index and docstore (out of 2 total documents)
Thinking (step 3)
Tool call: get_company_income_stmt ({"ticker": "NVDA", "years": 3})
Thinking (step 4)
Tool call: get_company_balance_sheet ({"ticker":"NVDA","years":3})
Thinking (step 5)
Background report generated.
Domain experts generated: 2 experts found from 9 generated.
Initialized 2 root nodes.
Starting research on tree 0
Discovering node: Node(tree_id=0, step=0, is_leaf=False, d_time=1, f_time=None)
Starting research on tree 1
Discovering node: Node(tree_id=1, step=0, is_leaf=False, d_time=1, f_time=None)
Initialized 2 root nodes.
Starting research on tree 0
Discovering node: Node(tree_id=0, step=0, is_leaf=False, d_time=1, f_time=None)
Starting research on tree 1
Discovering node: Node(tree_id=1, step=0, is_l

In [34]:
def visualize_research_forest(research_forest=None):
    """
    Generate an HTML visualization of the research forest structure.

    Each entry of the forest (a HypothesisResearcher) is rendered as a
    top-level item; its ``root_nodes`` and their ``children`` are rendered
    recursively as nested lists.

    Args:
        research_forest: Optional iterable of researchers to render. Each
            researcher must expose ``root_nodes``; each node must expose
            ``step``, ``d_time``, ``is_leaf`` and ``children``. When omitted,
            the notebook-level ``coordinator._research_forest`` is used, so the
            original zero-argument call keeps working.

    Returns:
        str: An HTML fragment (inline ``<style>`` plus nested ``<ul>`` lists),
        or a placeholder ``<div>`` when the forest is empty or missing.
    """
    if research_forest is None:
        # Fall back to the notebook global; it is only read, so no `global` statement is needed.
        research_forest = coordinator._research_forest
    if not research_forest:
        return "<div>No research forest available yet.</div>"

    def render_children(node, parts):
        """Append one <li> per child of ``node`` to ``parts``, recursing into descendants."""
        for child in node.children:
            if child.is_leaf:
                css_class, label = "leaf_node", "Leaf Node"
            else:
                css_class, label = "child-node", "Internal Node"
            parts.append(
                f'<li><div class="node {css_class}">{label}: '
                f'step={child.step}, d_time={child.d_time}</div>'
            )
            if child.children:
                parts.append("<ul>")
                render_children(child, parts)
                parts.append("</ul>")
            parts.append("</li>")

    stylesheet = """
    <style>
        .tree {
            font-family: Arial, sans-serif;
            margin: 20px;
            color: black;
        }
        .tree ul {
            list-style-type: none;
            padding-left: 20px;
            position: relative;
        }
        .tree ul:before {
            content: "";
            position: absolute;
            top: 0;
            left: 0;
            border-left: 1px solid #ccc;
            height: 100%;
        }
        .tree li {
            position: relative;
            padding: 5px 0;
        }
        .tree li:before {
            content: "";
            position: absolute;
            top: 50%;
            left: -20px;
            border-top: 1px solid #ccc;
            width: 20px;
        }
        .node {
            padding: 5px 10px;
            border: 1px solid #ddd;
            border-radius: 4px;
            background: #f8f9fa;
            display: inline-block;
        }
        .researcher {
            background: #e3f2fd;
            font-weight: bold;
        }
        .root-node {
            background: #f3e5f5;
        }
        .child-node {
            background: #f1f8e9;
        }
        .leaf_node {
            background: #fff3e0;
        }
    </style>
    """

    # Collect fragments in a list and join once instead of repeatedly
    # concatenating onto a growing string.
    parts = [stylesheet, '<div class="tree">']

    for i, researcher in enumerate(research_forest):
        parts.append("<ul>")
        parts.append(f'<li><div class="node researcher">Hypothesis Researcher {i+1}</div>')
        parts.append("<ul>")

        for root_node in researcher.root_nodes:
            parts.append(
                f'<li><div class="node root-node">Root Node: '
                f'step={root_node.step}, d_time={root_node.d_time}</div>'
            )
            parts.append("<ul>")
            render_children(root_node, parts)
            parts.append("</ul>")
            parts.append("</li>")

        parts.append("</ul>")
        parts.append("</li>")
        parts.append("</ul>")

    parts.append("</div>")
    return "\n".join(parts)


In [35]:
# Display the HTML in a Jupyter Notebook cell
# IPython.display is the documented public import path for the HTML display class.
from IPython.display import HTML
html_output = visualize_research_forest()
HTML(html_output)

In [14]:
# Print every report, preceded by the name and description of its expert,
# with a dashed rule between reports.
separator = '-' * 50
for report in reports:
    expert_info = report.metadata
    print("EXPERT:", expert_info['expert'])
    print("EXPERT DESCRIPTION:", expert_info['expert_description'])
    print(report)
    print(separator)

EXPERT: Financial Analyst
EXPERT DESCRIPTION: The Financial Analyst specializes in evaluating stocks by analyzing company financials, market trends, valuation metrics, and economic conditions to provide investment recommendations.
Final Accepted Hypothesis: Investing in NVIDIA "right now" hinges on balancing its robust financial and innovation-driven growth potential against significant near-term geopolitical trade risks, particularly in China. Investors with medium- to long-term horizons and higher risk tolerance should view current conditions as an opportune point to accumulate shares, anticipating strong secular growth; conversely, short-term or risk-averse investors should await clearer signals on trade developments before committing capital.

Final Report:

NVIDIA stands out as a powerhouse in the semiconductor and AI chip markets, demonstrated by its extraordinary financial and operational growth over recent years. From 2022 through 2025, NVIDIA's total revenue expanded from appr

In [18]:
# Ask the coordinator for a summary report; the bare expression on the last
# line displays the returned object's repr.
# NOTE(review): the saved output of this cell discusses Everest Re Group rather
# than the NVIDIA query defined earlier, and the execution counts are out of
# order -- it appears to come from an earlier run. Re-run before relying on it.
summary_report = await coordinator.summarize_reports()
summary_report

Response(text='The provided research reports share a common challenge: the absence of direct, publicly available 2025 financial disclosures from Everest Re Group. Consequently, all analyses rely substantially on industry trends, peer performance, partial market data, and reasoned inference rather than detailed company-specific figures. Despite this limitation, several consistent themes and some nuanced differences emerge across the reports.\n\n---\n\n### 1. **Moderate Performance Change Consistent with Industry Trends**\n\n**Key Findings:**\n- Everest Re’s 2025 financial performance likely saw only moderate year-over-year changes.\n- These changes predominantly reflect broader reinsurance sector conditions rather than company-specific disruptions.\n- Industry-wide pressures included elevated underwriting expenses caused by substantial natural catastrophe claims.\n- Investment income likely provided partial offsetting gains due to rising interest rates improving asset returns.\n- No mat

In [20]:
# Print the summary text so its newlines and markdown are readable (the repr shows them escaped).
print(summary_report.text)

The provided research reports share a common challenge: the absence of direct, publicly available 2025 financial disclosures from Everest Re Group. Consequently, all analyses rely substantially on industry trends, peer performance, partial market data, and reasoned inference rather than detailed company-specific figures. Despite this limitation, several consistent themes and some nuanced differences emerge across the reports.

---

### 1. **Moderate Performance Change Consistent with Industry Trends**

**Key Findings:**
- Everest Re’s 2025 financial performance likely saw only moderate year-over-year changes.
- These changes predominantly reflect broader reinsurance sector conditions rather than company-specific disruptions.
- Industry-wide pressures included elevated underwriting expenses caused by substantial natural catastrophe claims.
- Investment income likely provided partial offsetting gains due to rising interest rates improving asset returns.
- No material company-specific eve

In [17]:
# for hyp_researcher in coordinator._research_forest:
#     for report in hyp_researcher.research_reports:
#         print("EXPERT:", report.metadata['expert'])
#         print("EXPERT DESCRIPTION:", report.metadata['expert_description'])
#         print(report)
#         print('-'*50)