# Testing Hypothesis Researcher

In [None]:
# logging imports
import logging
from logging import StreamHandler

# essential kruppe imports
from kruppe.llm import OpenAILLM

# toolkit import
from kruppe.llm import OpenAIEmbeddingModel
from kruppe.functional.docstore.mongo_store import MongoDBStore
from kruppe.functional.rag.vectorstore.chroma import ChromaVectorStore
from kruppe.functional.rag.index.vectorstore_index import VectorStoreIndex
from kruppe.functional.rag.retriever.simple_retriever import SimpleRetriever
from kruppe.functional.ragquery import RagQuery
from kruppe.functional.llmquery import LLMQuery
from kruppe.functional.newshub import NewsHub
from kruppe.functional.finhub import FinHub
from kruppe.data_source.news.nyt import NewYorkTimesData
from kruppe.data_source.news.ft import FinancialTimesData
from kruppe.data_source.news.newsapi import NewsAPIData
from kruppe.data_source.finance.yfin import YFinanceData

# hypothesis researcher import
from kruppe.algorithm.hypothesis import HypothesisResearcher


# set up logging for jupyter notebook
from pathlib import Path

# Shared record layout for both handlers.
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

# Console handler. It is configured here but NOT attached to any logger in
# this cell, so nothing from "kruppe.llm" is echoed to the notebook output.
ch = StreamHandler()
ch.setFormatter(formatter)
ch.setLevel(logging.DEBUG)

# logging.FileHandler opens the file immediately and raises FileNotFoundError
# when the parent directory is missing, so make sure it exists first.
log_path = Path('logs/hypothesis_researcher.log')
log_path.parent.mkdir(parents=True, exist_ok=True)

fh = logging.FileHandler(log_path)
fh.setFormatter(formatter)
fh.setLevel(logging.DEBUG)

logger = logging.getLogger("kruppe.llm")
logger.setLevel(logging.INFO)

# Loggers are process-wide singletons: re-running this cell would otherwise
# stack a second FileHandler on the same file and duplicate every record.
# Drop (and close) any handler left over from a previous run.
for stale in list(logger.handlers):
    if isinstance(stale, logging.FileHandler) and stale.baseFilename == fh.baseFilename:
        logger.removeHandler(stale)
        stale.close()

logger.addHandler(fh)  # log llm output to a file instead of console.

## Initializing Toolkit

Set up the document store and the vector store.

In [None]:
reset_db=False

db_name = "kruppe_librarian"
collection_name = "playground_3"

# Create doc store
unique_indices = [['title', 'datasource']] # NOTE: this is important to avoid duplicates
docstore = await MongoDBStore.acreate_db(
    db_name=db_name,
    collection_name="playground",
    unique_indices=unique_indices,
    reset_db=reset_db
)

# Create vectorstore index
embedding_model = OpenAIEmbeddingModel()
vectorstore = ChromaVectorStore(
    embedding_model=embedding_model,
    collection_name=collection_name,
    persist_path='/Volumes/Lexar/Daniel Liu/vectorstores/kruppe_librarian'
)

if reset_db:
    vectorstore.clear()
    
index = VectorStoreIndex(vectorstore=vectorstore)

retriever = SimpleRetriever(index=index)

In [None]:
# Query engine backed by the retriever built in the previous cell.
rag_query_engine = RagQuery(retriever=retriever, llm=OpenAILLM())

# Query engine that only takes an LLM (no retriever).
llm_query_engine = LLMQuery(llm=OpenAILLM())

# News hub over three sources; the NYT and FT sources read their request
# headers from JSON files two directories above the notebook.
news_hub = NewsHub(
    news_sources=[
        NewYorkTimesData(headers_path="../../.nyt-headers.json"),
        FinancialTimesData(headers_path="../../.ft-headers.json"),
        NewsAPIData(),
    ],
)

# Finance hub: yfinance data source plus its own LLM instance.
fin_hub = FinHub(fin_source=YFinanceData(), llm=OpenAILLM())

In [None]:
# Bound methods handed to the researcher as its toolkit, grouped by the
# object they come from. Order matches the original flat list.
toolkit = [
    # query engines
    rag_query_engine.rag_query,
    llm_query_engine.llm_query,
] + [
    # news hub
    news_hub.news_search,
    news_hub.news_recent,
    # news_hub.news_archive,  # currently left out of the toolkit
] + [
    # finance hub
    fin_hub.get_company_background,
    fin_hub.get_company_income_stmt,
    fin_hub.get_company_balance_sheet,
    fin_hub.analyze_company_financial_stmts,
]

## Hypothesis Researcher

Also referred to as the `Research Tree`.

### Initialization

Inputs

In [None]:
# Research question passed to the researcher in the cells below.
query = (
    "How will the recent changes in US EV tax credits "
    "impact Tesla's sales in 2024?"
)

Values that should be generated by other agentic components

In [None]:
# Values that should be generated by other agent components; they are
# hard-coded in this notebook.

# Role name and role description passed to the researcher.
expert_title = "Regulatory Affairs Specialist in Automotive and Energy Sectors"
expert_desc = (
    "The Regulatory Affairs Specialist understands the complex regulatory "
    "frameworks impacting electric vehicle manufacturers and clean energy "
    "companies worldwide. This expert assesses how changing laws and "
    "policies influence Tesla’s operations and strategy."
)

# Background report passed alongside the query when the starting node is built.
background = """preliminary background report on tesla, inc.

1. company overview
tesla, inc. (tsla) designs, develops, manufactures, leases, and sells electric vehicles, as well as energy generation and storage systems in the united states, china, and internationally. founded in 2003 and headquartered in austin, texas, tesla operates through two main segments: automotive (electric vehicles and related services) and energy generation and storage (solar energy and battery products). as of the latest data, tesla employs over 125,000 people globally.

2. recent news
a targeted search for recent news about tesla over the past 30 days did not return any relevant articles in the available news databases. this may be due to limited coverage or the scope of the datasets searched.

3. financial overview & analysis (past 3 years)
- tesla saw modest revenue growth in 2024, but profitability margins contracted amid rising operating expenses and a significant tax charge.
- the company maintains a strong liquidity position, highlighted by increasing cash and short-term investments, and current and quick ratios above industry norms.
- active capital investments point to ongoing capacity and innovation expansion, underpinned by consistent r&d spending.
- although long-term debt rose, tesla's debt-to-equity ratio and earnings power suggest manageable financial risk.
- operationally, inventory turnover improved, while receivables turnover fell, highlighting a potential area for closer management.
- overall, tesla demonstrates balanced growth, innovation, and financial stability, though it faces short-term challenges in profitability.

this report provides a concise snapshot of tesla’s business, recent developments, and financial health as of 2024.
"""

Initialize the hypothesis researcher

In [None]:
# LLM used by the researcher itself; the model is pinned explicitly, unlike
# the default-constructed OpenAILLM() instances used by the tools.
hyp_llm = OpenAILLM(model="gpt-4.1-mini")

# Researcher configured with the expert persona, the tree limits
# (max_degree / max_depth) and the toolkit assembled earlier.
hyp_researcher = HypothesisResearcher(
    role=expert_title,
    role_description=expert_desc,
    max_degree=2,
    max_depth=10,
    llm=hyp_llm,
    toolkit=toolkit,
)



### Init Starting Node

In [None]:
# Build the starting node from the research question and the background
# report. Top-level `await` only works inside a notebook / async REPL.
root_node = await hyp_researcher.init_starting_node(query, background)


In [None]:
# Display the starting node's instance attributes as a dict.
vars(root_node)

### DFS Search

In [None]:
# Run the researcher's dfs_research from the starting node for the same
# query and keep whatever it returns in `response`.
# NOTE(review): presumably a depth-first expansion bounded by the
# max_degree / max_depth set at construction -- confirm in HypothesisResearcher.
response = await hyp_researcher.dfs_research(root_node, query)

In [None]:
# Display the `act_queued` attribute of the root's first child.
# NOTE(review): assumes `children` is a non-empty sequence after the search;
# indexing [0] will fail if no child was created.
root_node.children[0].act_queued