# Agents

In [None]:
import logging
from logging import StreamHandler

# Shared console handler for the notebook. It is only built here; cells that
# want log output attach it to the logger they care about.
formatter = logging.Formatter(
    fmt='%(asctime)s - %(levelname)s - %(message)s',
)
handler = StreamHandler()
handler.setFormatter(formatter)

## ReAct Agent

### Simple Example Using Really Simple Tools

In [None]:
from kruppe.llm import OpenAILLM
from kruppe.algorithm.agents import ReActResearcher
from kruppe.functional.base_tool import BaseTool



In [None]:
class ExampleReActResearcher(ReActResearcher):
    """ReActResearcher with a minimal hand-written prompt for the toy demo.

    Only the two prompt hooks are overridden here; all other behavior comes
    from the base class.
    """

    def _react_system_prompt(self) -> str:
        """Return the system prompt: role, instructions, output format and one worked example."""
        # NOTE: the example tool call must be well-formed (closed string
        # literal), since the model imitates what it is shown.
        return """\
# Role
You answer questions through iterative cycles of reasoning and acting.

# Instruction
You answer questions by first thinking about the question, then call on tools retrieve information. Afterwards, you think about the retrieved information, and continue this process iteratively. When you have found an answer, you finish the task by generating FINISH[answer]. Unless it is the final FINISH action, always call a tool after each thought. Always respond with a thought.

# Output Format
- Always respond with an action at the end, and call on a tool.
- Only respond with one new action at a time.

# Example

## Example 1

### User
What is the capital of France?

### Assistant Response 1
#### Message
"Thought 1: I need to find the capital of France.
Action 1: I will call the get_capital tool to find the capital of France."

#### Tool Calls
get_capital(country="France")

### Assistant Response 2
#### Message
"Observation 1: Paris
Thought 2: The tool call returned Paris. So, the capital of France is paris.
Action 2: FINISH[Paris]"
"""

    def _react_user_prompt(self, query: str) -> str:
        """Return the user prompt, which is the query passed through unchanged."""
        return query

In [None]:
from kruppe.models import Document


class CountryHub(BaseTool):
    """Toy tool hub backed by hard-coded lookup tables.

    Each lookup returns an ``(answer, sources)`` pair rather than a bare
    string, and each has a companion ``<name>_schema`` method that returns a
    function-tool JSON schema describing it.
    """

    def get_capital(self, country: str) -> "tuple[str, list[Document]]":
        """Look up the capital of ``country``.

        Returns:
            A pair of the capital name (``"Unknown"`` when the country is not
            in the table) and a one-element list holding a placeholder
            ``Document`` as the source.
        """
        capitals = {
            "France": "Paris",
            "Germany": "Berlin",
            "Spain": "Madrid",
            "China": "Beijing",
        }
        return capitals.get(country, "Unknown"), [Document(text="Trust me", metadata={})]

    def get_continent(self, country: str) -> "tuple[str, list[Document]]":
        """Look up the continent of ``country``.

        Returns:
            A pair of the continent name (``"Unknown"`` when the country is
            not in the table) and a one-element list holding a placeholder
            ``Document`` as the source.
        """
        continents = {
            "France": "Europe",
            "Germany": "Europe",
            "Spain": "Europe",
            "China": "Asia",
        }
        return continents.get(country, "Unknown"), [Document(text="Trust me", metadata={})]

    def get_capital_schema(self) -> dict:
        """Return the function-tool schema for ``get_capital`` (one required string argument)."""
        return {
            "type": "function",
            "function": {
                "name": "get_capital",
                "description": "Get the capital of a country",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "country": {"type": "string", "description": "The name of the country"},
                    },
                    "required": ["country"],
                    "additionalProperties": False
                },
                "strict": True
            }
        }

    def get_continent_schema(self) -> dict:
        """Return the function-tool schema for ``get_continent`` (one required string argument)."""
        return {
            "type": "function",
            "function": {
                "name": "get_continent",
                "description": "Get the continent of a country",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "country": {"type": "string", "description": "The name of the country"},
                    },
                    "required": ["country"],
                    "additionalProperties": False
                },
                "strict": True
            }
        }



In [None]:
# Build the toy tool hub and give the agent its two bound lookup methods.
toolhub1 = CountryHub()
researcher = ExampleReActResearcher(
    llm=OpenAILLM(model="gpt-4.1"),
    toolkit=[toolhub1.get_capital, toolhub1.get_continent],
)

In [None]:
query = "What is the capital and continent of China?"
researcher._messages = [
    {"role": "system", "content": researcher._react_system_prompt()},
    {"role": "user", "content": researcher._react_user_prompt(query)}
]

reason_messages, action = await researcher.reason(1)
print(reason_messages)
print(action)

In [None]:
# Acting half of the manual step: call act() and show the three values it returns.
# NOTE(review): this cell passes `action` to act() and never appends
# `reason_messages` to researcher._messages, whereas the Librarian cell later
# in this notebook extends _messages first and then calls act(1). One of the
# two call styles is probably stale -- confirm against the act() signature.
tool_messages, obs, sources = await researcher.act(action)
print(tool_messages)
print(obs)
print(sources)

In [None]:
# Let the agent handle the whole query via execute() instead of stepping
# through reason()/act() by hand.
await researcher.execute(query)

## Toolkit

In [None]:
from kruppe.llm import OpenAILLM, OpenAIEmbeddingModel
from kruppe.functional.docstore.mongo_store import MongoDBStore
from kruppe.functional.rag.vectorstore.chroma import ChromaVectorStore
from kruppe.functional.rag.index.vectorstore_index import VectorStoreIndex
from kruppe.functional.rag.retriever.simple_retriever import SimpleRetriever

reset_db=False

db_name = "kruppe_librarian"
collection_name = "playground_3"

# Create doc store
unique_indices = [['title', 'datasource']] # NOTE: this is important to avoid duplicates
docstore = await MongoDBStore.acreate_db(
    db_name=db_name,
    collection_name=collection_name,
    unique_indices=unique_indices,
    reset_db=reset_db
)

# Create vectorstore index
embedding_model = OpenAIEmbeddingModel()
vectorstore = ChromaVectorStore(
    embedding_model=embedding_model,
    collection_name=collection_name,
    persist_path='/Volumes/Lexar/Daniel Liu/vectorstores/kruppe_librarian'
)

if reset_db:
    vectorstore.clear()
    
index = VectorStoreIndex(vectorstore=vectorstore)

retriever = SimpleRetriever(index=index)

In [None]:
from kruppe.functional.ragquery import RagQuery
from kruppe.functional.llmquery import LLMQuery
from kruppe.functional.newshub import NewsHub
from kruppe.functional.finhub import FinHub

from kruppe.data_source.news.nyt import NewYorkTimesData
from kruppe.data_source.news.ft import FinancialTimesData
from kruppe.data_source.news.newsapi import NewsAPIData

from kruppe.data_source.finance.yfin import YFinanceData

# Query engines: one answers through the retriever, one through the LLM alone.
rag_query_engine = RagQuery(retriever=retriever, llm=OpenAILLM())
llm_query_engine = LLMQuery(llm=OpenAILLM())

# News tools, backed by three news data sources.
news_hub = NewsHub(
    news_sources=[
        NewYorkTimesData(headers_path="../../.nyt-headers.json"),
        FinancialTimesData(headers_path="../../.ft-headers.json"),
        NewsAPIData(),
    ],
)

# Company financial tools, backed by the YFinance data source.
fin_hub = FinHub(fin_source=YFinanceData(), llm=OpenAILLM())

# Flat list of bound tool methods handed to the agents below.
toolkit = [
    # question answering
    rag_query_engine.rag_query,
    llm_query_engine.llm_query,
    # news
    news_hub.news_search,
    news_hub.news_recent,
    news_hub.news_archive,
    # company financials
    fin_hub.get_company_background,
    fin_hub.get_company_income_stmt,
    fin_hub.get_company_balance_sheet,
    fin_hub.analyze_company_financial_stmts,
]

## Librarian Agent (more like ReAct agent at this point)

In [None]:
from kruppe.algorithm.librarian import Librarian

# The librarian reuses the doc store, index and toolkit built in the Toolkit section.
librarian = Librarian(
    llm=OpenAILLM(model="gpt-4.1"),
    docstore=docstore,
    index=index,
    toolkit=toolkit,
)



In [None]:
# Question used by the manual reason/act cells and the first execute() run below.
query = "Who made more in revenue last year, NVIDIA or Tesla?"

In [None]:
librarian._messages = [
    {"role": "system", "content": librarian._react_system_prompt()},
    {"role": "user", "content": librarian._react_user_prompt(query)}
]

reason_messages, action = await librarian.reason(1)
print(reason_messages)
print(action)

In [None]:
# Append the reasoning turn to the conversation, then call act(1)
# (presumably the step index, mirroring reason(1) -- TODO confirm).
# NOTE(review): the ReAct cell earlier in this notebook calls act(action)
# without extending _messages first; the two call styles disagree -- confirm
# which one matches the current act() signature.
librarian._messages.extend(reason_messages)
tool_call_messages, obs, sources = await librarian.act(1)
print(tool_call_messages)
print(obs)
print(sources)

In [None]:
# End-to-end run on the current `query`; the trailing bare `result` makes the
# notebook display the returned value.
result = await librarian.execute(query, to_print=True)
result

In [None]:
# Broader, report-style request; `response` is inspected in the next cell.
query = "I want a preliminary background report on Tesla, including its recent news and financials."
response = await librarian.execute(query, to_print=True)

In [None]:
# Dump the answer text, a divider, then every source with its metadata.
print(response.text, '=' * 80, sep="\n")
for doc in response.sources:
    print(doc.text, doc.metadata, "-" * 80, sep="\n")

## Coordinator

In [None]:
from kruppe.algorithm.coordinator import Coordinator

# The coordinator gets its own LLM and reuses the librarian built above.
coordinator_llm = OpenAILLM(model="gpt-4.1-mini")
coordinator = Coordinator(llm=coordinator_llm, librarian=librarian)

In [None]:
# Same report request as the one given to the librarian; used as the coordinator's input below.
query = "I want a preliminary background report on Tesla, including its recent news and financials."

In [None]:
# Optional shortcut: skip this cell to run the coordinator fully.
# Running it pastes in a background report that was already generated in an
# earlier run and stores it on the coordinator.

from kruppe.models import Response

bkg_report = Response(
    text="""preliminary background report on tesla, inc.

1. company overview
tesla, inc. (tsla) designs, develops, manufactures, leases, and sells electric vehicles, as well as energy generation and storage systems in the united states, china, and internationally. founded in 2003 and headquartered in austin, texas, tesla operates through two main segments: automotive (electric vehicles and related services) and energy generation and storage (solar energy and battery products). as of the latest data, tesla employs over 125,000 people globally.

2. recent news
a targeted search for recent news about tesla over the past 30 days did not return any relevant articles in the available news databases. this may be due to limited coverage or the scope of the datasets searched.

3. financial overview & analysis (past 3 years)
- tesla saw modest revenue growth in 2024, but profitability margins contracted amid rising operating expenses and a significant tax charge.
- the company maintains a strong liquidity position, highlighted by increasing cash and short-term investments, and current and quick ratios above industry norms.
- active capital investments point to ongoing capacity and innovation expansion, underpinned by consistent r&d spending.
- although long-term debt rose, tesla's debt-to-equity ratio and earnings power suggest manageable financial risk.
- operationally, inventory turnover improved, while receivables turnover fell, highlighting a potential area for closer management.
- overall, tesla demonstrates balanced growth, innovation, and financial stability, though it faces short-term challenges in profitability.

this report provides a concise snapshot of tesla’s business, recent developments, and financial health as of 2024.
""",
    sources=[]
)

# Writes the private attribute directly so the pasted report is used.
coordinator._background_report = bkg_report

In [None]:
domain_experts = await coordinator.generate_domain_experts(query)
filtered_domain_experts = await coordinator.filter_domain_experts(query, domain_experts,
                                                                  n_experts=5)

for expert, desc in domain_experts.items():
    print(f"{expert}: {desc}")

print("="*80)

for expert, desc in filtered_domain_experts.items():
    print(f"{expert}: {desc}")

In [None]:
# Next is just initializing the research forest, which doesn't require much testing.
# Going to test the hypothesis researcher next.

## Hypothesis Researcher

In [None]:
from kruppe.algorithm.hypothesis import HypothesisResearcher
from kruppe.llm import OpenAILLM

# Route INFO-level records from the "kruppe.llm" logger through the shared handler.
logger = logging.getLogger("kruppe.llm")
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# One expert persona, written out by hand for this test.
hyp_llm = OpenAILLM(model="gpt-4.1-mini")
expert_title = "Regulatory Affairs Specialist in Automotive and Energy Sectors"
expert_desc = (
    "The Regulatory Affairs Specialist understands the complex regulatory "
    "frameworks impacting electric vehicle manufacturers and clean energy "
    "companies worldwide. This expert assesses how changing laws and "
    "policies influence Tesla’s operations and strategy."
)

hyp_researcher = HypothesisResearcher(
    role=expert_title,
    role_description=expert_desc,
    max_degree=2,
    max_depth=10,
    llm=hyp_llm,
    toolkit=toolkit,
)

# Bare expression so the notebook displays the constructed researcher.
hyp_researcher

In [None]:
# Research question handed to the hypothesis researcher.
query = "How will the recent changes in US EV tax credits impact Tesla's sales in 2024?"

# Background text passed alongside the query; it matches the report text
# pasted into the coordinator cell earlier in this notebook.
background = """preliminary background report on tesla, inc.

1. company overview
tesla, inc. (tsla) designs, develops, manufactures, leases, and sells electric vehicles, as well as energy generation and storage systems in the united states, china, and internationally. founded in 2003 and headquartered in austin, texas, tesla operates through two main segments: automotive (electric vehicles and related services) and energy generation and storage (solar energy and battery products). as of the latest data, tesla employs over 125,000 people globally.

2. recent news
a targeted search for recent news about tesla over the past 30 days did not return any relevant articles in the available news databases. this may be due to limited coverage or the scope of the datasets searched.

3. financial overview & analysis (past 3 years)
- tesla saw modest revenue growth in 2024, but profitability margins contracted amid rising operating expenses and a significant tax charge.
- the company maintains a strong liquidity position, highlighted by increasing cash and short-term investments, and current and quick ratios above industry norms.
- active capital investments point to ongoing capacity and innovation expansion, underpinned by consistent r&d spending.
- although long-term debt rose, tesla's debt-to-equity ratio and earnings power suggest manageable financial risk.
- operationally, inventory turnover improved, while receivables turnover fell, highlighting a potential area for closer management.
- overall, tesla demonstrates balanced growth, innovation, and financial stability, though it faces short-term challenges in profitability.

this report provides a concise snapshot of tesla’s business, recent developments, and financial health as of 2024.
"""

In [None]:
# Create the starting node for the search from the query and background text.
root_node = await hyp_researcher.init_starting_node(query, background)


In [None]:
# Show the starting node's raw instance attributes.
root_node.__dict__

In [None]:
# Kick off dfs_research from the starting node and keep its result.
response = await hyp_researcher.dfs_research(root_node)