# Background Research Experiment

In [1]:
from kruppe.llm import OpenAILLM
from kruppe.prompts.background import (
    RESEARCH_STANDARD_SYSTEM,
    ANALYZE_QUERY_USER_CHAIN,
    ASSIGN_TOOL_USER,
)

# LLM client (constructed with default arguments) used by the generation calls below.
llm = OpenAILLM()

## Analyze Query

In [4]:
# Research question under study; it is substituted into the query-analysis prompts below.
query = "How will Nvidia’s recent GTC Conference announcements—specifically its new AI products and strategies—impact its competitive positioning and long-term growth in the evolving AI and data center markets?"

In [5]:
entity_categories = [
    "firm",
    "industry",
    "country",
    "event",
    "personnel",
    "terminology",
]
information_categories = ["finance", "economy", "industry"]

user_message = ANALYZE_QUERY_USER_CHAIN[0].format(
    query=query,
    entity_categories=", ".join(entity_categories),
    information_categories = ", ".join(information_categories)
)
messages = [
    {"role": "system", "content": RESEARCH_STANDARD_SYSTEM},
    {"role": "user", "content": user_message},
]

response1 = await llm.async_generate(messages)
messages.append({"role": "assistant", "content": response1.text})

# extract additional information from query
followup_message = ANALYZE_QUERY_USER_CHAIN[1].format(
    query=query, information_categories=", ".join(information_categories)
)
messages.append({"role": "user", "content": followup_message})

response2 = await llm.async_generate(messages)
messages.append({"role": "assistant", "content": response2.text})

response_text = response1.text+response2.text
print(response_text)


(entity|Nvidia|firm|Nvidia is the main subject of the query, as the question focuses on its announcements and performance in the market.)
(entity|GTC Conference|event|The GTC Conference is a specific event where Nvidia made announcements; understanding this event is crucial for context regarding the announcements made.)
(entity|AI products|terminology|The new AI products mentioned are a primary focus that needs to be explored further to understand their features and potential impact.)
(entity|strategies|terminology|The strategies that Nvidia is employing are central to evaluating how they might influence competitive positioning and growth, necessitating deeper insight into them.)
(entity|competitive positioning|terminology|Competitive positioning is essential to the query as it assesses Nvidia's market standing relative to its competitors, requiring further understanding of market dynamics.)
(entity|long-term growth|terminology|Long-term growth is a major concern of the inquiry, highli

In [6]:
import re
def _process_info_request(response_text: str):
    pattern = re.compile(r'(?<=\(|\|)([^|()]+)(?=\||\))')
    results = response_text.splitlines()

    info_requests = [] # i call them "requests", like "info_requests" or "tool_requests"
    entities = []

    for result in results:
        match = pattern.findall(result)

        if not match: continue
        # print(match)

        if (match[0] == "entity"):
            entity = {
                "type": match[0],
                "entity_name": match[1],
                "entity_category": match[2],
                "reasoning": match[3]
            }

            entities.append(entity)
            
        elif (match[0] == "info"):
            info_request = {
                "type": match[0],
                "info_description": match[1],
                "info_category": match[2],
                "reasoning": match[3],
                "entity_name": match[4] if len(match) > 4 else ""
            }
            
            info_requests.append(info_request)
    return info_requests, entities

# Parse the combined analysis output into info-request and entity dicts, then display both lists.
info_requests, entities = _process_info_request(response_text)
info_requests, entities

([{'type': 'info',
   'info_description': "Details of Nvidia's new AI products announced at GTC Conference, including specifications and expected launch timeline",
   'info_category': 'industry',
   'reasoning': 'This information is crucial to gauge how innovative or competitive these products are compared to other offerings in the market, impacting performance.',
   'entity_name': 'Nvidia'},
  {'type': 'info',
   'info_description': "Insights on Nvidia's strategic initiatives discussed at the GTC Conference, including partnerships, investments, and research directions",
   'info_category': 'others',
   'reasoning': "Understanding these strategies will clarify Nvidia's approach to market competition and potential growth pathways, directly influencing competitive positioning.",
   'entity_name': 'Nvidia'},
  {'type': 'info',
   'info_description': 'Analysis of industry trends in AI products, including current and emerging competitors and technological advancements',
   'info_category': 

In [7]:
# Pool both record kinds (info requests first), then order them by entity name
# and, within the same entity, by record type.
all_requests = [*info_requests, *entities]
all_requests_sorted = sorted(
    all_requests,
    key=lambda request: (request["entity_name"], request["type"]),
)
all_requests_sorted

[{'type': 'info',
  'info_description': "Insights on Nvidia's strategic initiatives discussed at the GTC Conference, including partnerships, investments, and research directions",
  'info_category': 'others',
  'reasoning': "Understanding these strategies will clarify Nvidia's approach to market competition and potential growth pathways, directly influencing competitive positioning.",
  'entity_name': ''},
 {'type': 'info',
  'info_description': 'Analysis of industry trends in AI products, including current and emerging competitors and technological advancements',
  'info_category': 'industry',
  'reasoning': "This information is vital to assess how Nvidia's new products will resonate in the competitive landscape and their potential market share impact.",
  'entity_name': ''},
 {'type': 'info',
  'info_description': 'Evaluation of competitive positioning metrics for Nvidia compared to its main competitors in AI and data center sectors',
  'info_category': 'finance',
  'reasoning': 'Suc

In [3]:
import os

from kruppe.data_source.news.nyt import NewYorkTimesData

# Path handed to NewYorkTimesData as `headers_path`. Set the NYT_HEADERS_PATH
# environment variable to run this notebook on another machine; the fallback
# is the original hard-coded local path, so existing setups keep working.
nyt_headers_path = os.environ.get(
    "NYT_HEADERS_PATH",
    "/Users/danielliu/Workspace/fin-rag/.nyt-headers.json",
)
nyt = NewYorkTimesData(headers_path=nyt_headers_path)

# NOTE(review): the recorded output of this cell lists 'sort_by' under
# 'required' while the declared property is named 'sort' -- looks like a
# mismatch in the schema definition; confirm in NewYorkTimesData.
nyt.news_search_schema

{'type': 'function',
 'name': 'news_search',
 'description': 'Search New York Times for news articles with a query. Use news_search to search for a specific topic.',
 'parameters': {'type': 'object',
  'properties': {'query': {'type': 'string',
    'description': 'Search query to New York Times'},
   'num_results': {'type': 'number',
    'description': 'Number of top results to return'},
   'sort': {'type': ['string', 'null'],
    'enum': ['relevance', 'date'],
    'description': 'How to sort results. Pass null if not needed.'}},
  'required': ['query', 'num_results', 'sort_by'],
  'additionalProperties': False}}

In [27]:
import json
tools_schemas = [nyt.news_search_schema, nyt.news_recent_schema, nyt.news_archive_schema]

user_message = ASSIGN_TOOL_USER.format(
    info_request="\n".join([json.dumps(request) for request in all_requests_sorted]),
    tools_descriptions="\n".join([json.dumps(tool) for tool in tools_schemas]))

messages = [
    {"role": "system", "content": RESEARCH_STANDARD_SYSTEM},
    {"role": "user", "content": user_message},
]

print(user_message)

response_tools = await llm.async_generate(messages)
response_tools_str = response_tools.text
print(response_tools_str)

-Goal-
Given a request for information and a list of tools, determine which tools an AI agent should use to conduct research for the request for information. 

-Steps-
1. For each tool, use your intuition and the tool description to infer the type of information that the tool can provide for the request.
For each tool, determine the following information:
- tool_name: the tool name in question. use the original name provided in the input
- research_area: the area of research that this tool can assist with to learn more about the request, and its importance.
- rank: the importance of using this tool, where 1 is very important and must be used, and 3 is least important and can be ignored. Minimize the number of tools assigned with a rank of 1, unless absolutely necessary.
- parameters: the parameters that will be entered to the function. It should be a stringified object.

Format each tool identification as (tool|<tool_name>|<research_area>|<rank>|<parameters>)

2. Return output in Engli

In [25]:
def _process_tool_request(response_text: str):
    pattern = re.compile(r'(?<=\(|\|)([^|()]+)(?=\||\))')
    results = response_text.splitlines()

    tool_requests = [] # i call them "requests", like "info_requests" or "tool_requests"

    for result in results:
        match = pattern.findall(result)

        tool_request = {
            "type": match[0],
            "tool_name": match[1],
            "research_area": match[2],
            "rank": match[3],
            "parameters": match[4]
        }
        
        tool_requests.append(tool_request)
    tool_requests = sorted(tool_requests, key=lambda x: x["rank"])
    return tool_requests

# Parse the tool-assignment response into tool request dicts (already sorted by rank).
tool_requests = _process_tool_request(response_tools_str)
tool_requests

# Take the first request after the rank sort and decode its stringified parameters.
request = tool_requests[0]
json.loads(request["parameters"])

{'query': 'Nvidia strategic initiatives GTC Conference, partnerships, investments, research directions',
 'num_results': 10,
 'sort': 'relevance'}

In [49]:
from kruppe.data_source.news.base_news import NewsSource
from typing import Callable
from types import FunctionType

# Print the name of every callable attribute defined directly on NewsSource.
for attr_name in {
    name for name, member in NewsSource.__dict__.items() if isinstance(member, Callable)
}:
    print(attr_name)

news_search
news_recent
news_archive


In [51]:
# Names of the callable attributes defined directly on the class of `nyt`.
{
    name
    for name, member in nyt.__class__.__dict__.items()
    if isinstance(member, Callable)
}

{'_nyt_scraper_helper', 'news_archive', 'news_recent', 'news_search'}

In [1]:
from kruppe.data_source.news.base_news import NewsSource

# Look up the schema for "news_search" via the base class.
# NOTE(review): the recorded output is a bare `<property ...>` object rather
# than a schema dict -- presumably because the lookup happens on the class
# instead of an instance; confirm how get_schema resolves the name.
NewsSource.get_schema("news_search")

<property at 0x10880bd30>