In [22]:
import os

import openai
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents import AgentExecutor
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain import hub
from langchain.tools.render import render_text_description_and_args
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain.embeddings import SentenceTransformerEmbeddings

from utils import map_openbb_collection_to_langchain_tools


# Turn off HuggingFace tokenizers parallelism to avoid some of its warnings
# (presumably only matters if the SentenceTransformer embeddings are enabled).
os.environ["TOKENIZERS_PARALLELISM"] = "False"  # Avoid some warnings from HuggingFace
# Set up OpenAI API key.
# Prefer exporting OPENAI_API_KEY in the environment over filling these in,
# so that no secret ends up saved inside the notebook.
# os.environ["OPENAI_API_KEY"] = ""
# openai.api_key = ""

In [23]:
# Tool retrieval set-up: load three OpenBB collections as LangChain tools, then
# index the tool descriptions so matching tools can be looked up by query.
fundamental_openbb_tools = map_openbb_collection_to_langchain_tools("/equity/fundamental")
peer_comparison_openbb_tools = map_openbb_collection_to_langchain_tools('/equity/compare')
estimate_openbb_tools = map_openbb_collection_to_langchain_tools("/equity/estimates")

openbb_tools = (
    fundamental_openbb_tools
    + peer_comparison_openbb_tools
    + estimate_openbb_tools
)

# One document per tool. The metadata stores the tool's position in
# `openbb_tools` so that a retrieved document can be mapped back to its tool.
docs = [
    Document(page_content=openbb_tool.description, metadata={"index": position})
    for position, openbb_tool in enumerate(openbb_tools)
]
print(len(docs))

# Empirically, these sentence embeddings can occasionally be better.
# (But they are a substantially heavier dependency than relying on OpenAI's API.)
# sentence_transformer = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# vector_store = FAISS.from_documents(docs, sentence_transformer)
vector_store = FAISS.from_documents(docs, OpenAIEmbeddings())

25


In [24]:
# Retriever over the tool-description index built above; `k` caps how many
# documents (and therefore tools) are requested per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})  # <- now returns top 2

def get_tools(query):
    """Return the OpenBB tools whose descriptions the retriever matches to ``query``.

    Every retrieved document stores the position of its tool in ``openbb_tools``
    under ``metadata["index"]``; that position is used to look the tool up.
    """
    matches = retriever.get_relevant_documents(query)
    selected_tools = []
    for match in matches:
        selected_tools.append(openbb_tools[match.metadata["index"]])
    return selected_tools

# Smoke test: list the tools retrieved for a sample query, showing only the
# first line of each tool description.
fetched_tools = get_tools("analyst price")
for tool in fetched_tools:
    print("tool: ", tool.name, ", description: ", tool.description.partition("\n")[0], sep="")

tool: /equity/estimates/historical, description: Historical Analyst Estimates. Analyst stock recommendations.
tool: /equity/estimates/price_target, description: Price Target. Price target data.


In [31]:
# Let's make it easy to create react agents since we'll need a lot of them later.
from langchain.output_parsers import RetryWithErrorOutputParser
from langchain.llms import OpenAI

# Output parser that can hand a failed parse back to an LLM for another attempt.
# "gpt-4" is a chat-only model, so it is driven through the chat wrapper
# (`ChatOpenAI`); the completion-style `OpenAI` wrapper talks to an endpoint
# that does not serve it.
# NOTE(review): nothing in the visible cells uses `retry_parser` yet.
retry_parser = RetryWithErrorOutputParser.from_llm(
    parser=JSONAgentOutputParser(), llm=ChatOpenAI(temperature=0, model="gpt-4")
)

def langchain_react_agent(tools, model="gpt-4-1106-preview", verbose=True):
    """Build a ReAct agent executor that is bound to the given tools.

    Args:
        tools: LangChain tools the agent may call. Their rendered descriptions
            (with argument schemas) and their names are injected into the prompt.
        model: Name of the OpenAI chat model that drives the agent. Defaults to
            the model this notebook has always used.
        verbose: Forwarded to ``AgentExecutor``. Useful for debugging; pass
            ``False`` to cut down on output spam.

    Returns:
        An ``AgentExecutor`` wrapping the prompt -> model -> JSON-parser chain.
        Parsing errors are handled by the executor and intermediate steps are
        not returned.
    """
    # The prompt is pulled from the LangChain hub on every call, i.e. once per agent.
    prompt = hub.pull("hwchase17/react-multi-input-json").partial(
        tools=render_text_description_and_args(tools),
        tool_names=", ".join(t.name for t in tools),
    )

    # Generation stops at "\nObservation" — presumably so that the executor,
    # not the model, supplies the tool result.
    llm = ChatOpenAI(model=model).bind(stop=["\nObservation"])

    chain = (
        {
            "input": lambda x: x["input"],
            # The executor's (action, observation) history rendered as plain text.
            "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
        }
        | prompt
        | llm
        | JSONAgentOutputParser()
    )

    return AgentExecutor(
        agent=chain,
        tools=tools,
        verbose=verbose,
        return_intermediate_steps=False,
        handle_parsing_errors=True,
    )

# Example agent bound to the tools retrieved by the quick "analyst price" lookup.
agent_executor = langchain_react_agent(tools=fetched_tools)

In [32]:
# The primary goal is to
# 1. Break a larger question down into subquestions + the appropriate query to fetch the right tool to answer the subquestion
# 2. Retrieve the right tools for each subquestion
# 3. Answer each subquestion using a ReAct agent
# 4. Combine all of the subquestion answers to generate a final answer.

# Part 1 break it into subquestions


# We'll use Pydantic to do some output enforcement
# (It's just prompting and parsing under the hood)
# One step of the decomposed user question, as the LLM is asked to emit it.
# The field descriptions are presumably surfaced to the model through the
# parser's format instructions, so they double as prompt text — edit with care.
class SubQuestion(BaseModel):
    # Identifier that other subquestions refer to through `depends_on`.
    id: int = Field(description="The unique ID of the subquestion.")
    # The natural-language subquestion to answer.
    question: str = Field(description="The subquestion itself.")
    # Keyword query later used to retrieve the tools for this subquestion.
    query: str = Field(
        description="The query to pass to the `fetch_tools` function to retrieve the appropriate tool to answer the question."
    )
    # Ids of the subquestions whose answers are needed first; empty when omitted.
    depends_on: list[int] = Field(description="The list of subquestion ids whose answer is required to answer this subquestion.", default=[])

# Top-level object the LLM must return: a wrapper around the list of subquestions.
class SubQuestionList(BaseModel):
    subquestions: list[SubQuestion] = Field(
        description="The list of SubQuestion objects."
    )

# Parses the model's reply into a `SubQuestionList`; its format instructions
# are injected into the decomposition prompt further down.
subquestion_parser = PydanticOutputParser(pydantic_object=SubQuestionList)

# System prompt for the decomposition step. `{format_instructions}` is the only
# template variable; the doubled braces in the JSON example are escapes that
# render as literal braces once the template is formatted.
system_message = """\
You are a world-class state-of-the-art agent.

You can access multiple tools, via a "fetch_tools" function that will retrieve the necessary tools.
The `fetch_tools` function accepts a string of keywords as input specifying the type of tool to retrieve.
Each retrieved tool represents a different data source or API that can retrieve the required data.

Your purpose is to help answer a complex user question by generating a list of sub
questions, as well as the corresponding keyword query to the "fetch_tools" function
to retrieve the relevant tools to answer each corresponding subquestion.
You must also specify the dependencies between subquestions, since sometimes one
subquestion will require the outcome of another in order to fully answer.

These are the guidelines you consider when completing your task:
* Be as specific as possible
* Avoid using acronyms
* The sub-questions should be relevant to the user question
* The sub-questions should be answerable by the tools retrieved by the query to `fetch_tools`
* You can generate multiple sub-questions
* You don't need to query for a tool if you don't think it's relevant
* A subquestion may not depend on a subquestion that follows it (i.e. comes after it.)

## Output format
{format_instructions}

### Example responses
```json
{{"subquestions": [
    {{
        "id": 1,
        "question": "What are the latest financial statements of AMZN?", 
        "query": "financial statements",
        "depends_on": []
    }}, 
    {{
        "id": 2,
        "question": "What is the most recent revenue and profit margin of AMZN?", 
        "query": "revenue profit margin ratios",
        "depends_on": []
    }}, 
    {{
        "id": 3,
        "question": "What is the current price to earnings (P/E) ratio of AMZN?", 
        "query": "ratio price to earnings",
        "depends_on": []
    }}, 
    {{
        "id": 4,
        "question": "Who are the peers of AMZN?", 
        "query": "peers",
        "depends_on": []
    }},
    {{
        "id": 5,
        "question": "Which of AMZN's peers have the largest market cap?", 
        "query": "market cap",
        "depends_on": [4]
    }}
]}}
```

"""

# Human turn: the user's question under its own heading. The leading spaces
# inside the literal are part of the prompt text.
human_message = """\
    ## User Question
    {input}
    """

# gpt-3.5-turbo works well, but gpt-4-1106-preview isn't good at returning JSON.
llm = ChatOpenAI(model="gpt-4")

# System + human messages with the parser's format instructions pre-filled, so
# only `input` is left to supply at call time.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_message), ("human", human_message)]
).partial(format_instructions=subquestion_parser.get_format_instructions())

# question -> prompt -> model -> parsed SubQuestionList
subquestion_chain = (
    {"input": lambda payload: payload["input"]}
    | prompt
    | llm
    | subquestion_parser
)

# The high-level question this notebook attempts to answer.
INPUT = """\
Check what are TSLA peers. From those, check which one has the highest market cap.
Then, on the ticker that has the highest market cap get the most recent price target estimate from an analyst,
and tell me who it was and one what date the estimate was made.
"""

# INPUT = "Perform a fundamentals financial analysis of AMZN using the most recently available data. What do you find that's interesting?"

# Decompose the question and show every generated subquestion.
# We probably won't have all the right tools to answer these questions.
# Improvements for the future!
subquestion_list = subquestion_chain.invoke({"input": INPUT})

for subquestion in subquestion_list.subquestions:
    print(subquestion)

id=1 question='Who are the peers of Tesla Inc. (TSLA)?' query='peers TSLA' depends_on=[]
id=2 question="Which of TSLA's peers has the largest market cap?" query='market cap' depends_on=[1]
id=3 question='What is the most recent price target estimate for the company with the highest market cap?' query='price target estimate' depends_on=[2]
id=4 question='Who provided the most recent price target estimate?' query='analyst information' depends_on=[3]
id=5 question='On what date was the most recent price target estimate made?' query='estimate date' depends_on=[3]


In [33]:
# Part 2: retrieve the tools for every subquestion.
# Each record bundles a subquestion with its tools; the same records are
# re-used (and extended) by the later steps.
subquestions_and_tools = [
    {
        "id": subquestion.id,
        "subquestion": subquestion.question,
        "query": subquestion.query,
        "tools": get_tools(subquestion.query),
        "depends_on": subquestion.depends_on,
    }
    for subquestion in subquestion_list.subquestions
]

# Summary: one paragraph per subquestion, followed by its tools (first line of
# each description only).
LABELLED_KEYS = (
    ("id: ", "id"),
    ("subquestion ", "subquestion"),
    ("query: ", "query"),
    ("depends on: ", "depends_on"),
)
for record in subquestions_and_tools:
    for label, key in LABELLED_KEYS:
        print(label, record[key])
    for tool in record["tools"]:
        print("  " + tool.name + ": " + tool.description.partition("\n")[0])
    print("")

id:  1
subquestion  Who are the peers of Tesla Inc. (TSLA)?
query:  peers TSLA
depends on:  []
  /equity/compare/peers: Equity Peers. Company peers.
  /equity/fundamental/multiples: Equity Valuation Multiples. Valuation multiples for a stock ticker.

id:  2
subquestion  Which of TSLA's peers has the largest market cap?
query:  market cap
depends on:  [1]
  /equity/fundamental/metrics: Key Metrics. Key metrics for a given company.
  /equity/fundamental/multiples: Equity Valuation Multiples. Valuation multiples for a stock ticker.

id:  3
subquestion  What is the most recent price target estimate for the company with the highest market cap?
query:  price target estimate
depends on:  [2]
  /equity/estimates/consensus: Price Target Consensus. Price target consensus data.
  /equity/estimates/price_target: Price Target. Price target data.

id:  4
subquestion  Who provided the most recent price target estimate?
query:  analyst information
depends on:  [3]
  /equity/estimates/historical: Histo

In [34]:
# Part 3: answer each subquestion with its own ReAct agent, bound to the tools
# retrieved for that subquestion. `agents[k]` belongs to `subquestions_and_tools[k]`.
agents = [
    langchain_react_agent(tools=record["tools"])
    for record in subquestions_and_tools
]

len(agents)

5

In [35]:
# Run the agents to answer the subquestions, in order. Each agent is invoked
# exactly once, inside the try block, so a failing agent yields a fallback
# answer instead of aborting the loop (and nothing is billed twice).
for i, subquestion in enumerate(subquestions_and_tools):
    # Dependencies that have already been answered. A dependency without an
    # "observation" yet (e.g. one listed after this subquestion) is skipped
    # rather than raising a KeyError.
    deps = [
        dep
        for dep in subquestions_and_tools
        if dep["id"] in subquestion["depends_on"] and "observation" in dep
    ]

    dependencies = ""
    for dep in deps:
        dependencies += "subquestion: " + dep["subquestion"] + "\n"
        dependencies += "observations:\n" + str(dep["observation"]) + "\n\n"

    # Named `agent_input` so the builtin `input` is not shadowed for the rest
    # of the notebook session.
    agent_input = f"""\
Given the following high-level question: {INPUT}
Answer only the following subquestion: {subquestion['subquestion']}

Give your answer in a bullet-point list.
Explain your reasoning, and make reference to and provide the relevant retrieved data as part of your answer.

Remember to use the tools provided to you to answer the question, and STICK TO THE INPUT SCHEMA.

Example output format:
```
- <the first observation, insight, and/or conclusion> 
- <the second observation, insight, and/or conclusion> 
- <the third observation, insight, and/or conclusion> 
... REPEAT AS MANY TIMES AS NECESSARY TO ANSWER THE SUBQUESTION.
```
If necessary, make use of the following subquestions and their answers was to answer your subquestion:
{dependencies}

Return only your answer as a bulleted list as a single string. Don't respond with JSON or any other kind of datastructure.
"""

    print("=======QUESTION + PROMPT========")
    print(agent_input)

    try:
        result = agents[i].invoke({"input": agent_input})
        output = result["output"]
    except Exception as err:  # Deliberately broad: any agent/tool failure becomes a fallback answer.
        print(err)
        output = "I was unable to answer the subquestion using the available tool."  # We'll include the error message in the future

    # The answer is stored on the shared record so that dependent subquestions
    # and the final summarisation step can read it.
    subquestion["observation"] = output
    print("=======ANSWER========")
    print(output)
    print("=====================")
    print("=====================")

Given the following high-level question: Check what are TSLA peers. From those, check which one has the highest market cap.
Then, on the ticker that has the highest market cap get the most recent price target estimate from an analyst,
and tell me who it was and one what date the estimate was made.

Answer only the following subquestion: Who are the peers of Tesla Inc. (TSLA)?

Give your answer in a bullet-point list.
Explain your reasoning, and make reference to and provide the relevant retrieved data as part of your answer.

Remember to use the tools provided to you to answer the question, and STICK TO THE INPUT SCHEMA.

Example output format:
```
- <the first observation, insight, and/or conclusion> 
- <the second observation, insight, and/or conclusion> 
- <the third observation, insight, and/or conclusion> 
... REPEAT AS MANY TIMES AS NECESSARY TO ANSWER THE SUBQUESTION.
```
If necessary, make use of the following subquestions and their answers was to answer your subquestion:


Ret

In [37]:
# Part 4 is to generate a response based on the answers to each of the subquestions

def render_subquestions_and_answers(subquestions):
    """Render every subquestion and its observation as one block of text.

    Each entry must provide the keys ``"subquestion"`` and ``"observation"``.
    It contributes a ``Subquestion:`` line and an ``Observations:`` section
    (the observation is passed through ``str``), terminated by a blank line.
    An empty input yields an empty string.
    """
    return "".join(
        "Subquestion: " + entry["subquestion"] + "\n"
        + "Observations: \n" + str(entry["observation"]) + "\n\n"
        for entry in subquestions
    )


# Final prompt: the original question plus every subquestion/observation pair.
# The indentation inside the literal is part of the prompt text.
system_message = """\
    Given the following high-level question: 

    Question: {input}

    And the following subquestions and subsequent observations:

    {subquestions}

    Answer the high-level question. Give your answer in a bulleted list.
    """

llm = ChatOpenAI(model="gpt-4")  # Let's use the big model for the final answer.
prompt = ChatPromptTemplate.from_messages([("system", system_message)])

# Maps the invocation payload onto the two template variables; the subquestion
# records are rendered to text before they reach the prompt.
final_chain = (
    {
        "input": lambda payload: payload["input"],
        "subquestions": lambda payload: render_subquestions_and_answers(
            payload["subquestions"]
        ),
    }
    | prompt
    | llm
)

result = final_chain.invoke({"subquestions": subquestions_and_tools, "input": INPUT})

In [40]:
# Query OpenBB directly for the price-target rows of ticker "F" and show them
# as a DataFrame — presumably a manual cross-check of the analyst, date and
# target quoted in the final answer.
from openbb import obb
obb.equity.estimates.price_target("F").to_df()

Unnamed: 0,symbol,published_date,news_url,news_title,analyst_name,analyst_company,price_target,adj_price_target,price_when_posted,news_publisher,news_base_url
0,F,2023-11-01 14:38:00,https://www.benzinga.com/analyst-ratings/analy...,Barclays Upgrades Both Ford And General Motors...,Dan Levy,BOCOM International Holdings Company,14.0,14.0,9.91,Benzinga,benzinga.com
1,F,2023-10-25 14:22:00,https://www.benzinga.com/analyst-ratings/analy...,Ford Motor Company Q3 Earnings Preview: UAW St...,Tom Narayan,RBC Capital,12.0,12.0,11.54,Benzinga,benzinga.com
2,F,2023-07-31 13:21:00,https://www.benzinga.com/news/23/07/33469179/f...,Ford's $4.5 Billion EV Loss Overshadowed by Q2...,Adam Jonas,Morgan Stanley,16.0,16.0,13.11,Benzinga,benzinga.com
3,F,2022-12-06 06:44:00,https://www.benzinga.com/trading-ideas/long-id...,"Ford, Tesla And Other Big Auto Stocks From Ben...",Adam Jonas,Morgan Stanley,32.0,32.0,13.38,Benzinga,benzinga.com
4,F,2022-11-30 06:58:00,https://www.benzinga.com/news/22/11/29902358/c...,"Citigroup Maintains Neutral on Ford Motor, Rai...",,Citigroup,14.0,14.0,13.75,Benzinga,benzinga.com
5,F,2022-10-27 11:28:00,https://www.benzinga.com/analyst-ratings/analy...,Ford Earnings Beat Doesn't Impress These Analy...,Mark Delaney,Goldman Sachs,13.0,13.0,12.93,Benzinga,benzinga.com
6,F,2022-10-05 08:17:00,https://www.benzinga.com/analyst-ratings/upgra...,Why Ford Stock Is Rising Today,Adam Jonas,Morgan Stanley,14.0,14.0,12.36,Benzinga,benzinga.com
7,F,2022-09-28 07:24:00,https://www.benzinga.com/news/22/09/29048590/c...,"Citigroup Maintains Neutral on Ford Motor, Low...",,Citigroup,13.0,13.0,11.91,Benzinga,benzinga.com
8,F,2022-08-02 08:34:00,https://www.benzinga.com/news/22/08/28308913/c...,"Citigroup Maintains Neutral on Ford Motor, Rai...",,Citigroup,16.0,16.0,15.34,Benzinga,benzinga.com
9,F,2022-07-28 10:22:00,https://www.benzinga.com/news/22/07/28247968/r...,RBC Capital Maintains Sector Perform on Ford M...,,RBC Capital,15.0,15.0,13.725,Benzinga,benzinga.com


In [38]:
# Show the text of the final chain's reply to the high-level question.
print(result.content)  # Et voila

- The peers of Tesla Inc. (TSLA) include XPeng Inc. (XPEV), Li Auto Inc. (LI), Rivian Automotive, Inc. (RIVN), Lucid Group, Inc. (LCID), General Motors Company (GM), NIO Inc. (NIO), Ford Motor Company (F), Fisker Inc. (FSR), and Mullen Automotive, Inc. (MULN).
- Among these peers, Ford Motor Company (F) has the largest market cap, which is $46,682,800,000 as of the data for the fiscal year ending on December 31, 2022.
- The most recent price target estimate for Ford Motor Company (F) was made on November 1, 2023.
- The analyst who provided this estimate was Dan Levy from BOCOM International Holdings Company.
- The price target given by Dan Levy was $14.0.
