In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents import AgentExecutor
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain import hub
from langchain.tools.render import render_text_description_and_args
from langchain.output_parsers import PydanticOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain.embeddings import SentenceTransformerEmbeddings

from utils import map_openbb_collection_to_langchain_tools

# OpenAI credentials and environment set-up
import os
import openai

# This runs in the first cell, i.e. before any embedding model is created below.
os.environ["TOKENIZERS_PARALLELISM"] = "False"  # Avoid some warnings from HuggingFace

# To supply the OpenAI API key from inside the notebook, uncomment ONE of the
# lines below and fill in the key. Leave both commented out when the key is
# already available in the environment, and never commit a real key.
# os.environ["OPENAI_API_KEY"] = ""
# openai.api_key = ""

In [2]:
# Let's set up our tool retrieval

fundamental_openbb_tools = map_openbb_collection_to_langchain_tools(
    "/equity/fundamental"
)

# One document per tool: the tool description is the text that gets embedded,
# and the tool's position in `fundamental_openbb_tools` is kept as metadata so
# that a search hit can be mapped back to the tool object it came from.
docs = [
    Document(page_content=t.description, metadata={"index": i})
    for i, t in enumerate(fundamental_openbb_tools)
]

# Empirically, these Sentence embeddings can occasionally be better.
# (But it is a substantially heavier dependency than relying on OpenAI's API.)
# Flip the flag to try them. Only the selected embedding backend is
# instantiated, so the sentence-transformer model is not loaded when unused.
USE_SENTENCE_TRANSFORMER = False

if USE_SENTENCE_TRANSFORMER:
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
else:
    embeddings = OpenAIEmbeddings()

vector_store = FAISS.from_documents(docs, embeddings)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Retriever over the tool descriptions; each search returns the 2 closest matches.
retriever = vector_store.as_retriever(search_kwargs={"k": 2})


def get_tools(query):
    """Return the tools whose descriptions best match ``query``.

    The retriever yields documents carrying an ``index`` metadata entry; that
    index is used to look the tool up in ``fundamental_openbb_tools``. Results
    are returned in the order the retriever produced them.
    """
    matching_docs = retriever.get_relevant_documents(query)
    tool_indices = (doc.metadata["index"] for doc in matching_docs)
    return [fundamental_openbb_tools[idx] for idx in tool_indices]

# Quick sanity check: show the name and the first description line of each hit.
fetched_tools = get_tools("money in")
for tool in fetched_tools:
    summary = tool.description.split("\n", 1)[0]
    print(f"tool: {tool.name}, description: {summary}")

tool: income, description: Income Statement. Report on a company's financial performance.
tool: cash, description: Cash Flow Statement. Information about the cash flow statement.


In [4]:
# A small factory for ReAct agents -- we'll be building one per subquestion later.
def langchain_react_agent(tools):
    """Build an ``AgentExecutor`` that runs a ReAct loop over ``tools``.

    The prompt is pulled from the LangChain hub and pre-filled with a rendered
    description of the tools and a comma-separated list of their names. The
    returned executor runs non-verbosely, does not return intermediate steps,
    and has parsing-error handling enabled.
    """
    react_prompt = hub.pull("hwchase17/react-multi-input-json").partial(
        tools=render_text_description_and_args(tools),
        tool_names=", ".join(t.name for t in tools),
    )

    # Generation is cut off at "\nObservation"; presumably so that observations
    # come from the executed tools rather than from the model itself.
    chat_model = ChatOpenAI(model="gpt-4-1106-preview").bind(stop=["\nObservation"])

    # Maps the executor's payload onto the two variables the prompt still needs.
    input_mapping = {
        "input": lambda x: x["input"],
        "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
    }
    agent_chain = input_mapping | react_prompt | chat_model | JSONAgentOutputParser()

    return AgentExecutor(
        agent=agent_chain,
        tools=tools,
        verbose=False,
        return_intermediate_steps=False,
        handle_parsing_errors=True,
    )


agent_executor = langchain_react_agent(tools=fetched_tools)

# Smoke-test the agent with an explicit question. No variable named `input` has
# been assigned yet at this point in the notebook, so the bare name `input`
# would resolve to Python's builtin function rather than to a question string.
sample_question = "What was the net income of AMZN in the most recent fiscal year?"
result = agent_executor.invoke({"input": sample_question})

In [5]:
# The primary goal is to
# 1. Break a larger question down into subquestions + the appropriate query to fetch the right tool to answer the subquestion
# 2. Retrieve the right tools for each subquestion
# 3. Answer each subquestion using a ReAct agent
# 4. Combine all of the subquestion answers to generate a final answer.

# Part 1 break it into subquestions


# We'll use Pydantic to do some output enforcement
# (It's just prompting and parsing under the hood)
#
# SubQuestion: one piece of the decomposed user question, together with the
# keyword query that is later passed to `get_tools` to find tools for it.
# NOTE(review): documented with `#` comments on purpose -- a class docstring
# may be picked up as the schema description, and the schema is rendered into
# the prompt via `get_format_instructions()`. Confirm before adding one.
class SubQuestion(BaseModel):
    # Text of the subquestion to be answered.
    question: str = Field(description="The subquestion itself.")
    # Retrieval query; the Field descriptions are shown to the model, so the
    # wording below is effectively part of the prompt.
    query: str = Field(
        description="The query to pass to the `fetch_tools` function to retrieve the appropriate tool to answer the question."
    )

# Top-level object the model is asked to return: a wrapper holding every
# SubQuestion. This is the type handed to the PydanticOutputParser.
class SubQuestionList(BaseModel):
    # All generated subquestions, iterated over by the later cells.
    subquestions: list[SubQuestion] = Field(
        description="The list of SubQuestion objects."
    )

# The parser plays two roles: it supplies the format instructions that are
# inserted into the prompt, and it sits at the end of the chain to turn the
# model's reply into a SubQuestionList.
subquestion_parser = PydanticOutputParser(pydantic_object=SubQuestionList)

# System prompt for the decomposition step. The only template variable is
# {format_instructions}; the braces of the JSON example are doubled ({{ }}) so
# that they are treated as literal braces rather than as template variables.
system_message = """\
You are a world-class state-of-the-art agent.

You can access multiple tools, via a "fetch_tools" function that will retrieve the necessary tools.
The `fetch_tools` function accepts a string of keywords as input specifying the type of tool to retrieve.
Each retrieved tool represents a different data source or API that can retrieve the required data.

Your purpose is to help answer a complex user question by generating a list of sub
questions, as well as the corresponding keyword query to the "fetch_tools" function
to retrieve the relevant tools to answer each corresponding subquestion.

These are the guidelines you consider when completing your task:
* Be as specific as possible
* Avoid using acronyms
* The sub-questions should be relevant to the user question
* The sub-questions should be answerable by the tools retrieved by the query to `fetch_tools`
* You can generate multiple sub-questions
* You don't need to query for a tool if you don't think it's relevant

## Output format
{format_instructions}

### Example responses
```json
{{"subquestions": [
    {{
        "question": "What are the latest financial statements of AMZN?", 
        "query": "financial statements"
    }}, 
    {{
        "question": "What is the most recent revenue and profit margin of AMZN?", 
        "query": "revenue profit margin ratios"
    }}, 
    {{
        "question": "What is the current price to earnings (P/E) ratio of AMZN?", 
        "query": "ratio price to earnings"
    }}, 
    {{
        "question": "What is the current debt-to-equity ratio of AMZN?", 
        "query": "debt-equity ratio"
    }}
]}}
```

"""

# Human turn carrying the user's question. Note that the leading indentation
# is inside the string literal and is therefore sent to the model as written.
human_message = """\
    ## User Question
    {input}
    """

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", human_message),
    ]
)
# Pre-fill the format instructions so that `input` is the only variable left
# to provide when the chain is invoked.
prompt = prompt.partial(
    format_instructions=subquestion_parser.get_format_instructions()
)

llm = ChatOpenAI(
    model="gpt-4"
)  # gpt-3.5-turbo works well, but gpt-4-1106-preview isn't good at returning JSON.

# Pipeline: pick out "input" -> fill the prompt -> call the model -> parse the
# reply into a SubQuestionList.
subquestion_chain = {"input": lambda x: x["input"]} | prompt | llm | subquestion_parser

# Our high-level question we're going to attempt to answer
INPUT = "Perform a fundamentals financial analysis of AMZN using the most recently available data. What do you find that's interesting?"

subquestion_list = subquestion_chain.invoke({"input": INPUT})

# Show each generated subquestion together with its retrieval query.
for subquestion in subquestion_list.subquestions:
    print(
        subquestion
    )  # We probably won't have all the right tools to answer these questions. Improvements for the future!

question='What are the latest financial statements of AMZN?' query='financial statements AMZN'
question='What is the most recent revenue and profit margin of AMZN?' query='revenue profit margin AMZN'
question='What is the current price to earnings (P/E) ratio of AMZN?' query='price to earnings ratio AMZN'
question='What is the current debt-to-equity ratio of AMZN?' query='debt-equity ratio AMZN'
question='What is the current market capitalization of AMZN?' query='market capitalization AMZN'
question='What is the current return on equity (ROE) of AMZN?' query='return on equity AMZN'
question='What is the current dividend yield of AMZN?' query='dividend yield AMZN'
question='What is the current earnings per share (EPS) of AMZN?' query='earnings per share AMZN'


In [6]:
# Part 2: look up the tools for every subquestion.
# Each entry bundles the subquestion text, its retrieval query and the tools
# that were found; the same list of dicts is re-used by the following cells.

subquestions_and_tools = [
    {
        "subquestion": subquestion.question,
        "query": subquestion.query,
        "tools": get_tools(subquestion.query),
    }
    for subquestion in subquestion_list.subquestions
]

# Print every subquestion followed by the first description line of its tools.
for subq in subquestions_and_tools:
    print(subq["subquestion"])
    for tool in subq["tools"]:
        first_line = tool.description.split("\n", 1)[0]
        print(f"  {tool.name}: {first_line}")
    print()

What are the latest financial statements of AMZN?
  income: Income Statement. Report on a company's financial performance.
  balance: Balance Sheet. Balance sheet statement.

What is the most recent revenue and profit margin of AMZN?
  revenue_per_geography: Revenue Geographic. Geographic revenue data.
  income: Income Statement. Report on a company's financial performance.

What is the current price to earnings (P/E) ratio of AMZN?
  multiples: Equity Valuation Multiples. Valuation multiples for a stock ticker.
  ratios: Extensive set of ratios over time. Financial ratios for a given company.

What is the current debt-to-equity ratio of AMZN?
  ratios: Extensive set of ratios over time. Financial ratios for a given company.
  multiples: Equity Valuation Multiples. Valuation multiples for a stock ticker.

What is the current market capitalization of AMZN?
  multiples: Equity Valuation Multiples. Valuation multiples for a stock ticker.
  metrics: Key Metrics. Key metrics for a given com

In [7]:
# Part 3: answer each subquestion with its own ReAct agent.
# Build one agent per entry, bound to exactly the tools retrieved for it, so
# that agents[i] corresponds to subquestions_and_tools[i].

agents = [
    langchain_react_agent(tools=entry["tools"]) for entry in subquestions_and_tools
]

len(agents)

8

In [8]:
# Run the agents to answer the subquestions
for i, subquestion in enumerate(subquestions_and_tools):
    # Interpolate only the subquestion text. `subquestion` itself is a dict that
    # also holds the retrieval query and the tool objects, and formatting the
    # whole dict would dump all of that into the prompt.
    question = subquestion["subquestion"]

    # Named `agent_input` so that the builtin `input` is not shadowed.
    agent_input = f"""\
    Given the following high-level question: {INPUT}
    Answer only the following subquestion: {question}

    Give your answer in a bullet-point list.
    Explain your reasoning, and make reference to the retrieved data.

    Example output format:
    ```
    - <the first observation, insight, and/or conclusion> 
    - <the second observation, insight, and/or conclusion> 
    - <the third observation, insight, and/or conclusion> 
    ... REPEAT AS MANY TIMES AS NECESSARY TO ANSWER THE SUBQUESTION.
    ```

    Return only your answer as a bulleted list as a single string. Don't respond with JSON or any other kind of datastructure.
    """
    try:
        result = agents[i].invoke({"input": agent_input})
        output = result["output"]
    except Exception as exc:
        # Best effort: a failing agent must not abort the remaining subquestions.
        # The error is surfaced to the notebook reader, while the observation
        # passed on to the final step stays a neutral fallback sentence.
        print(f"[agent error] {type(exc).__name__}: {exc}")
        output = "I was unable to answer the subquestion using the available tool."

    print(question)
    print("----")
    print(output)
    print("=======")

    # We'll misbehave and re-use the same datastructure again:
    # the final-answer step reads the "observation" key of every entry.
    subquestion["observation"] = output

What are the latest financial statements of AMZN?
----
- AMZN reported a total revenue of $513.98 billion for the fiscal year ending December 31, 2022.
- The company's cost of revenue was $288.83 billion, resulting in a gross profit of $225.15 billion.
- AMZN spent $73.21 billion on research and development and $42.24 billion on selling and marketing expenses.
- The operating income was $12.25 billion, but the company reported a net loss of $2.72 billion.
- The total assets reported were $462.68 billion, with $53.89 billion in cash and cash equivalents.
- Long term debt stood at $140.12 billion, contributing to a total equity of $146.04 billion.
- Despite the significant revenue, the net income ratio was negative, indicating a loss per share of $-0.27.
What is the most recent revenue and profit margin of AMZN?
----
- AMZN's most recent annual revenue, as of the end of 2022, was approximately $513.98 billion.
- The gross profit for the same period was about $225.15 billion, resulting in

In [9]:
# Part 4 is to generate a response based on the answers to each of the subquestions

def render_subquestions_and_answers(subquestions):
    """Render subquestions and their observations as one text block.

    Args:
        subquestions: iterable of dicts, each providing the string values
            ``"subquestion"`` and ``"observation"``.

    Returns:
        The concatenation, in input order, of one section per entry; each
        section is a "Subquestion: " line followed by an "Observations: "
        header, the observation text and a blank line. An empty input gives
        an empty string.
    """
    return "".join(
        "Subquestion: " + entry["subquestion"] + "\n"
        + "Observations: \n" + entry["observation"] + "\n\n"
        for entry in subquestions
    )


# Prompt for the final synthesis step: it restates the high-level question and
# lists every subquestion with the observations gathered for it.
system_message = """\
    Given the following high-level question: 

    Question: {input}

    And the following subquestions and subsequent observations:

    {subquestions}

    Answer the high-level question. Give your answer in a bulleted list.
    """

llm = ChatOpenAI(model="gpt-4")  # Let's use the big model for the final answer.
prompt = ChatPromptTemplate.from_messages([("system", system_message)])

# Maps the invoke payload onto the two prompt variables; the list of
# subquestion dicts is rendered to plain text on the way in.
final_inputs = {
    "input": lambda x: x["input"],
    "subquestions": lambda x: render_subquestions_and_answers(x["subquestions"]),
}
final_chain = final_inputs | prompt | llm

result = final_chain.invoke({"input": INPUT, "subquestions": subquestions_and_tools})

In [10]:
print(result.content)  # Et voila

- Amazon (AMZN) reported a total revenue of $513.98 billion for the fiscal year ending December 31, 2022.
- The company's gross profit for the same period was $225.15 billion, translating to a gross profit margin of approximately 43.8%.
- Despite its significant revenue, AMZN reported a net loss of about $2.72 billion, resulting in a negative net profit margin of about -0.53%.
- The current price to earnings (P/E) ratio of AMZN is approximately 75.43.
- The current debt-to-equity ratio of AMZN is approximately 0.9594, suggesting a balanced use of debt and equity for financing its operations.
- The company's current market capitalization stands at approximately $1.52 trillion.
- The current return on equity (ROE) for AMZN is -1.86%, indicating that the company generated a loss relative to the shareholders' equity for the recent fiscal year.
- Amazon does not pay a dividend, as indicated by its current dividend yield of 0.0%.
- For the period ending September 30, 2023, the company report