In [1]:
import os
from typing import List, Optional
from pydantic import BaseModel, Field
import instructor
import openai

from langchain.tools import tool
from langchain.agents import AgentExecutor, Tool
from langchain.vectorstores import FAISS
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.tools.render import format_tool_to_openai_function
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.agents import AgentExecutor
from langchain.prompts import MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
from langchain.schema.messages import HumanMessage
from langchain.schema.agent import AgentFinish
from langchain.embeddings import SentenceTransformerEmbeddings

from openbb import obb
from openbb_provider import standard_models

# Set up OpenAI API key
os.environ["OPENAI_API_KEY"] = "YOUR_API_KEY"
openai.api_key = "YOUR_API_KEY"

In [2]:
%%capture
pat = "YOUR_API_KEY"  # get it at https://my.openbb.co
obb.account.login(pat=pat)

# Transform OpenBB commands into OpenAI tools
- Currently need to do: to_df().to_dict() because of a bug at the core that will be fixed
- Ideally, OpenBB provides this list out-of-the-box

### This creates the pydantic schema from scratch

In [3]:
# Cannot define InputSchema because langchain 2.4.2 doesn't support pydantic > 2, rip
# ValidationError: 1 validation error for StructuredTool
# args_schema
#  subclass of BaseModel expected (type=type_error.subclass; expected_class=BaseModel)

class InputSchema(BaseModel):
    symbol: str = Field(..., description="Symbol to get data for")
    provider: Optional[str] = Field(description="The provider to use for the query", default=None)

@tool#(args_schema=InputSchema)
def get_peers_associated_with_symbol(
    symbol: str,
    provider: str = None,
) -> dict:
    """Get peers associated with a symbol"""

    return obb.stocks.ca.peers(symbol=symbol, provider=provider).to_df().to_dict()


class InputSchema(BaseModel):
    symbol: str = Field(..., description="Symbol to get data for")
    provider: Optional[str] = Field(description="The provider to use", default=None)

@tool#(args_schema=InputSchema)
def get_generic_info_from_symbol(
    symbol: str,
    provider: str = None,
) -> dict:
    """Get generic information from a symbol including market_cap, volume, eps"""

    return obb.stocks.quote(symbol=symbol, provider=provider).to_dict()


class InputSchema(BaseModel):
    symbol: str = Field(..., description="Symbol to get data for")
    provider: Optional[str] = Field(description="The provider to use", default=None)

@tool#(args_schema=InputSchema)
def get_price_target_with_analyst(
    symbol: str,
    provider: str = None,
) -> dict:
    """Get price target for symbol of interest, date of rating and rating value"""

    return obb.stocks.fa.pta(symbol=symbol, provider=provider).to_df().head().to_dict()

# vars(get_generic_info_from_symbol)

### This relies on OpenBB pydantic schema

In [4]:
@tool#(args_schema=standard_models.stock_peers.StockPeersQueryParams)
def get_peers_associated_with_symbol(
    symbol: str,
    provider: str = None,
) -> dict:
    """Get peers associated with a symbol"""

    return obb.stocks.ca.peers(symbol=symbol, provider=provider).to_df().to_dict()

# The current pydantic model has a bad description which interferes with prompt length, fix for now
standard_models.stock_quote.StockQuoteQueryParams.model_json_schema()['properties']["symbol"]["description"] = "Symbol to get data from"

@tool#(args_schema=standard_models.stock_quote.StockQuoteQueryParams)
def get_generic_info_from_symbol(
    symbol: str,
    provider: str = None,
) -> dict:
    """Get generic information from a symbol including market_cap, volume, eps"""

    return obb.stocks.quote(symbol=symbol, provider=provider).to_df().to_dict()


@tool#(args_schema=standard_models.price_target.PriceTargetQueryParams)
def get_price_target_with_analyst(
    symbol: str,
    provider: str = None,
) -> dict:
    """Get price target for one single symbol of interest, date of rating and rating value"""

    # Here we need to get the head of last 5 values, otherwise we go above limit of tokens per context
    return obb.stocks.fa.pta(symbol=symbol, provider=provider).to_df().head().to_dict()

# vars(get_generic_info_from_symbol)

### Check that the pydantic models are well defined 

In [5]:
standard_models.stock_quote.StockQuoteQueryParams.model_json_schema()['properties']

{'symbol': {'default': None,
  'description': 'Comma separated list of symbols.',
  'title': 'Symbol',
  'type': 'string'}}

In [6]:
class InputSchema(BaseModel):
    symbol: str = Field(..., description="Symbol to get data for")
    provider: Optional[str] = Field(description="The provider to use", default=None)
    
InputSchema.model_json_schema()['properties']

{'symbol': {'description': 'Symbol to get data for',
  'title': 'Symbol',
  'type': 'string'},
 'provider': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
  'default': None,
  'description': 'The provider to use',
  'title': 'Provider'}}

## List of available tools

In [7]:
# Ideally OpenBB provides this list by default
tools = [
    get_peers_associated_with_symbol,
    get_generic_info_from_symbol,
    get_price_target_with_analyst
]

In [8]:
# Let us create fake tools to show that this can scale well
def fake_func(inp: str) -> str:
    return "foo"
    
fake_tools = [
    Tool(
        name=f"foo-{i}",
        func=fake_func,
        description=f"a silly function that you can use to get more information about the number {i}",
    )
    for i in range(99)
]

tools += fake_tools

# Query to be used by analyst

In [9]:
query = """
    Check what are TSLA peers. From those, check which one has the highest market cap.
    Then, on the ticker that has the highest market cap get the most recent rating from an analyst.
    And tell me who was the analyst and what date was it that the rating was done.
"""

# Based on query select list of tools to be used by OpenAI

### Create a vector of embeddings for the descriptions of each tool (including fake ones)

In [10]:
# Create a list of documents made of function descriptions
docs = [
    Document(page_content=t.description, metadata={"index": i})
    for i, t in enumerate(tools)
]

# These embeddings are much better for semantic search
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Create VectorStore from documents and embeddings
# could also have used Pinecode, Chroma or Weaviate
vector_store = FAISS.from_documents(
    docs,
    embeddings # Could also use: OpenAIEmbeddings()
)

# We can assume there are not more than 2 simple functions necessary for a single task from an analyst workflow
# note that a more complex workflow will be narrowed down into simpler tasks
retriever = vector_store.as_retriever(
    search_type="similarity",
)

  from .autonotebook import tqdm as notebook_tqdm
Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  3.52it/s]


In [11]:
# Define a function that returns the most relevant tools based on a user prompt
def get_tools(smaller_query):

    # Retrieve documents relevant to a query
    docs = retriever.get_relevant_documents(smaller_query);

    # Return the list of tools that are the most relevant
    return [tools[d.metadata["index"]] for d in docs]    

### Simplify query in multiple simpler tasks

Since a query may contain multiple tasks, the tool retrieval function wasn't leading to the best results. So we simplify to conquer.

In [12]:
# Define pydantic model to be output from OpenAI when simplifying a workflow into simpler tasks
class Task(BaseModel):
    """Simple task to be performed"""
    description: str = Field(..., description="Description of the task to be achieved")

class TaskList(BaseModel):
    """List of simple tasks to be performed"""
    tasks: List[Task]

In [13]:
instructor.patch()

completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo-0613",
    response_model=TaskList,
    max_retries=2,
    messages=[
        {
            "role": "system", 
             "content": "You are extremely good at reasoning and creating a list of tasks. Add the context behind each task, but keep it simple."
        },
        {
            "role": "user",
            "content": f"I want you to decompose this query into a list of tasks to achieve: {query}"
        },
    ],
)

# Show what tasks need to be done for the query selected
print("The current query can be decomposed into:")
for t in completion.tasks:
    print(f"- {t.description}")

simpler_tasks = [t.description for t in completion.tasks]

The current query can be decomposed into:
- Check what are TSLA peers
- From the peers, check which one has the highest market cap
- On the ticker with the highest market cap, get the most recent rating from an analyst
- Retrieve the analyst and the date of the rating
- Provide the name of the analyst and the date of the rating


### Grab a function for each of the simpler tasks

In [14]:
relevant_tools = list()
for t in simpler_tasks:
    print(f"Task: {t}")
    tools_for_task = get_tools(t)
    for tool_for_task in tools_for_task:
        print(f"Tool to use: {tool_for_task.name}")
    relevant_tools.append(tools_for_task[0])
    print("\n")

Task: Check what are TSLA peers


Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 74.66it/s]


Tool to use: get_peers_associated_with_symbol
Tool to use: get_price_target_with_analyst
Tool to use: foo-22
Tool to use: get_generic_info_from_symbol


Task: From the peers, check which one has the highest market cap


Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.23it/s]


Tool to use: get_generic_info_from_symbol
Tool to use: get_price_target_with_analyst
Tool to use: get_peers_associated_with_symbol
Tool to use: foo-1


Task: On the ticker with the highest market cap, get the most recent rating from an analyst


Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 59.45it/s]


Tool to use: get_price_target_with_analyst
Tool to use: foo-15
Tool to use: foo-75
Tool to use: foo-30


Task: Retrieve the analyst and the date of the rating


Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 80.06it/s]


Tool to use: get_price_target_with_analyst
Tool to use: foo-16
Tool to use: foo-70
Tool to use: foo-76


Task: Provide the name of the analyst and the date of the rating


Batches: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 77.20it/s]

Tool to use: get_price_target_with_analyst
Tool to use: foo-16
Tool to use: foo-18
Tool to use: foo-70







In [15]:
# Remove duplicates functions
relevant_tools = [x for i, x in enumerate(relevant_tools) if i == relevant_tools.index(x)]

# OpenBB Agent

In [16]:
# Currently relevant_tools are coming from above
# I.e. there's 1 tool being retrieved from each simpler task description comparison with
# tools available using embeddings

# We should also allow users to specify which tools they want the analyst to have access to
# when performing a task
# relevant_tools = [
#     get_peers_associated_with_symbol,
#     get_generic_info_from_symbol,
#     get_price_target_with_analyst
# ]

# Format tools to a format accepted by OpenAI
functions = [format_tool_to_openai_function(f) for f in relevant_tools]

# Bind the functions to an OpenAI model 
model = ChatOpenAI(
    temperature=0,
    model="gpt-4", # Use gpt-3.5 for cheaper cost
).bind(functions=functions)

# Prompt template
# - comment chat_history if you don't want the agent to know about the past conversation
# - comment agent_scratchpad if you don't want the agent to use previous outputs as inputs
prompt = ChatPromptTemplate.from_messages([
    ("system", """
        You are an expert financial analyst with a 30+ year successful career in finance.
        You must ALWAYS use at least one of the tools provided when answering a question.
    """),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad")
])

# Creates the chain
chain = RunnablePassthrough.assign(
    agent_scratchpad= lambda x: format_to_openai_functions(x["intermediate_steps"])
) | prompt | model | OpenAIFunctionsAgentOutputParser()

# Create a memory for the agent to recall what was chatted in the past
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history"
)

# Create an agent that has access to the relevant tools and has memory
agent_executor = AgentExecutor(
    agent=chain,
    tools=relevant_tools,
    verbose=True,
    memory=memory
)

agent_executor.invoke({"input": query})

# Since our agent has a memory and access to past messages
# we could also iterate through the list of simpler tasks to be performed
# for t in simpler_tasks:
#     agent_executor.invoke({"input": t})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_peers_associated_with_symbol` with `{'symbol': 'TSLA'}`


[0m[36;1m[1;3m{0: {0: 'symbol', 1: 'peers_list'}, 1: {0: 'TSLA', 1: ['XPEV', 'LI', 'RIVN', 'LCID', 'GM', 'NIO', 'F', 'FSR', 'MULN']}}[0m[32;1m[1;3m
Invoking: `get_generic_info_from_symbol` with `{'symbol': 'XPEV'}`


[0m[33;1m[1;3m{'day_low': {Timestamp('2023-10-17 20:00:02+0000', tz='UTC'): 15.1}, 'day_high': {Timestamp('2023-10-17 20:00:02+0000', tz='UTC'): 15.665}, 'symbol': {Timestamp('2023-10-17 20:00:02+0000', tz='UTC'): 'XPEV'}, 'name': {Timestamp('2023-10-17 20:00:02+0000', tz='UTC'): 'XPeng Inc.'}, 'price': {Timestamp('2023-10-17 20:00:02+0000', tz='UTC'): 15.38}, 'changes_percentage': {Timestamp('2023-10-17 20:00:02+0000', tz='UTC'): 0.1954}, 'change': {Timestamp('2023-10-17 20:00:02+0000', tz='UTC'): 0.03}, 'year_high': {Timestamp('2023-10-17 20:00:02+0000', tz='UTC'): 23.62}, 'year_low': {Timestamp('2023-10-17 20:00:02+0000', tz='UT

{'input': '\n    Check what are TSLA peers. From those, check which one has the highest market cap.\n    Then, on the ticker that has the highest market cap get the most recent rating from an analyst.\n    And tell me who was the analyst and what date was it that the rating was done.\n',
 'chat_history': [HumanMessage(content='\n    Check what are TSLA peers. From those, check which one has the highest market cap.\n    Then, on the ticker that has the highest market cap get the most recent rating from an analyst.\n    And tell me who was the analyst and what date was it that the rating was done.\n'),
  AIMessage(content='The most recent rating for Ford Motor Company (F) was done by Adam Jonas from Morgan Stanley on July 31, 2023. The news was published on Benzinga with the title "Ford\'s $4.5 Billion EV Loss Overshadowed by Q2 Revenue Surge: Morgan Stanley Sees Potential Shift in EV Strategy". The price target set by the analyst was $16.')],
 'output': 'The most recent rating for Ford 