In [4]:
import pandas as pd

# Load the question set from CSV; the QUESTION and PATHS columns are read by later cells.
testing_df = pd.read_csv("openbb_question.csv")

In [8]:
# Display the raw question strings as a NumPy array.
testing_df['QUESTION'].values

array(['Give me the historical price data for Bitcoin cryptocurrency from yahoo finance',
       'Give me the historical price data for Ethereum cryptocurrency from fmp',
       'Give me the minute level price data for Bitcoin from January 2024 to April 2024',
       'Give me the quarter level price data for Solana from 2021 till now',
       'I want the data hourly price data for Solana for the current year from polygon',
       'I want the daily price data for Ethereum and Bitcoin for the current year from tiingo provider and POLONIEX exchange',
       'I want the dataframe for the daily price data Ethereum, Solana, Bitcoin from yahoo finance',
       'I want the minute level data for Ethereum from yahoo finance in dictionary format',
       'I want the minute level data in 2023 for Solana from fmp in numpy array',
       'I want the cryptocurrency pair for Bitcoin',
       'I want the dataframe for the minute level price data Ethereum, Solana, Bitcoin from yahoo finance',
       'Gi

In [9]:
from agent.database import load_database
from dotenv import load_dotenv,find_dotenv
import os
# Load variables from the .env file located by find_dotenv(); override=True lets
# values from that file replace variables already present in the environment.
load_dotenv(find_dotenv(),override=True)
# Load the database, passing the OpenAI API key taken from the environment.
# os.environ[...] raises KeyError if OPENAI_API_KEY is not set.
openbb_collection = load_database(os.environ['OPENAI_API_KEY'])

In [11]:
from agent.dspy_agent import OpenBBAgentChroma

# Build the agent on top of the loaded collection; later cells call it with a question string.
obb_chroma = OpenBBAgentChroma(openbb_collection)

In [23]:
# Run a single sample question through the agent; the call prints its level-by-level trail.
funcs = obb_chroma('Give me the historical price data for Bitcoin cryptocurrency from yahoo finance')

[92mFirst level answer: crypto[0m
[93mCurrent Trail: ['crypto'] and level: 2[0m
[91mSubsequent level 2 string to LLM: price: Get historical price data for cryptocurrency pair(s) within a provider.

search: Search available cryptocurrency pairs within a provider.

[0m
[94mLLM Answer: Prediction(
    rationale='produce the output. We need to find the key that provides historical price data for a specific cryptocurrency from a specific source.',
    output='price'
)[0m
[93mCurrent Trail: ['crypto-->price'] and level: 3[0m


In [25]:
# Inspect the metadata entries attached to the first element of the agent's result.
funcs[0]['metadatas']

[{'function_call': '{\'name\': \'obb_crypto_price_historical_fmp\', \'description\': \'Get historical price data for cryptocurrency pair(s) within a provider. Get it from provider fmp\', \'parameters\': {\'type\': \'object\', \'properties\': {\'symbol\': {\'type\': \'string\', \'description\': \'Symbol to get data for. Can use CURR1-CURR2 or CURR1CURR2 format. Multiple items allowed for provider(s): fmp, polygon, tiingo, yfinance.\', \'default\': \'\'}, \'start_date\': {\'type\': \'string\', \'description\': \'Start date of the data, in YYYY-MM-DD format.\', \'default\': None}, \'end_date\': {\'type\': \'string\', \'description\': \'End date of the data, in YYYY-MM-DD format.\', \'default\': None}, \'provider\': {\'description\': "The provider to use for the query, by default None. If None, the provider specified in defaults is selected or \'fmp\' if there is no default.", \'default\': \'fmp\', \'type\': \'string\', \'enum\': [\'fmp\']}, \'interval\': {\'name\': \'interval\', \'descrip

In [32]:
# Second sample question (options chain); show the metadata of the first result element.
funcs = obb_chroma('Get the options chain for GOOGL for the year 2024')
funcs[0]['metadatas']

[92mFirst level answer: equity; derivatives[0m
[93mCurrent Trail: ['equity', 'derivatives'] and level: 2[0m
[91mSubsequent level 2 string to LLM: options: Get the complete options chain for a ticker.

discovery: Get the top price gainers in the stock market. Get the top price losers in the stock market. Get the most actively traded stocks based on volume. Get potentially undervalued large cap stocks. Get potentially undervalued growth stocks. Get top small cap stocks based on earnings growth. Get top tech stocks based on revenue and earnings growth. Get the URLs to SEC filings reported to EDGAR database, such as 10-K, 10-Q, 8-K, and more.SEC filings include Form 10-K, Form 10-Q, Form 8-K, the proxy statement, Forms 3, 4, and 5, Schedule 13, Form 114,Foreign Investment Disclosures and others. The annual 10-K report is required to befiled annually and includes the company's financial statements, management discussion and analysis,and audited financial statements.

market_snapshots: 

[{'function_call': '{\'name\': \'obb_derivatives_options_chains_intrinio\', \'description\': \'Get the complete options chain for a ticker.\', \'parameters\': {\'type\': \'object\', \'properties\': {\'symbol\': {\'type\': \'string\', \'description\': \'Symbol to get data for.\', \'default\': \'\'}, \'provider\': {\'description\': "The provider to use for the query, by default None. If None, the provider specified in defaults is selected or \'intrinio\' if there is no default.", \'default\': \'intrinio\', \'type\': \'string\', \'enum\': [\'intrinio\']}, \'date\': {\'type\': \'string\', \'description\': \'The end-of-day date for options chains data.\', \'default\': None}}, \'required\': [\'symbol\', \'provider\']}}',
  'node_name': 'obb_derivatives_options_chains_intrinio',
  'provider_source': 'intrinio',
  'trail': 'derivatives-->options-->chains',
  'type': 'provider_function'}]

In [33]:
# Print every returned node name with everything after its last underscore removed,
# e.g. 'obb_derivatives_options_chains_intrinio' -> 'obb_derivatives_options_chains'.
for fn in funcs[0]['metadatas']:
    node_name = fn['node_name']
    print(node_name.rpartition('_')[0])

obb_derivatives_options_chains


In [49]:
# Convert the first PATHS entry (e.g. '/crypto/price/historical\n') into an
# underscore-joined name: drop newlines, split on '/', and replace the first
# segment with the 'obb' prefix.
path = testing_df['PATHS'][0].replace("\n","").split("/")
path[0] = "obb"
"_".join(path)

'obb_crypto_price_historical'

In [44]:
# Scratch check: removing the trailing newline from a sample path string.
'/crypto/price/historical\n'.replace("\n","")

'/crypto/price/historical'

In [64]:
import time 
from tqdm import tqdm
import pandas as pd
import os

def is_file_empty(file_path):
    """Return True when the file at ``file_path`` has a size of zero bytes.

    Raises OSError if the file does not exist or cannot be accessed.
    """
    size_in_bytes = os.stat(file_path).st_size
    return not size_in_bytes

# Evaluate the agent on every question in openbb_question.csv and append one
# result row per question to hierarchical_answers.csv. The positional index of
# each question that completed successfully is recorded in done.txt so that a
# re-run resumes instead of repeating finished work.
testing_df = pd.read_csv("openbb_question.csv")
columns = ["QUESTION","TIME","ANSWER","LLM_ANSWER"]
if not os.path.exists("hierarchical_answers.csv"):
    # First run: create the results file with just the header row.
    df = pd.DataFrame(columns=columns,index=None)
    df.to_csv("hierarchical_answers.csv",index=False,header=True)
else:
    df = pd.read_csv("hierarchical_answers.csv",index_col=False)

# Make sure the checkpoint file exists. Opening in append mode creates it
# without truncating existing content and, unlike os.mknod, works on every
# platform without special privileges.
with open("done.txt","a"):
    pass

# Read the checkpoint once up front. Indices appended during this run are
# always lower than the ones still to be visited, so re-reading the file on
# every iteration is unnecessary. Blank lines are ignored.
with open("done.txt","r") as f:
    done_pids = {int(x) for x in f.read().splitlines() if x.strip()}

with tqdm(total=len(testing_df),desc="Hierarchical Answers LLM") as pbar:
    for done_idxs, (_label, row) in enumerate(testing_df.iterrows()):
        if done_idxs in done_pids:
            print(f"Already done for {done_idxs}")
            pbar.update(1)
            continue

        # Expected answer: '/crypto/price/historical\n' -> 'obb_crypto_price_historical'.
        path = row['PATHS'].replace("\n","").split("/")
        path[0] = "obb"
        answer = "_".join(path)

        question = row['QUESTION']
        try:
            start = time.perf_counter()
            functions = obb_chroma(question)
            time_taken = time.perf_counter() - start

            # Strip everything after the last underscore from each node name and
            # de-duplicate; sorting keeps the recorded answer deterministic.
            llm_answers = sorted(
                {fn['node_name'].rpartition('_')[0] for fn in functions[0]['metadatas']}
            )
            llm_answer = ", ".join(llm_answers)
            succeeded = True
        except Exception as exc:
            # Best-effort: report the failure and record an empty answer, but let
            # KeyboardInterrupt / SystemExit propagate so the run can be stopped.
            print(f"Failed for {done_idxs}: {exc!r}")
            llm_answer = ""
            time_taken = 0
            succeeded = False

        df_dict = {
            "QUESTION":[question],
            "TIME":[time_taken],
            "ANSWER":[answer],
            "LLM_ANSWER":[llm_answer],
        }
        print(df_dict)
        curr_df = pd.DataFrame(df_dict)
        curr_df.to_csv("hierarchical_answers.csv", mode='a',index=False,header=False)

        # Checkpoint only after the row has been persisted, so an interruption
        # between the two writes cannot lose a result.
        # NOTE(review): failed questions are not checkpointed, so a re-run retries
        # them and appends another row for the same question.
        if succeeded:
            with open("done.txt","a") as f:
                f.write(f"{done_idxs}\n")
        pbar.update(1)

Hierarchical Answers LLM: 100%|██████████| 219/219 [00:28<00:00,  7.59it/s]


Already done for 0
Already done for 1
Already done for 2
[92mFirst level answer: crypto[0m
[93mCurrent Trail: ['crypto'] and level: 2[0m
[91mSubsequent level 2 string to LLM: price: Get historical price data for cryptocurrency pair(s) within a provider.

search: Search available cryptocurrency pairs within a provider.

[0m




[94mLLM Answer: Prediction(
    rationale='produce the output. We need to find historical price data for a specific cryptocurrency, Solana, from 2021 till now.',
    output='price'
)[0m
[93mCurrent Trail: ['crypto-->price'] and level: 3[0m
{'QUESTION': ['Give me the quarter level price data for Solana from 2021 till now'], 'TIME': [2.30118727684021], 'ANSWER': ['obb_crypto_price_historical'], 'LLM_ANSWER': ['obb_crypto_price_historical']}
[92mFirst level answer: crypto[0m
[93mCurrent Trail: ['crypto'] and level: 2[0m
[91mSubsequent level 2 string to LLM: price: Get historical price data for cryptocurrency pair(s) within a provider.

search: Search available cryptocurrency pairs within a provider.

[0m




[94mLLM Answer: Prediction(
    rationale='produce the output. We need to find the key that provides historical price data for a specific cryptocurrency, in this case Solana, for the current year from the provider Polygon.',
    output='price'
)[0m
[93mCurrent Trail: ['crypto-->price'] and level: 3[0m
{'QUESTION': ['I want the data hourly price data for Solana for the current year from polygon'], 'TIME': [2.4780735969543457], 'ANSWER': ['obb_crypto_price_historical'], 'LLM_ANSWER': ['obb_crypto_price_historical']}
[92mFirst level answer: crypto[0m
[93mCurrent Trail: ['crypto'] and level: 2[0m
[91mSubsequent level 2 string to LLM: price: Get historical price data for cryptocurrency pair(s) within a provider.

search: Search available cryptocurrency pairs within a provider.

[0m




[94mLLM Answer: Prediction(
    rationale='produce the output. We need to find the key that allows us to get historical price data for cryptocurrency pairs from a specific provider and exchange.',
    output='price'
)[0m
[93mCurrent Trail: ['crypto-->price'] and level: 3[0m
{'QUESTION': ['I want the daily price data for Ethereum and Bitcoin for the current year from tiingo provider and POLONIEX exchange'], 'TIME': [1.7774412631988525], 'ANSWER': ['obb_crypto_price_historical'], 'LLM_ANSWER': ['obb_crypto_price_historical']}




{'QUESTION': ['I want the dataframe for the daily price data Ethereum, Solana, Bitcoin from yahoo finance'], 'TIME': [0], 'ANSWER': ['obb_crypto_price_historical'], 'LLM_ANSWER': ['']}


In [52]:
# Scratch check: joining a single-element list yields that element with no separator.
",".join(["1"])

'1'