In [57]:
from IPython.display import Markdown,display,HTML
from vertexai.generative_models import Part  

import sys
import json
import os
import importlib
import math
import html

# Make the project's helper modules importable. The path is resolved relative to
# the current working directory, so this assumes the notebook is run from its own folder.
sys.path.append(os.path.abspath('../utils'))

# Each local module is imported, then reloaded, then the needed name is pulled in.
# The reload re-executes the module so that edits made to the .py file are picked
# up without restarting the kernel.
import tool_functions
importlib.reload(tool_functions)
from tool_functions import convert_to_tool # convert self-defined functions to Tool objects

import rag
importlib.reload(rag)
from rag import RAG # RAG search function

import react_agent
importlib.reload(react_agent)
from react_agent import ReactAgent

### Create some basic tools for testing

In [10]:
def sum_two_elements(a: int, b: int) -> int:
    """
    Adds two integers together and returns the total.

    Args:
        a (int): The first integer to add.
        b (int): The second integer to add.

    Returns:
        int: The result of `a + b`.
    """
    total = a + b
    return total


def multiply_two_elements(a: int, b: int) -> int:
    """
    Multiplies one integer by another and returns the product.

    Args:
        a (int): The first factor.
        b (int): The second factor.

    Returns:
        int: The result of `a * b`.
    """
    product = a * b
    return product

def compute_log(x: int) -> float | str:
    """
    Computes the logarithm of an integer `x` with an optional base.

    Args:
        x (int): The integer value for which the logarithm is computed. Must be greater than 0.

    Returns:
        float: The logarithm of `x` to the specified `base`.
    """
    if x <= 0:
        return "Logarithm is undefined for values less than or equal to 0."
    
    return math.log(x)


# Wrap each plain Python function as a Tool object, in the same order as before.
sum_two_elements_tool, multiply_two_elements_tool, compute_log_tool = [
    convert_to_tool(tool_fn)
    for tool_fn in (sum_two_elements, multiply_two_elements, compute_log)
]


# Load the RAG

In [4]:
# Read the PDF, split its text into chunks, embed the chunks, and build the
# FAISS vector store that is later handed to the RAG class.
data_path='../data/naf23.pdf'

from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# NOTE(review): importing FAISS from the top-level `langchain` package looks like
# the legacy import path; newer releases presumably expect it from the community
# vector-store package. Confirm against the installed langchain version.
from langchain import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

pdf_reader = PdfReader(data_path)

# Concatenate the text of every page in one pass (no separator between pages,
# matching the previous page-by-page `+=` behaviour).
text = "".join(page.extract_text() for page in pdf_reader.pages)

# create_documents() splits the text into chunks and wraps each chunk in a
# LangChain 'Document' object.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
chunks = text_splitter.create_documents([text])

# embedding model
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')

# faiss_vector_store
faiss_vector_store = FAISS.from_documents(chunks, embedding_model)

  from tqdm.autonotebook import tqdm, trange


In [5]:
# Build the RAG helper on top of the PDF and the FAISS store created above.
rag_instance = RAG(
    pdf_path=data_path,
    chunking_method='recursive',
    faiss_vector_store=faiss_vector_store,
)

def search(query: str) -> str:
    """
    This is a search function on a pre-existing financial knowledge base. You can use this search function to search for financial information of specific companies you do not know. Treat this function as an internal wiki search that you can use.

    Args:
        query (str): input query

    Returns:
        str: A JSON string with a single key, "relevant information in the auditor notes", whose value is the search results.
    """
    # NOTE(review): the docstring above is presumably what convert_to_tool shows
    # to the model as the tool description, so keep it model-readable.
    # method='ensemble' selects the retrieval strategy implemented by RAG.search.
    results = rag_instance.search(query, method='ensemble')
    return json.dumps({"relevant information in the auditor notes": results})

# Convert the RAG-backed `search` function into a Tool object so it can be
# passed to the agent alongside the arithmetic tools.
search_tool = convert_to_tool(search)

# ReAct Agent

In [8]:
# Agent equipped with the RAG search tool plus the three arithmetic helpers.
agent = ReactAgent(
    tools=[
        search_tool,
        sum_two_elements_tool,
        multiply_two_elements_tool,
        compute_log_tool,
    ],
    print_system_prompt=False,
)


## ReAct answering financial questions

In [9]:
# Ask a question that requires looking things up in the auditor notes.
agent.run(
    user_msg='What caused the significant change in net assets with donor restrictions between 2022 and 2023?',
    max_iterations=10,
)

---------------------------------------- iteration 0  ----------------------------------------


{"relevant information in the auditor notes": "Money market funds 10,009             -                        -                        10,009             \nMutual funds and stocks 1,066,325       -                        -                        1,066,325       \nTotal Assets at Fair Value 2,788,623$     - $                     - $                     2,788,623$     \nTotal Assets\nMeasured at\nLevel 1 Level 2 Level 3 Fair Value\nCash 76,823 $          - $                     - $                     76,823 $          \nMoney market funds 1,642,051       -                        -                        1,642,051       \nMutual funds and stocks 989,122           -                        -                        989,122           \nTotal Assets at Fair Value 2,707,996$     - $                     - $                     2,707,996$     Fair Value Measurements Using\nFair Value Measurements Using2023\n2022\n \n \nThe Foundation does not have any liabilities measured at fair value on a recu

{"relevant information in the auditor notes": "and stored at the NINDS SCA -BRAC biorepository. Both de-identified clinical data and biofluids are available \nfor request from non -participating investigators for approved research projects.  \n  \n13 \n National Ataxia Foundation  \nNotes to the Financial Statements  \nDecember 31, 2023  and 2022  \n \nNote 1:  Summary of Significant Accounting Policies (Continued)  \n \nB. Basis of Accounting and Presentation  \n \nThe accompanying financial statements have been prepared using  the accrual basis of accounting in accordance with \naccounting principles generally accepted in the United States of America.   \n \nRevenues are recorded when earned and expenses are recorded when a liability is incurred. Contributions received are \nrecorded as an increase in non -donor -restricted or donor -restricted support depending on the existence or nature of any \ndonor restrictions.  Accordingly, net assets of the Foundation and changes therein are 

{"relevant information in the auditor notes": "and stored at the NINDS SCA -BRAC biorepository. Both de-identified clinical data and biofluids are available \nfor request from non -participating investigators for approved research projects.  \n  \n13 \n National Ataxia Foundation  \nNotes to the Financial Statements  \nDecember 31, 2023  and 2022  \n \nNote 1:  Summary of Significant Accounting Policies (Continued)  \n \nB. Basis of Accounting and Presentation  \n \nThe accompanying financial statements have been prepared using  the accrual basis of accounting in accordance with \naccounting principles generally accepted in the United States of America.   \n \nRevenues are recorded when earned and expenses are recorded when a liability is incurred. Contributions received are \nrecorded as an increase in non -donor -restricted or donor -restricted support depending on the existence or nature of any \ndonor restrictions.  Accordingly, net assets of the Foundation and changes therein are 

"The increase in net assets with donor restrictions from $2,041,806 in 2022 to $2,680,567 in 2023 is likely due to new contributions received with donor-imposed restrictions.  However, the specific details of these contributions are not available within the provided financial summaries.  A more detailed review of the Foundation's financial records would be needed to determine the exact sources and amounts of these restricted contributions."

## ReAct answering simple questions

In [12]:
# Multi-step arithmetic: the agent is expected to chain the sum, multiply, and log tools.
agent.run(
    user_msg='I want to calculate the sum of 1234 and 5678 and multiply the result by 5. Then, I want to take the logarithm of this result',
    max_iterations=10,
)

---------------------------------------- iteration 0  ----------------------------------------


6912
---------------------------------------- iteration 1  ----------------------------------------


34560
---------------------------------------- iteration 2  ----------------------------------------


10.450452222917992
---------------------------------------- iteration 3  ----------------------------------------


'The final result after summing 1234 and 5678, multiplying by 5, and taking the logarithm is approximately 10.45.'

## Search function that returns the entire PDF read by `Part.from_data()`

In [33]:
# Read the PDF as raw bytes and wrap it in a vertexai `Part` with a PDF MIME type.
data_path = '../data/naf23.pdf'
auditor_notes_doc = None  # remains None if the file cannot be opened or read
with open(data_path, mode='rb') as pdf_file:
    auditor_notes_doc = Part.from_data(
        mime_type='application/pdf',
        data=pdf_file.read(),
    )

In [34]:
# Show the concrete type of the loaded document (displayed as the cell output).
type(auditor_notes_doc)

vertexai.generative_models._generative_models.Part

In [35]:
# Report whether the loaded document is a `Part` instance.
print(
    "Object is of type Part"
    if isinstance(auditor_notes_doc, Part)
    else "Object is not of type Part"
)

Object is of type Part


In [36]:
# define a search function that returns the entire pdf read from Part.from_data()
def search(query : str):
    """
    This is a search function on a pre-existing financial knowledge base. You can use this search function to search for financial information of specific companies you do not know. Treat this function as an internal wiki search that you can use.

    Args:
        query (str): input query

    Returns:
        The auditor notes document as a whole (the full PDF).
    """
    # `query` is intentionally unused: every call returns the same full-PDF Part
    # held in the module-level `auditor_notes_doc`.
    # NOTE(review): this definition reuses the name `search` and therefore
    # replaces any earlier `search` defined in the notebook.
    return auditor_notes_doc

# Convert the full-document `search` function into a Tool object. This rebinds
# `search_tool`, so agents created from here on use this version.
search_tool = convert_to_tool(search)

In [58]:
# Rebuild the agent with the search tool as its only tool.
agent = ReactAgent(tools=[search_tool], print_system_prompt=False)

In [59]:

# Alternative question to try:
# user_msg='What caused the significant change in net assets with donor restrictions between 2022 and 2023?'
user_msg='What is the total revenue of National Ataxia Foundation in 2023'
agent.run(user_msg=user_msg, max_iterations=10)

---------------------------------------- iteration 0  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 1  ----------------------------------------


'The total revenue of National Ataxia Foundation in 2023 is $4,184,787.'

In [53]:
# Same donor-restriction question as earlier, now answered from the full PDF.
agent.run(
    user_msg='What caused the significant change in net assets with donor restrictions between 2022 and 2023 for National Ataxia Foundation?',
    max_iterations=10,
)

---------------------------------------- iteration 0  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 1  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 2  ----------------------------------------


"The National Ataxia Foundation's net assets with donor restrictions decreased by $361,241 during 2023.  This is primarily due to $1,614,715 in contributions restricted by donors, offset by a release of $975,954 of those restrictions during the year.  The net assets with donor restrictions at the beginning of 2023 were $2,041,806 and at the end of the year were $2,680,567."