# This notebook tests how much the answers differ between repeated runs using Gemini 1.5 Pro 002

In [1]:
from vertexai.generative_models import GenerationConfig, GenerativeModel, Content, Part  


In [2]:
from IPython.display import Markdown,display,HTML


import sys
import json
import os
import importlib
import math
import html

sys.path.append(os.path.abspath('../utils'))

import tool_functions
importlib.reload(tool_functions)
from tool_functions import convert_to_tool # convert self-defined functions to Tool objects

import rag
importlib.reload(rag)
from rag import RAG # RAG search function

import react_agent
importlib.reload(react_agent)
from react_agent import ReactAgent

### RAG tool 

In [3]:
# read the data, create an RAG class on that data, create the search function
data_path='../data/naf23.pdf'

from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Build the FAISS vector store that backs the RAG search tool.
pdf_reader = PdfReader(data_path)

# Concatenate the extracted text of every page into one string.
# A single join avoids the quadratic cost of repeated `+=`, and `or ""`
# guards against a page for which extract_text() yields nothing.
text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

# text_splitter.create_documents() splits the text into chunks and wraps each chunk in a LangChain 'Document' object
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
chunks = text_splitter.create_documents([text])

# embedding model used to vectorise the chunks
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')

# FAISS index built from the chunk embeddings
faiss_vector_store = FAISS.from_documents(chunks, embedding_model)

# RAG wrapper around the same PDF and the prebuilt vector store; used by search() below
rag_instance = RAG(pdf_path=data_path, chunking_method='recursive', faiss_vector_store=faiss_vector_store)

def search(query : str):
    """
    This is a search function on a pre-exisiting financial knoweldge base, you can use this search function to search for financial information of specific companies you do not know, treat this function as a internal wiki search that you can use

    Args:
        query (str): input query            
    """
    # NOTE(review): the docstring above is presumably read by convert_to_tool as the
    # tool description shown to the model, so it is kept verbatim — confirm.
    # Run the RAG lookup with the 'ensemble' method and return the hits as a JSON string.
    hits = rag_instance.search(query, method='ensemble')
    payload = {"relevant information in the auditor notes": hits}
    return json.dumps(payload)

# Wrap `search` with convert_to_tool (imported from tool_functions) to obtain the
# Tool object that is handed to ReactAgent via `tools=[search_tool]`.
search_tool = convert_to_tool(search)

  from tqdm.autonotebook import tqdm, trange


## ReAct with RAG

In [4]:
# ReAct agent whose only tool is the RAG-backed search; the model is pinned to gemini-1.5-pro-002.
agent = ReactAgent(tools=[search_tool],model='gemini-1.5-pro-002')


In [5]:
# Question asked in runs 1-3 below; it names the organisation explicitly.
query="What caused the significant change in National Ataxia Foundation's net assets with donor restrictions between 2022 and 2023?"


### run 1

In [6]:
# Run 1: first call on the agent created above.
agent.run(user_msg=query)

---------------------------------------- iteration 0  ----------------------------------------


{"relevant information in the auditor notes": "Total Assets 4,116,826$     5,008,595$      \nLiabilities and Net Assets\nCurrent Liabilities\nAccounts payable 298,827 $        440,809 $         \nAccrued payroll and related expenses 35,140             29,226             \nDeferred revenue 109,810 22,291\nOperating lease liability, current portion 32,750 31,548\nTotal Current Liabilities 476,527           523,874           \nLong-term Operating Lease Liability, Less Current Portion 25,353 58,104\nTotal Liabilities 501,880           581,978           \nNet Assets\nWithout donor restriction\nBoard designated - operating reserve 396,499           420,985           \nUndesignated 537,880           1,963,826       \nTotal Net Assets Without Donor Restriction 934,379           2,384,811       \nWith donor restriction 2,680,567       2,041,806       \nTotal Net Assets 3,614,946       4,426,617       \nTotal Liabilities and Net Assets 4,116,826$     5,008,595$      National Ataxia Foundation\nS

{"relevant information in the auditor notes": "and stored at the NINDS SCA -BRAC biorepository. Both de-identified clinical data and biofluids are available \nfor request from non -participating investigators for approved research projects.  \n  \n13 \n National Ataxia Foundation  \nNotes to the Financial Statements  \nDecember 31, 2023  and 2022  \n \nNote 1:  Summary of Significant Accounting Policies (Continued)  \n \nB. Basis of Accounting and Presentation  \n \nThe accompanying financial statements have been prepared using  the accrual basis of accounting in accordance with \naccounting principles generally accepted in the United States of America.   \n \nRevenues are recorded when earned and expenses are recorded when a liability is incurred. Contributions received are \nrecorded as an increase in non -donor -restricted or donor -restricted support depending on the existence or nature of any \ndonor restrictions.  Accordingly, net assets of the Foundation and changes therein are 

"Based on the available information, the National Ataxia Foundation's net assets with donor restrictions decreased by $638,761 between 2022 and 2023.  Unfortunately, the provided data does not explain the specific reasons for this change.  A review of the complete audited financial statements or the annual report would be necessary to determine the exact cause of the decrease.  Possible reasons could include the fulfillment of donor-imposed restrictions, reclassification of assets, or investment losses within the donor-restricted funds.  However, without further information, I cannot confirm any of these possibilities."

### run 2

In [7]:
# Run 2: same query on the same `agent` object as run 1.
# NOTE(review): if ReactAgent keeps conversation state between run() calls, the runs
# are not independent — confirm, or re-create the agent before each run.
agent.run(user_msg=query)

---------------------------------------- iteration 0  ----------------------------------------


{"relevant information in the auditor notes": "Total Assets 4,116,826$     5,008,595$      \nLiabilities and Net Assets\nCurrent Liabilities\nAccounts payable 298,827 $        440,809 $         \nAccrued payroll and related expenses 35,140             29,226             \nDeferred revenue 109,810 22,291\nOperating lease liability, current portion 32,750 31,548\nTotal Current Liabilities 476,527           523,874           \nLong-term Operating Lease Liability, Less Current Portion 25,353 58,104\nTotal Liabilities 501,880           581,978           \nNet Assets\nWithout donor restriction\nBoard designated - operating reserve 396,499           420,985           \nUndesignated 537,880           1,963,826       \nTotal Net Assets Without Donor Restriction 934,379           2,384,811       \nWith donor restriction 2,680,567       2,041,806       \nTotal Net Assets 3,614,946       4,426,617       \nTotal Liabilities and Net Assets 4,116,826$     5,008,595$      National Ataxia Foundation\nS

"The National Ataxia Foundation's net assets with donor restrictions decreased by $638,761 between 2022 and 2023.  The provided financial statements show $2,680,567 in net assets with donor restrictions in 2022 and $2,041,806 in 2023. While the information provided doesn't explain the *reason* for this change, further analysis of the financial statements may reveal the specific transactions or events that led to the decrease.  It's possible that restrictions were released or that there were fewer donations with restrictions in 2023 compared to 2022."

### run 3

In [8]:
# Run 3: same query, still the same `agent` object.
# NOTE(review): if ReactAgent keeps conversation state between run() calls, the runs
# are not independent — confirm, or re-create the agent before each run.
agent.run(user_msg=query)

---------------------------------------- iteration 0  ----------------------------------------


{"relevant information in the auditor notes": "Total Assets 4,116,826$     5,008,595$      \nLiabilities and Net Assets\nCurrent Liabilities\nAccounts payable 298,827 $        440,809 $         \nAccrued payroll and related expenses 35,140             29,226             \nDeferred revenue 109,810 22,291\nOperating lease liability, current portion 32,750 31,548\nTotal Current Liabilities 476,527           523,874           \nLong-term Operating Lease Liability, Less Current Portion 25,353 58,104\nTotal Liabilities 501,880           581,978           \nNet Assets\nWithout donor restriction\nBoard designated - operating reserve 396,499           420,985           \nUndesignated 537,880           1,963,826       \nTotal Net Assets Without Donor Restriction 934,379           2,384,811       \nWith donor restriction 2,680,567       2,041,806       \nTotal Net Assets 3,614,946       4,426,617       \nTotal Liabilities and Net Assets 4,116,826$     5,008,595$      National Ataxia Foundation\nS

{"relevant information in the auditor notes": "and stored at the NINDS SCA -BRAC biorepository. Both de-identified clinical data and biofluids are available \nfor request from non -participating investigators for approved research projects.  \n  \n13 \n National Ataxia Foundation  \nNotes to the Financial Statements  \nDecember 31, 2023  and 2022  \n \nNote 1:  Summary of Significant Accounting Policies (Continued)  \n \nB. Basis of Accounting and Presentation  \n \nThe accompanying financial statements have been prepared using  the accrual basis of accounting in accordance with \naccounting principles generally accepted in the United States of America.   \n \nRevenues are recorded when earned and expenses are recorded when a liability is incurred. Contributions received are \nrecorded as an increase in non -donor -restricted or donor -restricted support depending on the existence or nature of any \ndonor restrictions.  Accordingly, net assets of the Foundation and changes therein are 

"The National Ataxia Foundation's financial statements indicate a decrease in net assets with donor restrictions from $2,680,567 in 2022 to $2,041,806 in 2023.  Unfortunately, the information I could locate does not explain the specific reasons for this change.  Further investigation, possibly by reviewing the full audited financial statements directly from the National Ataxia Foundation, would be necessary to determine the cause of this decrease."

* Each run is slightly different.
    * The first search is always the same; what follows the first search changes.

### Remove "National Ataxia Foundation" (NAF) from the query: we get completely different answers

In [9]:
# Same question with the organisation name removed, to see how the searches and the answer change.
query="What caused the significant change in net assets with donor restrictions between 2022 and 2023?"

agent.run(user_msg=query)

---------------------------------------- iteration 0  ----------------------------------------


{"relevant information in the auditor notes": "and stored at the NINDS SCA -BRAC biorepository. Both de-identified clinical data and biofluids are available \nfor request from non -participating investigators for approved research projects.  \n  \n13 \n National Ataxia Foundation  \nNotes to the Financial Statements  \nDecember 31, 2023  and 2022  \n \nNote 1:  Summary of Significant Accounting Policies (Continued)  \n \nB. Basis of Accounting and Presentation  \n \nThe accompanying financial statements have been prepared using  the accrual basis of accounting in accordance with \naccounting principles generally accepted in the United States of America.   \n \nRevenues are recorded when earned and expenses are recorded when a liability is incurred. Contributions received are \nrecorded as an increase in non -donor -restricted or donor -restricted support depending on the existence or nature of any \ndonor restrictions.  Accordingly, net assets of the Foundation and changes therein are 

{"relevant information in the auditor notes": "Annual Financial  \nReport  \nNational Ataxia Foundation   \nSt. Louis Park, Minnesota  \n \n \nFor the years ended December 31, 2023  and 2022  \n National Ataxia Foundation  \nTable of Contents  \nDecember 31, 2023  and 2022  \nPage No.  \nIndependent Auditor's Report  3 \nFinancial Statements  \nStatement s of Financial Position  6 \nStatement s of Activities  7 \nStatements of Functional Expenses  9 \nStateme nts of Cash Flows  11 \nNotes to the Financial Statements  12 \n2 \n  \n \n \n \n \n \nINDEPENDENT AUDITOR'S REPORT  \n \n \nBoard of Directors  \nNational Ataxia Foundation  \nSt. Louis Park , Minnesota  \n \nOpinion  \n \nWe have audited the accompanying financial statements of National Ataxia Foundation  (the Foundation ), which comprise \nthe statements of financial position as of December 31, 2023  and 2022 , and the related statements of activities, \nfunctional expenses and cash flows for the years then ended, and the relat

{"relevant information in the auditor notes": "Total Assets 4,116,826$     5,008,595$      \nLiabilities and Net Assets\nCurrent Liabilities\nAccounts payable 298,827 $        440,809 $         \nAccrued payroll and related expenses 35,140             29,226             \nDeferred revenue 109,810 22,291\nOperating lease liability, current portion 32,750 31,548\nTotal Current Liabilities 476,527           523,874           \nLong-term Operating Lease Liability, Less Current Portion 25,353 58,104\nTotal Liabilities 501,880           581,978           \nNet Assets\nWithout donor restriction\nBoard designated - operating reserve 396,499           420,985           \nUndesignated 537,880           1,963,826       \nTotal Net Assets Without Donor Restriction 934,379           2,384,811       \nWith donor restriction 2,680,567       2,041,806       \nTotal Net Assets 3,614,946       4,426,617       \nTotal Liabilities and Net Assets 4,116,826$     5,008,595$      National Ataxia Foundation\nS

{"relevant information in the auditor notes": "and stored at the NINDS SCA -BRAC biorepository. Both de-identified clinical data and biofluids are available \nfor request from non -participating investigators for approved research projects.  \n  \n13 \n National Ataxia Foundation  \nNotes to the Financial Statements  \nDecember 31, 2023  and 2022  \n \nNote 1:  Summary of Significant Accounting Policies (Continued)  \n \nB. Basis of Accounting and Presentation  \n \nThe accompanying financial statements have been prepared using  the accrual basis of accounting in accordance with \naccounting principles generally accepted in the United States of America.   \n \nRevenues are recorded when earned and expenses are recorded when a liability is incurred. Contributions received are \nrecorded as an increase in non -donor -restricted or donor -restricted support depending on the existence or nature of any \ndonor restrictions.  Accordingly, net assets of the Foundation and changes therein are 

"Several factors can cause significant changes in net assets with donor restrictions.  An increase could be due to new donations received with specific restrictions on their use, or investment income earned on existing restricted funds.  A decrease could result from the release of restrictions on previously restricted donations (either by meeting the conditions of the restriction or by passage of time), or by spending the restricted funds for their intended purpose.  Without access to the National Ataxia Foundation's Statement of Activities for 2022 and 2023, I cannot pinpoint the exact cause of the change in their net assets with donor restrictions.  You would need to consult their financial statements directly to determine the specific reasons for the change."

In [10]:
# Second run of the query without the organisation name (identical text to the previous cell).
query="What caused the significant change in net assets with donor restrictions between 2022 and 2023?"
agent.run(user_msg=query)

---------------------------------------- iteration 0  ----------------------------------------


{"relevant information in the auditor notes": "and stored at the NINDS SCA -BRAC biorepository. Both de-identified clinical data and biofluids are available \nfor request from non -participating investigators for approved research projects.  \n  \n13 \n National Ataxia Foundation  \nNotes to the Financial Statements  \nDecember 31, 2023  and 2022  \n \nNote 1:  Summary of Significant Accounting Policies (Continued)  \n \nB. Basis of Accounting and Presentation  \n \nThe accompanying financial statements have been prepared using  the accrual basis of accounting in accordance with \naccounting principles generally accepted in the United States of America.   \n \nRevenues are recorded when earned and expenses are recorded when a liability is incurred. Contributions received are \nrecorded as an increase in non -donor -restricted or donor -restricted support depending on the existence or nature of any \ndonor restrictions.  Accordingly, net assets of the Foundation and changes therein are 

{"relevant information in the auditor notes": "of America requires management to make estimates and assumptions that affect certain reported amounts and \ndisclosures. Accordingly, actual results could differ from those estimates.  \n \nD. Cash and Cash Equivalents  \n \nFor purposes of the statement of cash flows, the Foundation considers short -term, highly liquid investments and \ninvestments purchased with a maturity of three months or less to be cash equivalents. The Foundation\u2019s cash balances \nheld in bank depositories m ay exceed federally insured limits at times.  \n \nE. Investments  \n \nInvestments are measured at fair value in the statements of financial position. Investment income or loss (including \nrealized  and unrealized  gains and losses on investments, interest/ dividends , and investment advisory fees) is included in \nnon-donor -restricted revenue and support unless the income or loss is restricted by donor or law.  \n \nF. Accounts Receivable  \n \nAccounts

'Changes in net assets with donor restrictions can result from several factors, including new contributions with donor restrictions, the satisfaction of restrictions previously imposed by donors (allowing the assets to be used), investment earnings or losses on restricted funds, and reclassifications between restricted and unrestricted net assets.  To determine the specific cause of the change between 2022 and 2023, you would need to review the statement of changes in net assets and the related notes to the financial statements, which should provide a detailed breakdown of these changes.'

## ReAct with the whole PDF returned as a Part

In [11]:
# Read the complete auditor-notes PDF as raw bytes and wrap it in a Part with the PDF mime type.
data_path='../data/naf23.pdf'
auditor_notes_doc=None  # placeholder until the file has been read
with open(data_path, 'rb') as pdf_file:
    pdf_bytes = pdf_file.read()
    auditor_notes_doc = Part.from_data(data=pdf_bytes, mime_type='application/pdf')

In [12]:
# define a search function that returns the entire pdf read from Part.from_data()
def search(query : str):
    """
    This is a search function on a pre-exisiting financial knoweldge base, you can use this search function to search for financial information of specific companies you do not know, treat this function as a internal wiki search that you can use

    Args:
        query (str): input query            
    """
    # NOTE(review): the docstring above is presumably read by convert_to_tool as the
    # tool description shown to the model, so it is kept verbatim — confirm.
    # `query` is not used: every call hands back the same preloaded PDF Part.
    whole_document = auditor_notes_doc
    return whole_document

# Wrap the whole-PDF `search` with convert_to_tool; this rebinds `search_tool`,
# replacing the RAG-backed tool created earlier in the notebook.
search_tool = convert_to_tool(search)

In [13]:
# New agent whose search tool returns the whole PDF.
# NOTE(review): no `model` is passed here, so ReactAgent's default is used — confirm that
# the default is gemini-1.5-pro-002, as the notebook title implies.
agent = ReactAgent(tools=[search_tool],)

### run1


In [14]:
# Run 1 with the whole-PDF tool; same question text as in the RAG runs.
user_msg="What caused the significant change in National Ataxia Foundation's net assets with donor restrictions between 2022 and 2023?"
agent.run(user_msg=user_msg)

---------------------------------------- iteration 0  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 1  ----------------------------------------


"The National Ataxia Foundation's net assets with donor restrictions decreased by $638,761 between 2022 and 2023. This change is primarily attributed to a net loss from operations, partially offset by the release of restrictions from existing donor restrictions."

### run2


In [15]:
# Run 2 with the whole-PDF tool, on the same `agent` object as run 1.
# NOTE(review): if ReactAgent keeps conversation state between run() calls, the runs
# are not independent — confirm, or re-create the agent before each run.
user_msg="What caused the significant change in National Ataxia Foundation's net assets with donor restrictions between 2022 and 2023?"
agent.run(user_msg=user_msg)

---------------------------------------- iteration 0  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 1  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 2  ----------------------------------------


"The National Ataxia Foundation's net assets with donor restrictions decreased by $361,241 from $2,041,806 in 2022 to $2,680,567 in 2023.  The Statement of Activities shows that $1,614,715 of donor restrictions were released during 2023. However, new contributions with donor restrictions were received, resulting in a net decrease.  More specifically, the decrease is primarily due to the difference between the releases from restrictions ($975,954 released in 2022 vs $1,614,715 released in 2023) and the new donor restricted contributions received during the year (which can be inferred but not explicitly stated in the provided statements).  Page 20 of the report details the specific funds with donor restrictions and their balances at the end of both 2022 and 2023."

### run3

In [16]:
# Run 3 with the whole-PDF tool, still on the same `agent` object.
# NOTE(review): if ReactAgent keeps conversation state between run() calls, the runs
# are not independent — confirm, or re-create the agent before each run.
user_msg="What caused the significant change in National Ataxia Foundation's net assets with donor restrictions between 2022 and 2023?"
agent.run(user_msg=user_msg)

---------------------------------------- iteration 0  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 1  ----------------------------------------


"The National Ataxia Foundation's net assets with donor restrictions *decreased* by $638,761 between 2022 and 2023. This change is primarily attributed to the release of $975,954 of donor restrictions, offset by $1,614,715 in contributions with donor restrictions."

# Conclusion

* Every method gets the answer wrong: the change is an increase, not a decrease. It is unclear why the model mixes up the order of 2022 and 2023; the same figures are interpreted correctly in other notebooks.
* There are variations between runs, so it is better to run each question a few times.

# Reload and test again

### Reload pdf

In [19]:
# Reload the complete auditor-notes PDF as raw bytes and wrap it in a Part with the PDF mime type.
data_path='../data/naf23.pdf'
auditor_notes_doc=None  # placeholder until the file has been read
with open(data_path, 'rb') as pdf_file:
    pdf_bytes = pdf_file.read()
    auditor_notes_doc = Part.from_data(data=pdf_bytes, mime_type='application/pdf')
    
    
def search(query : str):
    """
    This is a search function on a pre-exisiting financial knoweldge base, you can use this search function to search for financial information of specific companies you do not know, treat this function as a internal wiki search that you can use

    Args:
        query (str): input query            
    """
    # NOTE(review): the docstring above is presumably read by convert_to_tool as the
    # tool description shown to the model, so it is kept verbatim — confirm.
    # `query` is not used: every call hands back the same preloaded PDF Part.
    whole_document = auditor_notes_doc
    return whole_document

# Wrap the whole-PDF `search` defined in this cell as a Tool object.
search_tool = convert_to_tool(search)

# Fresh agent for the retest; no `model` argument is passed, so ReactAgent's default model is used.
agent = ReactAgent(tools=[search_tool],)

# Same question as in the earlier runs, but the years are now written as "between 2023 and 2022".
user_msg="What caused the significant change in National Ataxia Foundation's net assets with donor restrictions between 2023 and 2022?"
agent.run(user_msg=user_msg)

---------------------------------------- iteration 0  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 1  ----------------------------------------


"The National Ataxia Foundation's net assets with donor restrictions decreased from $2,041,806 in 2022 to $2,680,567 in 2023.  This represents a *gain* of $638,761.  The Statement of Activities shows that this change is primarily due to $1,614,715 in contributions with donor restrictions.  This was offset by a net assets released from restrictions of $975,954.  The notes to the financial statements provide further details on the specific donor restrictions and how they were used during the year."

### use 001

In [20]:
# Same whole-PDF search tool, but with the model switched to gemini-1.5-pro-001.
agent = ReactAgent(tools=[search_tool],model="gemini-1.5-pro-001")

# Question with the years written as "between 2023 and 2022".
user_msg="What caused the significant change in National Ataxia Foundation's net assets with donor restrictions between 2023 and 2022?"
agent.run(user_msg=user_msg)

---------------------------------------- iteration 0  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 1  ----------------------------------------


observation is non-printable Part object, probably the full pdf
---------------------------------------- iteration 2  ----------------------------------------


"The provided financial statements do not give specific reasons for the change in National Ataxia Foundation's net assets with donor restrictions between 2023 and 2022. More information is needed to determine the cause."

### use RAG


In [21]:
def search(query : str):
    """
    This is a search function on a pre-exisiting financial knoweldge base, you can use this search function to search for financial information of specific companies you do not know, treat this function as a internal wiki search that you can use

    Args:
        query (str): input query            
    """
    # NOTE(review): the docstring above is presumably read by convert_to_tool as the
    # tool description shown to the model, so it is kept verbatim — confirm.
    # Run the RAG lookup with the 'ensemble' method and return the hits as a JSON string.
    hits = rag_instance.search(query, method='ensemble')
    payload = {"relevant information in the auditor notes": hits}
    return json.dumps(payload)

# Wrap the RAG-backed `search` with convert_to_tool; this rebinds `search_tool`,
# replacing the whole-PDF tool used in the preceding cells.
search_tool = convert_to_tool(search)


In [22]:
# gemini-1.5-pro-001 again, now with the current `search_tool` (the RAG-backed search).
agent = ReactAgent(tools=[search_tool],model="gemini-1.5-pro-001")

# Question with the years written as "between 2023 and 2022".
user_msg="What caused the significant change in National Ataxia Foundation's net assets with donor restrictions between 2023 and 2022?"
agent.run(user_msg=user_msg)

---------------------------------------- iteration 0  ----------------------------------------


{"relevant information in the auditor notes": "Annual Financial  \nReport  \nNational Ataxia Foundation   \nSt. Louis Park, Minnesota  \n \n \nFor the years ended December 31, 2023  and 2022  \n National Ataxia Foundation  \nTable of Contents  \nDecember 31, 2023  and 2022  \nPage No.  \nIndependent Auditor's Report  3 \nFinancial Statements  \nStatement s of Financial Position  6 \nStatement s of Activities  7 \nStatements of Functional Expenses  9 \nStateme nts of Cash Flows  11 \nNotes to the Financial Statements  12 \n2 \n  \n \n \n \n \n \nINDEPENDENT AUDITOR'S REPORT  \n \n \nBoard of Directors  \nNational Ataxia Foundation  \nSt. Louis Park , Minnesota  \n \nOpinion  \n \nWe have audited the accompanying financial statements of National Ataxia Foundation  (the Foundation ), which comprise \nthe statements of financial position as of December 31, 2023  and 2022 , and the related statements of activities, \nfunctional expenses and cash flows for the years then ended, and the relat

{"relevant information in the auditor notes": "Annual Financial  \nReport  \nNational Ataxia Foundation   \nSt. Louis Park, Minnesota  \n \n \nFor the years ended December 31, 2023  and 2022  \n National Ataxia Foundation  \nTable of Contents  \nDecember 31, 2023  and 2022  \nPage No.  \nIndependent Auditor's Report  3 \nFinancial Statements  \nStatement s of Financial Position  6 \nStatement s of Activities  7 \nStatements of Functional Expenses  9 \nStateme nts of Cash Flows  11 \nNotes to the Financial Statements  12 \n2 \n  \n \n \n \n \n \nINDEPENDENT AUDITOR'S REPORT  \n \n \nBoard of Directors  \nNational Ataxia Foundation  \nSt. Louis Park , Minnesota  \n \nOpinion  \n \nWe have audited the accompanying financial statements of National Ataxia Foundation  (the Foundation ), which comprise \nthe statements of financial position as of December 31, 2023  and 2022 , and the related statements of activities, \nfunctional expenses and cash flows for the years then ended, and the relat

"I do not have access to real-time information, including specific financial statements for non-profit organizations. To get the information you are looking for, I recommend visiting the National Ataxia Foundation's website directly or exploring platforms like Guidestar that often provide access to non-profit financial reports."

### simplify the question

In [23]:
# Simplified question: ask only for the size of the change, not for its cause.
# NOTE(review): this reuses the existing `agent`; if ReactAgent keeps conversation state
# between run() calls, the earlier exchange may influence this answer — confirm.
user_msg="What is the change of National Ataxia Foundation's net assets with donor restrictions between 2023 and 2022?"
agent.run(user_msg=user_msg)

---------------------------------------- iteration 0  ----------------------------------------


{"relevant information in the auditor notes": "and stored at the NINDS SCA -BRAC biorepository. Both de-identified clinical data and biofluids are available \nfor request from non -participating investigators for approved research projects.  \n  \n13 \n National Ataxia Foundation  \nNotes to the Financial Statements  \nDecember 31, 2023  and 2022  \n \nNote 1:  Summary of Significant Accounting Policies (Continued)  \n \nB. Basis of Accounting and Presentation  \n \nThe accompanying financial statements have been prepared using  the accrual basis of accounting in accordance with \naccounting principles generally accepted in the United States of America.   \n \nRevenues are recorded when earned and expenses are recorded when a liability is incurred. Contributions received are \nrecorded as an increase in non -donor -restricted or donor -restricted support depending on the existence or nature of any \ndonor restrictions.  Accordingly, net assets of the Foundation and changes therein are 

"The change of National Ataxia Foundation's net assets with donor restrictions between 2023 and 2022 is $2,680,567 - $2,041,806 = $638,761."