#### TOOL CALLING WITH AGENTS

In [1]:
# python-dotenv is imported so the dependency is explicit; the two magics below do the actual work.
import dotenv
# Register python-dotenv's IPython extension, then run it to load variables from a .env file
# (presumably this is where the OpenAI API key comes from — confirm).
%load_ext dotenv
%dotenv
# Patch asyncio so an event loop can be re-entered; presumably required because a later cell
# builds a query engine with use_async=True inside the notebook's own loop — confirm.
import nest_asyncio
nest_asyncio.apply()

In [4]:
#DEFINE TOOLS
def add(x: int, y: int) -> int:
    """Add two numbers together"""
    # The docstring is left exactly as written because it is readable at
    # runtime through add.__doc__ (e.g. by tool wrappers built from this function).
    total = x + y
    return total

def substract(x: int, y: int) -> int:
    """Substract two numbers"""
    # Operand order matters: the second argument is taken away from the first.
    difference = x - y
    return difference

def multiply(x: int, y: int) -> int:
    """Multiply two numbers"""
    # Plain product of the two arguments; no rounding or type coercion is applied.
    product = x * y
    return product

def divide(x: int, y: int) -> float:
    """Divide x by y and return the quotient as a float.

    Args:
        x: The dividend (number being divided).
        y: The divisor; must not be zero.

    Returns:
        The true-division result of ``x / y`` (for example, 7 divided by 2 is 3.5).

    Raises:
        ZeroDivisionError: If ``y`` is zero.
    """
    # True division always yields a float in Python 3, hence the float return type.
    return x / y

def get_user_info(username: str) -> str:
    """Get user information by username (the lookup ignores letter case).

    Args:
        username: Name of the user to look up, e.g. "Antonio" or "nelson".

    Returns:
        A string of the form ``"Username: <username>, Info: <record>"`` where
        ``<record>`` is the stored dictionary for that user, or the text
        ``User not found`` when no user matches.
    """

    database = {
        "Antonio": {
            "name": "Antonio Lopez",
            "age": 30,
            "email": "antonio@example.com"
        },
        "Nelson": {
            "name": "Nelson Rodriguez",
            "age": 25,
            "email": "nelson@example.com"
        }
    }

    # The stored keys are capitalized, so both sides are lower-cased before
    # comparing. Lower-casing only the query (as before) could never match.
    records_by_lowercase_name = {key.lower(): record for key, record in database.items()}
    # Surrounding whitespace is ignored so inputs like " Antonio" still match.
    info = records_by_lowercase_name.get(username.strip().lower(), 'User not found')

    return f"Username: {username}, Info: {info}"


#CREATE TOOLS FROM PYTHON FUNCTIONS 
from llama_index.core.tools import FunctionTool

# Wrap every plain Python function in a FunctionTool, preserving the order in
# which the functions are listed.
tools = [
    FunctionTool.from_defaults(fn=tool_fn)
    for tool_fn in (add, substract, multiply, divide, get_user_info)
]

# Keep the individual tool names available as well; each one refers to the
# same object that sits at the matching position in `tools`.
(
    addition_tools,
    substraction_tools,
    multiplication_tools,
    divide_tools,
    get_user_info_tools,
) = tools

#TEST TOOLS
from llama_index.llms.openai import OpenAI

# Chat model that the cells below ask to choose and call one of the tools.
llm = OpenAI(model="gpt-3.5-turbo")


In [3]:
# Let the LLM pick one of the tools for the request and execute it;
# verbose=True echoes the chosen function and its arguments.
response = llm.predict_and_call(tools, "Add 2 and 3", verbose=True)

# print() applies str() to its argument, so no explicit conversion is needed.
print(response)

=== Calling Function ===
Calling function: add with args: {"x": 2, "y": 3}
=== Function Output ===
5
5


#### VECTOR SEARCH WITH METADATA

In [5]:
from llama_index.core import SimpleDirectoryReader

# Read the LoRA paper PDF from the local datasets folder and load it as LlamaIndex documents.
documents = SimpleDirectoryReader(input_files=["./datasets/lora_paper.pdf"]).load_data()

In [6]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes (chunks); chunk_size=1024 caps the size of each chunk
# (presumably measured in tokens — confirm against the SentenceSplitter docs).
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents=documents)

In [7]:
# Show how many nodes the splitter produced.
len(nodes)

38

In [8]:
###LLM AND EMBEDDING MODEL
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Register the default LLM and embedding model on the global LlamaIndex Settings object.
Settings.llm = OpenAI(model="gpt-3.5-turbo")
# The embedding model is configured through `embed_model`. `Settings.embedding` is not a
# recognized setting, so assigning to it leaves the library's default embedding model in use.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")



In [9]:
from llama_index.core import VectorStoreIndex

# Build a vector index over the chunked nodes; the query engines below search this index.
vector_index = VectorStoreIndex(nodes=nodes)

In [None]:
from llama_index.core.vector_stores import MetadataFilters

# Query engine restricted to chunks whose "page_label" metadata equals "2",
# returning at most the three most similar chunks.
query_engine = vector_index.as_query_engine(
    filters=MetadataFilters.from_dicts([{"key": "page_label", "value": "2"}]),
    similarity_top_k=3,
)

response = query_engine.query("Tell me about the problem statement as explained in page 2")
# print() applies str() to its argument, so no explicit conversion is needed.
print(response)

In [None]:
# Show which chunks backed the answer: metadata first, then the chunk text,
# each followed by a divider line.
divider = "=================="
for source_node in response.source_nodes:
    print(source_node.metadata)
    print(divider)
    print(source_node.get_text())
    print(divider)

In [14]:
from typing import List, Optional
from llama_index.core.vector_stores import FilterCondition
from llama_index.core.vector_stores import MetadataFilters

def vector_search_query(
        query: str,
        page_numbers: Optional[List[str]] = None
):
    """
    Conduct a vector search across the paper index and answer the query.

    query (str): This is the text string you want to embed and search for within the index.
    page_numbers (Optional[List[str]]): Page labels (for example ["2"] or ["2", "5"]) that
    limit the search to specific pages. If omitted or left empty, the search will encompass
    all pages in the index. If several pages are given, a chunk matches when it lies on
    ANY of them.

    Returns the query engine's response object rather than a plain string, so callers can
    still read its ``source_nodes``; ``str(response)`` gives the answer text.
    """

    # Page labels are stored as strings in the node metadata (e.g. '2'), so
    # coerce whatever the caller passes to that form. None is treated as "no pages".
    metadata_dicts = [
        {"key": "page_label", "value": str(p)} for p in (page_numbers or [])
    ]

    engine_kwargs = {"similarity_top_k": 2}
    # Attach a filter only when pages were actually requested. An empty OR-filter
    # has no conditions to satisfy, so leaving it out guarantees the documented
    # "search all pages" behavior.
    if metadata_dicts:
        engine_kwargs["filters"] = MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR
        )

    query_engine = vector_index.as_query_engine(**engine_kwargs)

    return query_engine.query(query)

In [11]:
# Expose the page-filtered vector search to the LLM under an explicit tool name.
vector_query_tool = FunctionTool.from_defaults(
    name="vector_search_query_tool",
    fn=vector_search_query,
)

In [15]:
# Only the vector search tool is offered here, so the LLM's job is to turn the
# request into that tool's arguments (query text and page numbers).
response = llm.predict_and_call(
    [vector_query_tool], "Explain the problem statement in page 2", verbose=True
)

=== Calling Function ===
Calling function: vector_search_query_tool with args: {"query": "problem statement", "page_numbers": ["2"]}
=== Function Output ===
We focus on language modeling as our motivating use case, particularly on maximizing conditional probabilities given a task-specific prompt. The scenario involves adapting a pre-trained autoregressive language model to downstream conditional text generation tasks like summarization, machine reading comprehension (MRC), and natural language to SQL (NL2SQL). Each task is defined by a dataset of context-target pairs, where both the context and target are sequences of tokens. For instance, in NL2SQL, the context is a natural language query and the target is the corresponding SQL command; in summarization, the context is an article's content and the target is its summary.


In [16]:
# Inspect the chunks behind the tool call's answer: metadata, then chunk text,
# each followed by a divider line.
divider = "=================="
for source_node in response.source_nodes:
    print(source_node.metadata)
    print(divider)
    print(source_node.get_text())
    print(divider)

{'page_label': '2', 'file_name': 'lora_paper.pdf', 'file_path': 'datasets/lora_paper.pdf', 'file_type': 'application/pdf', 'file_size': 1609513, 'creation_date': '2024-06-08', 'last_modified_date': '2024-06-08'}
often introduce inference latency (Houlsby et al., 2019; Rebufﬁ et al., 2017) by extending model
depth or reduce the model’s usable sequence length (Li & Liang, 2021; Lester et al., 2021; Ham-
bardzumyan et al., 2020; Liu et al., 2021) (Section 3). More importantly, these method often fail to
match the ﬁne-tuning baselines, posing a trade-off between efﬁciency and model quality.
We take inspiration from Li et al. (2018a); Aghajanyan et al. (2020) which show that the learned
over-parametrized models in fact reside on a low intrinsic dimension. We hypothesize that the
change in weights during model adaptation also has a low “intrinsic rank”, leading to our proposed
Low-RankAdaptation (LoRA) approach. LoRA allows us to train some dense layers in a neural
network indirectly by opti

In [17]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

# Summary index built over the same nodes as the vector index.
summary_index = SummaryIndex(nodes=nodes)

# NOTE: despite its name, this variable holds a query engine, not a tool;
# the tool wrapper is created just below.
summary_query_engine_tool = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)

# Wrap the query engine so the LLM can select it by name and description.
summary_tool = QueryEngineTool.from_defaults(
    name="summary_tool",
    description="Useful for summarization questions related to the Lora paper.",
    query_engine=summary_query_engine_tool,
)

In [19]:
# Offer both tools and let the LLM decide which one fits the request.
# The prompt text is kept exactly as originally written.
response = llm.predict_and_call(
    [summary_tool, vector_query_tool],
    "Summarize how to apply Lora to Transfomer in page 5 in 2 sentences", verbose=True
)

=== Calling Function ===
Calling function: summary_tool with args: {"input": "Apply Lora to Transformer by incorporating the Lora module into the Transformer architecture, enabling the model to capture long-range dependencies efficiently. This integration involves modifying the self-attention mechanism in the Transformer to include Lora's attention mechanism for improved performance."}
=== Function Output ===
Incorporating LoRA into the Transformer architecture involves modifying the self-attention mechanism to include LoRA's attention mechanism. This adaptation allows the model to efficiently capture long-range dependencies, leading to improved performance in handling downstream tasks.
