In [1]:
import os

import dotenv
# Register the IPython extension shipped with python-dotenv, then invoke its
# %dotenv line magic (with no arguments) to load environment variables.
# NOTE(review): presumably this reads a local .env file holding the API
# credentials used by the LLM configuration further down — confirm.
%load_ext dotenv
%dotenv

In [2]:
import nest_asyncio
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs a
# loop and a later cell builds a query engine with use_async=True — confirm.
nest_asyncio.apply()

In [3]:
def add(x: int, y: int) -> int:
    """Return the sum of x and y."""
    total = x + y
    return total

def subtract(x: int, y: int) -> int:
    """Return x minus y (the second number is taken away from the first)."""
    difference = x - y
    return difference

def multiply(x: int, y: int) -> int:
    """Return the product of x and y."""
    product = x * y
    return product

def divide(x: int, y: int) -> float:
    """Return x divided by y using true (floating-point) division.

    A zero divisor is not handled here, so Python's ZeroDivisionError
    propagates to the caller.
    """
    quotient = x / y
    return quotient

def get_user_info(name: str) -> str:
    """Get user information.

    Looks ``name`` up in a small hard-coded directory and describes the user
    in one sentence.

    Args:
        name: Exact (case-sensitive) user name, e.g. "John".

    Returns:
        A sentence giving the user's age and location. When ``name`` is not in
        the directory, a message saying so and listing the known users is
        returned instead of raising.
    """
    data = {
        "John": {
            "age": 25,
            "location": "New York"
        },
        "Jane": {
            "age": 22,
            "location": "San Francisco"
        }
    }

    record = data.get(name)
    if record is None:
        # An unknown name used to surface as a bare KeyError. Returning an
        # explanatory string gives whoever called the function (including an
        # LLM using it as a tool) something actionable.
        known_users = ", ".join(data)
        return f"No information found for user '{name}'. Known users: {known_users}"

    return f"{name} is {record['age']} years old and lives in {record['location']}"


In [4]:
from llama_index.core.tools import FunctionTool

# Wrap each helper function in a FunctionTool. The individual tool names stay
# available at module level, and `tools` collects them in the same order.
(
    addition_tool,
    subtraction_tool,
    multiplication_tool,
    division_tool,
    get_user_info_tool,
) = (
    FunctionTool.from_defaults(fn=func)
    for func in (add, subtract, multiply, divide, get_user_info)
)

tools = [
    addition_tool,
    subtraction_tool,
    multiplication_tool,
    division_tool,
    get_user_info_tool,
]

In [5]:
from llama_index.core import set_global_service_context
from my_lib.openai_config import service_context_openai

# Install the project's pre-built service context as the global default.
# NOTE(review): service_context_openai comes from a project-local module not
# shown here; presumably it configures the OpenAI LLM/embedding model — confirm.
# NOTE(review): newer llama-index releases deprecate the global ServiceContext
# in favour of `Settings` (which the next cell already reads) — confirm against
# the installed version.
set_global_service_context(service_context_openai)


In [6]:
from llama_index.core import Settings

# Reuse whichever LLM is registered on the global Settings object.
llm = Settings.llm

# Hand the model the full tool list and a plain-language request; with
# verbose=True the selected function and its output are echoed.
response = llm.predict_and_call(tools, "Add 5 and 5", verbose=True)

# print() already applies str() to its argument.
print(response)

=== Calling Function ===
Calling function: add with args: {"x": 5, "y": 5}
=== Function Output ===
10
10


In [7]:
# Same tool list as before, but a question that only the user-lookup tool can
# answer; verbose=True echoes which function was called and what it returned.
response = llm.predict_and_call(
    tools,
    "Tell me about John",
    verbose=True
)

=== Calling Function ===
Calling function: get_user_info with args: {"name": "John"}
=== Function Output ===
John is 25 years old and lives in New York


In [8]:
from llama_index.core import SimpleDirectoryReader

# Load the single PDF at a path relative to the notebook's working directory.
# NOTE(review): the metadata printed in later cells carries a page_label per
# entry, so the reader presumably yields one document per PDF page — confirm.
documents = SimpleDirectoryReader(input_files=["./documents/lora_paper.pdf"]).load_data()

In [9]:
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into chunks using a chunk_size of 1024.
splitter = SentenceSplitter(chunk_size=1024)
# NOTE(review): despite the singular name, `node` is passed to the index
# constructors in later cells as the node collection; a plural name would be
# clearer, but renaming requires updating every cell that references it.
node = splitter.get_nodes_from_documents(documents)

In [10]:
from llama_index.core import VectorStoreIndex

# Build the vector index over the chunks produced by the splitter cell; the
# query engines and the vector search tool defined later all read from it.
vector_index = VectorStoreIndex(node)

In [11]:
from llama_index.core.vector_stores import MetadataFilters

# Restrict retrieval to chunks whose "page_label" metadata equals "2" and keep
# the two most similar matches.
query_engine = vector_index.as_query_engine(
    filters=MetadataFilters.from_dicts([{"key": "page_label", "value": "2"}]),
    similarity_top_k=2,
)

response = query_engine.query("Cuentame sobre el problema planteado y como se explica")
# print() already applies str() to its argument.
print(response)

El problema planteado se centra en la adaptación de un modelo pre-entrenado a tareas específicas de generación de texto condicional, como resumen, comprensión de lectura automática y lenguaje natural a SQL. Se parte de un modelo autoregresivo de lenguaje pre-entrenado PΦ(y|x) parametrizado por Φ, como un aprendiz multi-tarea genérico basado en la arquitectura Transformer. La adaptación implica ajustar este modelo pre-entrenado a nuevas tareas mediante un conjunto de datos de entrenamiento que consiste en pares de contexto-objetivo. Cada tarea de generación de texto condicional tiene sus propios pares de contexto-objetivo, como consultas de lenguaje natural a comandos SQL o contenido de un artículo a su resumen.


In [12]:
# Show the metadata and full text of every source node behind the last response.
TEXT_BANNER = "=============Text============="
for source_node in response.source_nodes:
    print(source_node.metadata)
    print(TEXT_BANNER)
    print(source_node.get_text())
    print(TEXT_BANNER)
    

{'page_label': '2', 'file_name': 'lora_paper.pdf', 'file_path': 'documents\\lora_paper.pdf', 'file_type': 'application/pdf', 'file_size': 1609513, 'creation_date': '2024-08-13', 'last_modified_date': '2024-08-13'}
often introduce inference latency (Houlsby et al., 2019; Rebufﬁ et al., 2017) by extending model
depth or reduce the model’s usable sequence length (Li & Liang, 2021; Lester et al., 2021; Ham-
bardzumyan et al., 2020; Liu et al., 2021) (Section 3). More importantly, these method often fail to
match the ﬁne-tuning baselines, posing a trade-off between efﬁciency and model quality.
We take inspiration from Li et al. (2018a); Aghajanyan et al. (2020) which show that the learned
over-parametrized models in fact reside on a low intrinsic dimension. We hypothesize that the
change in weights during model adaptation also has a low “intrinsic rank”, leading to our proposed
Low-RankAdaptation (LoRA) approach. LoRA allows us to train some dense layers in a neural
network indirectly by op

In [13]:
from typing import List
from llama_index.core.vector_stores import FilterCondition

def vector_search_query(
        query: str,
        page_numbers: List[str]
) -> str:
    """Conduct a vector search across an index using the following parameters:

    query (str): This is the text string you want to embed and search for within the index.
    page_numbers (List[str]): This parameter allows you to limit the search to
    specific pages. If left empty, the search will encompass all pages in the index.
    If page numbers are specified, the search will be filtered to only include those pages.

    """
    # The docstring wording above is kept as-is on purpose: the function is
    # wrapped in a FunctionTool without an explicit description, so this text
    # is presumably what the LLM reads when deciding how to call the tool.

    # One exact-match filter on "page_label" per requested page.
    page_filters = []
    for page in page_numbers:
        page_filters.append({"key": "page_label", "value": page})

    # OR the per-page filters together so a chunk from any listed page matches.
    combined_filters = MetadataFilters.from_dicts(
        page_filters,
        condition=FilterCondition.OR,
    )
    engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=combined_filters,
    )

    # NOTE(review): the signature is annotated `-> str`, but the object returned
    # by the query engine is passed back unchanged.
    return engine.query(query)

In [14]:
# Expose vector_search_query to the LLM under the explicit tool name
# "vector_search_tool" (the name that shows up in the verbose call logs).
vector_query_tool = FunctionTool.from_defaults(
    name="vector_search_tool",
    fn=vector_search_query
)

In [15]:
# Offer only the vector search tool; the model has to derive both the search
# text and the page filter from the question. verbose=True echoes the call.
response = llm.predict_and_call(
    [vector_query_tool], 
    "What was mentioned about the problem statement in page 2?", 
    verbose=True
)

=== Calling Function ===
Calling function: vector_search_tool with args: {"query": "problem statement", "page_numbers": ["2"]}
=== Function Output ===
While the proposal is independent of the training objective, the focus is on language modeling, particularly maximizing conditional probabilities based on a task-specific prompt. The scenario involves adapting a pre-trained autoregressive language model to various downstream conditional text generation tasks like summarization, machine reading comprehension (MRC), and natural language to SQL (NL2SQL). Each task is defined by a dataset of context-target pairs, where both the context (xi) and target (yi) are sequences of tokens. For instance, in NL2SQL, xi represents a natural language query and yi its corresponding SQL command, while in summarization, xi is the article content and yi is its summary.


In [18]:
# Inspect which chunks backed the tool call: metadata first, then full text.
TEXT_BANNER = "=============Text============="
for source_node in response.source_nodes:
    print(source_node.metadata)
    print(TEXT_BANNER)
    print(source_node.get_text())
    print(TEXT_BANNER)

{'page_label': '2', 'file_name': 'lora_paper.pdf', 'file_path': 'documents\\lora_paper.pdf', 'file_type': 'application/pdf', 'file_size': 1609513, 'creation_date': '2024-08-13', 'last_modified_date': '2024-08-13'}
often introduce inference latency (Houlsby et al., 2019; Rebufﬁ et al., 2017) by extending model
depth or reduce the model’s usable sequence length (Li & Liang, 2021; Lester et al., 2021; Ham-
bardzumyan et al., 2020; Liu et al., 2021) (Section 3). More importantly, these method often fail to
match the ﬁne-tuning baselines, posing a trade-off between efﬁciency and model quality.
We take inspiration from Li et al. (2018a); Aghajanyan et al. (2020) which show that the learned
over-parametrized models in fact reside on a low intrinsic dimension. We hypothesize that the
change in weights during model adaptation also has a low “intrinsic rank”, leading to our proposed
Low-RankAdaptation (LoRA) approach. LoRA allows us to train some dense layers in a neural
network indirectly by op

In [19]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

# Second index over the same chunks, used for whole-document summaries.
summary_index = SummaryIndex(node)

# Summarise with the "tree_summarize" response mode, issuing calls asynchronously.
summary_query_engine = summary_index.as_query_engine(
    use_async=True,
    response_mode="tree_summarize",
)

# Wrap the engine as a tool; the description is what tells the LLM when to use it.
summary_tool = QueryEngineTool.from_defaults(
    query_engine=summary_query_engine,
    name="summary_tool",
    description="Useful for summarization questions related to the Lora paper.",
)

In [21]:
# Offer both tools and ask a page-specific question; verbose=True echoes which
# of the two tools the model selected and the arguments it supplied.
response = llm.predict_and_call(
    [vector_query_tool, summary_tool], 
    "What was mentioned about the problem statement in page 2?", 
    verbose=True
)

=== Calling Function ===
Calling function: vector_search_tool with args: {"query": "problem statement", "page_numbers": ["2"]}
=== Function Output ===
The problem statement focuses on language modeling, particularly on maximizing conditional probabilities given a task-specific prompt. It involves adapting a pre-trained autoregressive language model to downstream conditional text generation tasks like summarization, machine reading comprehension, and natural language to SQL. Each task is defined by a dataset of context-target pairs, where the goal is to generate the target text based on the provided context.


In [22]:
# Compact view of the source nodes: metadata plus the first 10 characters of text.
TEXT_BANNER = "=============Text============="
for source_node in response.source_nodes:
    print(source_node.metadata)
    print(TEXT_BANNER)
    print(source_node.get_text()[:10])
    print(TEXT_BANNER)

{'page_label': '2', 'file_name': 'lora_paper.pdf', 'file_path': 'documents\\lora_paper.pdf', 'file_type': 'application/pdf', 'file_size': 1609513, 'creation_date': '2024-08-13', 'last_modified_date': '2024-08-13'}
often intr


In [23]:
# Offer both tools again, this time with a whole-article summary request
# (asked in Spanish); verbose=True echoes which tool the model selected.
response = llm.predict_and_call(
    [vector_query_tool, summary_tool], 
    "Dame un resumen del articulo.", 
    verbose=True
)

=== Calling Function ===
Calling function: summary_tool with args: {"input": "Dame un resumen del articulo."}
=== Function Output ===
El artículo presenta un enfoque llamado LoRA para la adaptación eficiente de modelos de lenguaje pre-entrenados a tareas específicas. LoRA implica congelar los pesos del modelo pre-entrenado y agregar matrices de descomposición de rango entrenables en cada capa, lo que reduce la cantidad de parámetros entrenables. Se demuestra que LoRA supera o iguala al ajuste fino tradicional en la calidad del modelo en diversas tareas, como RoBERTa, DeBERTa, GPT-2 y GPT-3, a pesar de tener menos parámetros entrenables y una mayor eficiencia de entrenamiento. Además, se discuten investigaciones empíricas sobre la deficiencia de rango en la adaptación de modelos de lenguaje.


In [24]:
# Compact view of every node the summary drew on: metadata and a 10-character preview.
TEXT_BANNER = "=============Text============="
for source_node in response.source_nodes:
    print(source_node.metadata)
    print(TEXT_BANNER)
    print(source_node.get_text()[:10])
    print(TEXT_BANNER)

{'page_label': '1', 'file_name': 'lora_paper.pdf', 'file_path': 'documents\\lora_paper.pdf', 'file_type': 'application/pdf', 'file_size': 1609513, 'creation_date': '2024-08-13', 'last_modified_date': '2024-08-13'}
LORA: L OW
{'page_label': '2', 'file_name': 'lora_paper.pdf', 'file_path': 'documents\\lora_paper.pdf', 'file_type': 'application/pdf', 'file_size': 1609513, 'creation_date': '2024-08-13', 'last_modified_date': '2024-08-13'}
often intr
{'page_label': '3', 'file_name': 'lora_paper.pdf', 'file_path': 'documents\\lora_paper.pdf', 'file_type': 'application/pdf', 'file_size': 1609513, 'creation_date': '2024-08-13', 'last_modified_date': '2024-08-13'}
During ful
{'page_label': '3', 'file_name': 'lora_paper.pdf', 'file_path': 'documents\\lora_paper.pdf', 'file_type': 'application/pdf', 'file_size': 1609513, 'creation_date': '2024-08-13', 'last_modified_date': '2024-08-13'}
This makes
{'page_label': '4', 'file_name': 'lora_paper.pdf', 'file_path': 'documents\\lora_paper.pdf', 'file_t