In [5]:
from llama_index.core import (
    VectorStoreIndex, 
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)
import os
from dotenv import load_dotenv
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
import json

load_dotenv()

True

### Persist on Disk


In [6]:
PERSIST_DIR = 'storage'
if not os.path.exists(PERSIST_DIR):
    # First run: read the source documents, build the index and persist it.

    # Connector. SimpleDirectoryReader reads the documents from the file system.
    documents = SimpleDirectoryReader("../Quick-Examples/data").load_data()

    # Warn about documents without any text. The text itself is inspected
    # because testing the Document object (`if not doc`) does not detect an
    # empty document.
    for doc in documents:
        if not (doc.text or "").strip():
            print("Documento vacío encontrado")

    # Index. This is the VectorStoreIndex that indexes the documents (Nodes in LlamaIndex)
    # The input documents will be broken into nodes, and the embedding model will generate
    # an embedding for each node.
    index = VectorStoreIndex.from_documents(documents, show_progress=True)

    # # IN CASE YOU WANT TO USE A CUSTOM TEXT SPLITTER
    # from llama_index.core.node_parser import SentenceSplitter

    # text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)

    # # IN CASE YOU WANT TO USE THE SAME TEXT SPLITTER FOR ALL INDEXES
    # from llama_index.core import Settings

    # Settings.text_splitter = text_splitter

    # # IN CASE YOU WANT TO USE TEXT SPLITTER ONLY FOR THIS INDEX
    # index = VectorStoreIndex.from_documents(
    #     documents, transformations=[text_splitter]
    # )

    # Persist the index to disk (Storage). The directory is created only now,
    # after the index was built successfully: creating it earlier would leave
    # an empty directory behind if loading or embedding failed, and the next
    # run would then take the "load" branch below and fail.
    os.makedirs(PERSIST_DIR, exist_ok=True)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Later runs: rebuild the storage context from the persisted files ...
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    # ... and load the index from it instead of re-embedding the documents.
    index = load_index_from_storage(storage_context=storage_context)

### Vector Stores


In [7]:
VECTOR_STORE_DIR = 'chroma_db'

# The Chroma wiring is the same whether the index is built or loaded, so it is
# done once up front instead of being repeated in both branches.
os.makedirs(VECTOR_STORE_DIR, exist_ok=True)

# Initialize the ChromaDB client (persists its data under VECTOR_STORE_DIR)
db = chromadb.PersistentClient(path=VECTOR_STORE_DIR)

# Get the collection, creating it if it does not exist yet
chroma_collection = db.get_or_create_collection('chroma_collection')

# Assign chroma as the vector_store to the context
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Decide on the collection's content rather than on the directory's existence:
# an interrupted first run leaves the directory in place with nothing stored,
# and in that case the documents still need to be indexed.
if chroma_collection.count() == 0:
    # Load documents
    documents = SimpleDirectoryReader("../Quick-Examples/data").load_data()

    # Warn about documents without any text. The text itself is inspected
    # because testing the Document object (`if not doc`) does not detect an
    # empty document.
    for doc in documents:
        if not (doc.text or "").strip():
            print("Documento vacío encontrado")

    # Create index; the embeddings are written into the Chroma collection
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
else:
    # Load index from the already populated vector store
    index = VectorStoreIndex.from_vector_store(
        vector_store, storage_context=storage_context
    )


If you've already created an index, you can add new documents to your index using the insert method.

In [8]:
# Example (disabled): add documents one at a time with `insert`.
# NOTE: the first line rebinds `index` to a new index created from an empty
# list, replacing the index built above; leave it commented out to insert
# into the existing `index` instead.
# index = VectorStoreIndex([])
# for doc in documents:
#     index.insert(doc)

### Query Engine


In [9]:
# Settings for the query engine created below.
top_k = 5  # passed as similarity_top_k
response_mode = "tree_summarize"  # Good for concise answers (summarization)

query_engine = index.as_query_engine(
    response_mode=response_mode,
    similarity_top_k=top_k,
)

question = "Cual es el significado de la vida?"
response = query_engine.query(question)

# Print the synthesized answer text.
print(response.response)
# Last expression of the cell: display the first source node of the response.
response.source_nodes[0]

El significado de la vida se relaciona con la necesidad de obedecer a la razón y a la ley de la naturaleza, mantener la filosofía como guía para enfrentar placeres y dolores, aceptar la transformación constante de todas las cosas y aguardar la muerte con serenidad al comprender que es parte de la disolución natural de los elementos que componen todo ser viviente.


NodeWithScore(node=TextNode(id_='3d4d5a74-c933-4c7b-8991-450264d52cf2', embedding=None, metadata={'page_label': '13', 'file_name': 'Meditaciones-Marco-Aurelio.pdf', 'file_path': '/home/rprieto/RAG/Quick-Examples/../Quick-Examples/data/Meditaciones-Marco-Aurelio.pdf', 'file_type': 'application/pdf', 'file_size': 1163805, 'creation_date': '2025-03-20', 'last_modified_date': '2025-03-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='e6bd5f18-0e02-4ae4-913f-a091ecd6f997', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '13', 'file_name': 'Meditaciones-Marco-Aurelio.pdf', 'file_path': '/home/rprieto/RAG/Quick-Examples/../Quick-Examples/data/Meditaciones-Marco-Aurelio.pdf', 'file_type': 'applica

### Chat Engine

**Available Chat Modes**
- `best` - Turn the query engine into a tool, for use with a ReAct data agent or an OpenAI data agent, depending on what your LLM supports. OpenAI data agents require gpt-3.5-turbo or gpt-4 as they use the function calling API from OpenAI.

- `condense_question` - Look at the chat history and re-write the user message to be a query for the index. Return the response after reading the response from the query engine.

- `context` - Retrieve nodes from the index using every user message. The retrieved text is inserted into the system prompt, so that the chat engine can either respond naturally or use the context from the query engine.

- `condense_plus_context` - A combination of condense_question and context. Look at the chat history and re-write the user message to be a retrieval query for the index. The retrieved text is inserted into the system prompt, so that the chat engine can either respond naturally or use the context from the query engine.

- `simple` - A simple chat with the LLM directly, no query engine involved.

- `react` - Same as best, but forces a ReAct data agent.

- `openai` - Same as best, but forces an OpenAI data agent.

In [10]:
from llama_index.llms.gemini import Gemini
from llama_index.llms.openai import OpenAI

# API keys are read from the environment. Indexing os.environ directly raises
# KeyError when a key is missing, so both OPENAI_API_KEY and GEMINI_API_KEY
# must be set before this cell runs.
llm_openai = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    model="gpt-4o-mini",
)
llm_gemini = Gemini(
    api_key=os.environ["GEMINI_API_KEY"],
    model="models/gemini-2.0-flash",
)

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Chat over the index in 'context' mode with a custom system prompt.
# At query time, the embedding model is used again to embed the query text.
system_prompt = "Eres un maestro estoico capaz de aconsejar y hablar de esta filosofía tomando de referencia las meditaciones de Marco Aurelio"
chat_engine = index.as_chat_engine(
    chat_mode='context',
    llm=llm_openai,
    system_prompt=system_prompt,
    similarity_top_k=5,
    verbose=False,
)

# Single chat turn; raise the range to hold a longer conversation.
for i in range(1):
    user_message = input()  # blocks until the user types a message
    response = chat_engine.chat(user_message)
    print(response)

Virtud.


In [12]:
# Same chat setup as before, but using one of the system prompts that ship
# with LlamaIndex instead of a hand-written one.
from llama_index.core.prompts.system import SHAKESPEARE_WRITING_ASSISTANT

system_prompt = SHAKESPEARE_WRITING_ASSISTANT
chat_engine = index.as_chat_engine(
    chat_mode='context',
    llm=llm_openai,
    system_prompt=system_prompt,
    similarity_top_k=5,
    verbose=False,
)

# Single chat turn; raise the range to hold a longer conversation.
for i in range(1):
    user_message = input()  # blocks until the user types a message
    response = chat_engine.chat(user_message)
    print(response)

Amor.


### Structured Output

In [13]:
from pydantic import BaseModel, Field, field_validator
from typing import List


# Schema for the structured answer requested from the LLM.
# NOTE: pydantic includes the class docstring and every Field description in
# the model's JSON schema, so they act as instructions to the LLM rather than
# as plain documentation. Edit them with the same care as a prompt.
class Output(BaseModel):
    """Output containing the response, page numbers, and confidence."""

    # Short answer; the 10-word limit is only stated in the description,
    # nothing in this class enforces it.
    response: str = Field(..., description="The answer to the question. It has to be maximum 10 words long.")
    # Supporting quotes; the requested count of 3 is likewise not enforced.
    example: list[str] = Field(..., description="3 literal full examples from the documents that supports the answer.")
    # Pages of the sources the answer relies on.
    page_numbers: list[int] = Field(
        ...,
        description="The page numbers of the sources used to answer this question. Do not include a page number if the context is irrelevant.",
    )
    # Confidence reported by the model; the 0-1 range is not validated here.
    confidence: float = Field(
        ...,
        description="Confidence value between 0-1 of the correctness of the result.",
    )
    confidence_explanation: str = Field(
        ..., description="Explanation for the confidence score"
    )

# Wrap the OpenAI LLM so that it is asked to produce `Output` objects.
sllm_openai = llm_openai.as_structured_llm(output_cls=Output)


In [14]:
# Query engine that uses the structured LLM for its answers.
# A similarity_top_k of 50 is much larger than the 5 used in the earlier
# query-engine cell.
query_engine = index.as_query_engine(
    llm=sllm_openai,
    response_mode="tree_summarize",
    similarity_top_k=50,
)

In [15]:
# Run the structured query and display the result as a plain dict.
question = "Cuál es el sentido de la vida?"
response = query_engine.query(question)
structured_answer = response.response
structured_answer.model_dump()

{'response': 'Obrar conforme a la naturaleza y la justicia.',
 'example': ['"El fin de los seres racionales es obedecer a la razón y a la ley de la naturaleza."',
  '"Conviene aprovechar el presente, usándolo con reflexión y justicia."',
  '"La misión de la naturaleza universal se reduce a trasladar allí lo que estaba ahí, a transformarlo."'],
 'page_numbers': [13, 19, 51],
 'confidence': 0.9,
 'confidence_explanation': 'The answer is supported by multiple references discussing the purpose of life in terms of nature and justice.'}