In [2]:
from llama_index.core import (
    VectorStoreIndex, 
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)
import os
from dotenv import load_dotenv
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

load_dotenv()

True

### Persist on Disk


In [3]:
PERSIST_DIR = 'storage'
if not os.path.exists(PERSIST_DIR):
    os.makedirs(PERSIST_DIR)
    # Connector. This is the SimpleDirectoryReader that reads the documents from the file system
    documents = SimpleDirectoryReader("../data").load_data()
    # Verify that there are no empty documents
    documents = SimpleDirectoryReader("../data").load_data()
    for doc in documents:
        if not doc:
            print("Documento vacío encontrado")

    # Index. This is the VectorStoreIndex that indexes the documents (Nodes in LlamaIndex)
    # The input documents will be broken into nodes, and the embedding model will generate 
    # an embedding for each node.
    index = VectorStoreIndex.from_documents(documents, show_progress=True)

    # # IN CASE YOU WANT TO USE A CUSTOM TEXT SPLITTER
    # from llama_index.core.node_parser import SentenceSplitter

    # text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)

    # # IN CASE YOU WANT TO USE THE SAME TEXT SPLITTER FOR ALL INDEXES
    # from llama_index.core import Settings

    # Settings.text_splitter = text_splitter

    # # IN CASE YOU WANT TO USE TEXT SPLITTER ONLY FOR THIS INDEX
    # index = VectorStoreIndex.from_documents(
    #     documents, transformations=[text_splitter]
    # )

    # Persist the index to disk (Storage)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Rebuild storage context
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    # Load index from storage
    index = load_index_from_storage(storage_context=storage_context)

### Vector Stores


In [4]:
VECTOR_STORE_DIR = 'chroma_db'
if not os.path.exists(VECTOR_STORE_DIR):
    os.makedirs(VECTOR_STORE_DIR)
    # Load documents
    documents = SimpleDirectoryReader("../data").load_data()
    # Verify that there are no empty documents
    documents = SimpleDirectoryReader("../data").load_data()
    for doc in documents:
        if not doc:
            print("Documento vacío encontrado")

    # Initialize the ChromaDB client
    db = chromadb.PersistentClient(path=VECTOR_STORE_DIR)

    # Create a new collection
    chroma_collection = db.get_or_create_collection('chroma_collection')

    # Assign chroma as the vector_store to the context
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Create index
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
else:
    # Initialize the ChromaDB client
    db = chromadb.PersistentClient(path=VECTOR_STORE_DIR)

    # Get the collection
    chroma_collection = db.get_or_create_collection('chroma_collection')

    # Assign chroma as the vector_store to the context
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Load index from storage
    index = VectorStoreIndex.from_vector_store(
        vector_store, storage_context=storage_context
    )


If you've already created an index, you can add new documents to your index using the insert method.

In [5]:
# index = VectorStoreIndex([])
# for doc in documents:
#     index.insert(doc)

### Query Engine


In [6]:
top_k = 5
response_mode = "tree_summarize" # Good for concise answers (summarization)

query_engine =  index.as_query_engine(
    similarity_top_k=top_k,
    response_mode=response_mode,)
response = query_engine.query("Cual es el significado de la vida?")
print(response.response)
response.source_nodes # TODO: What is this and what is used for?

El significado de la vida se relaciona con discernir claramente cada aspecto de la existencia, practicar la justicia, decir la verdad y aprovecharla encadenando una obra buena con otra buena, sin dejar intervalos entre ellas. Además, implica reflexionar sobre la brevedad de la vida, la infinidad del tiempo pasado y venidero, así como la poca consistencia de todo lo material. También se destaca la importancia de seguir la razón y a Dios, y de actuar conforme a las normas de la naturaleza y sentir según las exigencias de la naturaleza común.


[NodeWithScore(node=TextNode(id_='d69ca734-9e49-48ae-a0e3-183e43070021', embedding=None, metadata={'page_label': '24', 'file_name': 'Meditaciones-Marco-Aurelio.pdf', 'file_path': '/home/rprieto/RAG/Quick-Examples/../data/Meditaciones-Marco-Aurelio.pdf', 'file_type': 'application/pdf', 'file_size': 1163805, 'creation_date': '2025-03-20', 'last_modified_date': '2025-03-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='8030a86e-8bae-4688-a176-892d6c081b26', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '24', 'file_name': 'Meditaciones-Marco-Aurelio.pdf', 'file_path': '/home/rprieto/RAG/Quick-Examples/../data/Meditaciones-Marco-Aurelio.pdf', 'file_type': 'application/pdf', 'file_size': 11638

### Chat Engine

**Available Chat Modes**
- `best` - Turn the query engine into a tool, for use with a ReAct data agent or an OpenAI data agent, depending on what your LLM supports. OpenAI data agents require gpt-3.5-turbo or gpt-4 as they use the - function calling API from OpenAI.

- `condense_question` - Look at the chat history and re-write the user message to be a query for the index. Return the response after reading the response from the query engine.

- `context` - Retrieve nodes from the index using every user message. The retrieved text is inserted into the system prompt, so that the chat engine can either respond naturally or use the context from the query engine.

- `condense_plus_context` - A combination of condense_question and context. Look at the chat history and re-write the user message to be a retrieval query for the index. The retrieved text is inserted into the system prompt, so that the chat engine can either respond naturally or use the context from the query engine.

- `simple` - A simple chat with the LLM directly, no query engine involved.

- `react` - Same as best, but forces a ReAct data agent.

- `openai` - Same as best, but forces an OpenAI data agent.

In [None]:
# Then, at query time, the embedding model will be used again to embed the query text.
system_prompt = "Eres un maestro estoico capaz de aconsejar y hablar de esta filosofía tomando de referencia las meditaciones de Marco Aurelio"
chat_engine = index.as_chat_engine(chat_mode='context', verbose=False, system_prompt=system_prompt, similarity_top_k=5)
for i in range(1):
    response = chat_engine.chat(input())
    print(response)

In [None]:
# Predefined prompts example
from llama_index.core.prompts.system import SHAKESPEARE_WRITING_ASSISTANT
system_prompt=SHAKESPEARE_WRITING_ASSISTANT
chat_engine = index.as_chat_engine(chat_mode='context', verbose=False, system_prompt=system_prompt, similarity_top_k=5)
for i in range(1):
    response = chat_engine.chat(input())
    print(response)

Ah, dear mortal, death is but a passage, a transition from this earthly realm to the unknown. It is a journey all must take, a fate that awaits every living soul. Yet, fear not this inevitable end, for in death lies the ultimate truth of our existence. It is a mystery, a part of the grand design of the universe, where the soul may find peace and rest. Embrace death as a natural part of life's cycle, for in its embrace, we may find solace and understanding.


### Structured Output

In [8]:
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-4o-mini")

In [29]:
from pydantic import BaseModel, Field
from typing import List


class Output(BaseModel):
    """Output containing the response, page numbers, and confidence."""

    response: str = Field(..., description="The answer to the question. It has to be maximum 10 words long.")
    example: list[str] = Field(..., description="3 literal full examples from the documents that supports the answer.")
    page_numbers: List[int] = Field(
        ...,
        description="The page numbers of the sources used to answer this question. Do not include a page number if the context is irrelevant.",
    )
    confidence: float = Field(
        ...,
        description="Confidence value between 0-1 of the correctness of the result.",
    )
    confidence_explanation: str = Field(
        ..., description="Explanation for the confidence score"
    )


sllm = llm.as_structured_llm(output_cls=Output)

In [30]:
query_engine = index.as_query_engine(
    similarity_top_k=5,
    llm=sllm,
    response_mode="tree_summarize",  # you can also select other modes like `compact`, `refine`
)

In [31]:
response = query_engine.query("Cual es el significado de la muerte?")
response.response.dict()

/tmp/ipykernel_2477461/3359182344.py:2: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  response.response.dict()


{'response': 'La muerte es una transformación natural.',
 'example': ['La cesación de una actividad no es mal alguno.',
  'Toda mutación es aquí una muerte.',
  'Considera siempre las cosas humanas como efímeras.'],
 'page_numbers': [76, 28, 29],
 'confidence': 0.9,
 'confidence_explanation': 'The examples provided directly relate to the concept of death as a natural transformation and its ephemeral nature.'}