In [41]:
from llama_index.core import (
    VectorStoreIndex, 
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)
import os
from dotenv import load_dotenv
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

load_dotenv()

True

### Persist on Disk


In [42]:
PERSIST_DIR = 'storage'
if not os.path.exists(PERSIST_DIR):
    os.makedirs(PERSIST_DIR)
    # Connector. This is the SimpleDirectoryReader that reads the documents from the file system
    documents = SimpleDirectoryReader("../data").load_data()
    # Verify that there are no empty documents
    documents = SimpleDirectoryReader("../data").load_data()
    for doc in documents:
        if not doc:
            print("Documento vacío encontrado")

    # Index. This is the VectorStoreIndex that indexes the documents (Nodes in LlamaIndex)
    # The input documents will be broken into nodes, and the embedding model will generate 
    # an embedding for each node.
    index = VectorStoreIndex.from_documents(documents, show_progress=True)

    # # IN CASE YOU WANT TO USE A CUSTOM TEXT SPLITTER
    # from llama_index.core.node_parser import SentenceSplitter

    # text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)

    # # IN CASE YOU WANT TO USE THE SAME TEXT SPLITTER FOR ALL INDEXES
    # from llama_index.core import Settings

    # Settings.text_splitter = text_splitter

    # # IN CASE YOU WANT TO USE TEXT SPLITTER ONLY FOR THIS INDEX
    # index = VectorStoreIndex.from_documents(
    #     documents, transformations=[text_splitter]
    # )

    # Persist the index to disk (Storage)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # Rebuild storage context
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    # Load index from storage
    index = load_index_from_storage(storage_context=storage_context)

Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 424 0 (offset 0)
Ignoring wrong pointing object 580 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 424 0 (offset 0)
Ignoring wrong pointing object 580 0 (offset 0)
Parsing nodes: 100%|██████████| 104/104 [00:00<00:00, 1533.41it/s]
Generating embeddings: 100%|██████████| 103/103 [00:02<00:00, 45.67it/s]


### Vector Stores


In [43]:
VECTOR_STORE_DIR = 'chroma_db'
if not os.path.exists(VECTOR_STORE_DIR):
    os.makedirs(VECTOR_STORE_DIR)
    # Load documents
    documents = SimpleDirectoryReader("../data").load_data()
    # Verify that there are no empty documents
    documents = SimpleDirectoryReader("../data").load_data()
    for doc in documents:
        if not doc:
            print("Documento vacío encontrado")

    # Initialize the ChromaDB client
    db = chromadb.PersistentClient(path=VECTOR_STORE_DIR)

    # Create a new collection
    chroma_collection = db.get_or_create_collection('chroma_collection')

    # Assign chroma as the vector_store to the context
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Create index
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
else:
    # Initialize the ChromaDB client
    db = chromadb.PersistentClient(path=VECTOR_STORE_DIR)

    # Get the collection
    chroma_collection = db.get_or_create_collection('chroma_collection')

    # Assign chroma as the vector_store to the context
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Load index from storage
    index = VectorStoreIndex.from_vector_store(
        vector_store, storage_context=storage_context
    )


Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 424 0 (offset 0)
Ignoring wrong pointing object 580 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 424 0 (offset 0)
Ignoring wrong pointing object 580 0 (offset 0)


If you've already created an index, you can add new documents to your index using the insert method.

In [44]:
# index = VectorStoreIndex([])
# for doc in documents:
#     index.insert(doc)

### Query Engine


In [45]:
top_k = 5
response_mode = "tree_summarize" # Good for concise answers (summarization)

query_engine =  index.as_query_engine(
    similarity_top_k=top_k,
    response_mode=response_mode,)
response = query_engine.query("Cual es el significado de la vida?")
print(response.response)
response.source_nodes[0]

The meaning of life is to live in accordance with reason and the laws of nature, to maintain inner peace, to overcome pleasures and pains, to accept the transient nature of all things, and to face death with tranquility as a natural part of existence. Ultimately, the pursuit of philosophy is considered the key to salvation and understanding one's place in the universe.


NodeWithScore(node=TextNode(id_='d32cb4a0-108e-4591-ba5f-9b6d2b282a22', embedding=None, metadata={'page_label': '13', 'file_name': 'Meditaciones-Marco-Aurelio.pdf', 'file_path': '/home/rprieto/RAG/Quick-Examples/../data/Meditaciones-Marco-Aurelio.pdf', 'file_type': 'application/pdf', 'file_size': 1163805, 'creation_date': '2025-03-20', 'last_modified_date': '2025-03-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='9415308d-cd17-464f-9369-45e95e8cb448', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '13', 'file_name': 'Meditaciones-Marco-Aurelio.pdf', 'file_path': '/home/rprieto/RAG/Quick-Examples/../data/Meditaciones-Marco-Aurelio.pdf', 'file_type': 'application/pdf', 'file_size': 116380

### Chat Engine

**Available Chat Modes**
- `best` - Turn the query engine into a tool, for use with a ReAct data agent or an OpenAI data agent, depending on what your LLM supports. OpenAI data agents require gpt-3.5-turbo or gpt-4 as they use the - function calling API from OpenAI.

- `condense_question` - Look at the chat history and re-write the user message to be a query for the index. Return the response after reading the response from the query engine.

- `context` - Retrieve nodes from the index using every user message. The retrieved text is inserted into the system prompt, so that the chat engine can either respond naturally or use the context from the query engine.

- `condense_plus_context` - A combination of condense_question and context. Look at the chat history and re-write the user message to be a retrieval query for the index. The retrieved text is inserted into the system prompt, so that the chat engine can either respond naturally or use the context from the query engine.

- `simple` - A simple chat with the LLM directly, no query engine involved.

- `react` - Same as best, but forces a ReAct data agent.

- `openai` - Same as best, but forces an OpenAI data agent.

In [46]:
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-4o-mini")

In [47]:
# Then, at query time, the embedding model will be used again to embed the query text.
system_prompt = "Eres un maestro estoico capaz de aconsejar y hablar de esta filosofía tomando de referencia las meditaciones de Marco Aurelio"
chat_engine = index.as_chat_engine(chat_mode='context', verbose=False, system_prompt=system_prompt, similarity_top_k=5, llm=llm)
for i in range(1):
    response = chat_engine.chat(input())
    print(response)

Parece que no has escrito nada en tu mensaje. Si tienes alguna pregunta o tema sobre el que te gustaría hablar, especialmente relacionado con la filosofía estoica o las "Meditaciones" de Marco Aurelio, no dudes en decírmelo. Estoy aquí para ayudarte.


In [48]:
# Predefined prompts example
from llama_index.core.prompts.system import SHAKESPEARE_WRITING_ASSISTANT
system_prompt=SHAKESPEARE_WRITING_ASSISTANT
chat_engine = index.as_chat_engine(chat_mode='context', verbose=False, system_prompt=system_prompt, similarity_top_k=5)
for i in range(1):
    response = chat_engine.chat(input())
    print(response)

Hark! What dost thou seeketh, dear user? Speak thy mind, and I shall assist thee with words of Shakespearean flair.


### Structured Output

In [49]:
from pydantic import BaseModel, Field
from typing import List


class Output(BaseModel):
    """Output containing the response, page numbers, and confidence."""

    response: str = Field(..., description="The answer to the question. It has to be maximum 10 words long.")
    example: list[str] = Field(..., description="3 literal full examples from the documents that supports the answer.")
    page_numbers: List[int] = Field(
        ...,
        description="The page numbers of the sources used to answer this question. Do not include a page number if the context is irrelevant.",
    )
    confidence: float = Field(
        ...,
        description="Confidence value between 0-1 of the correctness of the result.",
    )
    confidence_explanation: str = Field(
        ..., description="Explanation for the confidence score"
    )


sllm = llm.as_structured_llm(output_cls=Output)

In [50]:
query_engine = index.as_query_engine(
    similarity_top_k=5,
    llm=sllm,
    response_mode="tree_summarize",
)

In [51]:
response = query_engine.query("Que significado tiene la vida?")
response.response.model_dump()

{'response': 'La vida es una lucha y transformación constante.',
 'example': ['la vida, una lucha, un destierro;',
  'la vida de cada uno es como una exhalación',
  'el mundo es una mutación continua: la vida, una imaginación.'],
 'page_numbers': [13, 42, 21],
 'confidence': 0.9,
 'confidence_explanation': 'The answer is supported by multiple references discussing life as a struggle and transformation.'}