In [7]:
from llama_index.core import (
    VectorStoreIndex, 
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage
)
import os.path
from dotenv import load_dotenv
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

# Load environment variables from a local .env file before any model call.
# NOTE(review): presumably this supplies the LLM/embedding API key — confirm.
load_dotenv()

True

### Persist on Disk


In [8]:
PERSIST_DIR = './storage'

if os.path.exists(PERSIST_DIR):
    # A previous run already persisted the index: rebuild the storage context
    # from disk and load the index instead of re-reading the documents.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context=storage_context)
else:
    # Connector. SimpleDirectoryReader reads the documents from the file system.
    # The directory is read exactly once (it used to be loaded twice in a row).
    documents = SimpleDirectoryReader("./data").load_data()

    # Warn about documents without any text. The check looks at the document's
    # text: testing the Document object itself does not inspect its content,
    # so it would not flag an empty document.
    for doc in documents:
        if not (doc.text or "").strip():
            print("Documento vacío encontrado")

    # Index. This is the VectorStoreIndex that indexes the documents (Nodes in LlamaIndex)
    # The input documents will be broken into nodes, and the embedding model will generate
    # an embedding for each node.
    index = VectorStoreIndex.from_documents(documents, show_progress=True)

    # # IN CASE YOU WANT TO USE A CUSTOM TEXT SPLITTER
    # from llama_index.core.node_parser import SentenceSplitter

    # text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)

    # # IN CASE YOU WANT TO USE THE SAME TEXT SPLITTER FOR ALL INDEXES
    # from llama_index.core import Settings

    # Settings.text_splitter = text_splitter

    # # IN CASE YOU WANT TO USE TEXT SPLITTER ONLY FOR THIS INDEX
    # index = VectorStoreIndex.from_documents(
    #     documents, transformations=[text_splitter]
    # )

    # Persist the index to disk (Storage). The directory is created only now,
    # after indexing succeeded: creating it up front meant that a failed build
    # left an empty PERSIST_DIR behind, and the next run would take the "load"
    # branch above and fail on the missing index files.
    os.makedirs(PERSIST_DIR, exist_ok=True)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

### Vector Stores


In [9]:
VECTOR_STORE_DIR = './chroma_db'

# NOTE: this cell rebinds `index`, replacing any index assigned to that name
# by an earlier cell.

# Make sure the database directory exists, then open the persistent ChromaDB
# client on it. The same constant is used for both the "build" and the "load"
# path (the load path used to hard-code './chroma_db').
os.makedirs(VECTOR_STORE_DIR, exist_ok=True)
db = chromadb.PersistentClient(path=VECTOR_STORE_DIR)

# Get the collection, creating it on first use
chroma_collection = db.get_or_create_collection('chroma_collection')

# Assign chroma as the vector_store to the context
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Decide between building and loading from what is actually stored, not from
# whether the directory exists: an interrupted or failed first run leaves the
# directory in place with an empty collection, which would otherwise be loaded
# as an empty index on every later run.
if chroma_collection.count() == 0:
    # Load documents (read once; the directory used to be loaded twice in a row)
    documents = SimpleDirectoryReader("./data").load_data()

    # Warn about documents without any text. The check looks at the document's
    # text: testing the Document object itself does not inspect its content.
    for doc in documents:
        if not (doc.text or "").strip():
            print("Documento vacío encontrado")

    # Create index; the embeddings are written into the Chroma collection
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
else:
    # Load index from the embeddings already stored in the collection
    index = VectorStoreIndex.from_vector_store(
        vector_store, storage_context=storage_context
    )


If you've already created an index, you can add new documents to your index using the insert method.

In [10]:
# index = VectorStoreIndex([])
# for doc in documents:
#     index.insert(doc)

### Querying


In [11]:
# Wrap the current index in a query engine and ask one stand-alone question.
question = "Cual es el significado de la vida?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
# Bare trailing expression so the notebook displays the Response object.
response

Response(response='El significado de la vida radica en vivir de acuerdo con la razón y la ley de la naturaleza, manteniendo la filosofía como guía para preservar el equilibrio interno, enfrentar los placeres y dolores, aceptar los eventos de la vida con serenidad y ver la muerte como parte natural de la disolución de los elementos que componen todo ser viviente.', source_nodes=[NodeWithScore(node=TextNode(id_='2eb47283-0f15-419a-be3b-7ffc4d834d55', embedding=None, metadata={'page_label': '13', 'file_name': 'Meditaciones-Marco-Aurelio.pdf', 'file_path': '/home/rprieto/RAG/data/Meditaciones-Marco-Aurelio.pdf', 'file_type': 'application/pdf', 'file_size': 1163805, 'creation_date': '2025-03-20', 'last_modified_date': '2025-03-20'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], re

### Chat Engine

In [12]:
# Chat engine on top of the index, in 'context' mode with a fixed system prompt.
# At query time the embedding model is used again to embed the incoming message.
# NOTE(review): similarity_top_k=5 presumably sets how many retrieved nodes are
# fed to each reply — confirm against the LlamaIndex chat engine docs.
chat_engine = index.as_chat_engine(
    chat_mode='context',
    verbose=False,
    system_prompt="Eres un maestro estoico capaz de aconsejar y hablar de esta filosofía tomando de referencia las meditaciones de Marco Aurelio",
    similarity_top_k=5,
)

# One chat turn: read the user's message from stdin and print the reply.
for _ in range(1):
    user_message = input()
    response = chat_engine.chat(user_message)
    print(response)

¡Hola! Veo que estás interesado en la filosofía estoica y las meditaciones de Marco Aurelio. ¿En qué puedo ayudarte hoy?
