## Import RAG config

In [None]:
from importlib import resources as impresources
import finsight_rag.config as config
from finsight_rag.utils import load_yaml

rag_config_path = (impresources.files(config) / "rag_config.yaml")
rag_config = load_yaml(rag_config_path)
print("Available RAG configurations:")
print(list(rag_config.keys()))

Available RAG configurations:
['dataset_path', 'vector_store_path', 'chunk_size', 'chunk_overlap', 'batch_rows', 'embedding_model']


In [11]:
dataset_rag_path = rag_config["dataset_path"]
vector_store_path = rag_config["vector_store_path"]
embedding_model = rag_config["embedding_model"]

print(f"RAG dataset path: {dataset_rag_path}")
print(f"RAG vector store path: {vector_store_path}")
print(f"RAG embedding model: {embedding_model}")

RAG dataset path: C:/Users/User/projects/FinSight-RAG/data/rag/annual-reports-2024
RAG vector store path: C:/Users/User/projects/FinSight-RAG/data/rag/annual-reports-2024-vector-store
RAG embedding model: sentence-transformers/all-MiniLM-L6-v2


## Accessing RAG vector store

In [4]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Initialize the embedding function (same one used when creating)
embeddings = HuggingFaceEmbeddings(model_name=embedding_model)

# Load the persisted database
vector_store = Chroma(
    collection_name="brazilian_annual_reports",  # same collection name
    persist_directory=vector_store_path,  # path where it was saved
    embedding_function=embeddings,
)

In [5]:
dir(vector_store)

['_Chroma__ensure_collection',
 '_Chroma__query_collection',
 '_LANGCHAIN_DEFAULT_COLLECTION_NAME',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_asimilarity_search_with_relevance_scores',
 '_chroma_collection',
 '_client',
 '_collection',
 '_collection_configuration',
 '_collection_metadata',
 '_collection_name',
 '_cosine_relevance_score_fn',
 '_embedding_function',
 '_euclidean_relevance_score_fn',
 '_get_retriever_tags',
 '_max_inner_product_relevance_score_fn',
 '_select_relevance_score_fn',
 '_similarity_search_with_relevance_scores',
 'aadd_documents',
 'aadd_texts',
 'add_documents',
 '

In [6]:
vector_store.get()

{'ids': ['81f83ec9-2bb4-41ac-8030-89dd25986a4e',
  'c11e49f0-e949-4db1-8acb-f35a0b9c47cf',
  '1db704a0-7851-465c-9434-c2c0d7ae5a11',
  '697e80f7-fbb6-41ad-8a85-903f2c9efda9',
  '100960e5-4ba2-4f7b-aabe-e8c2cea37860',
  '55b8b408-3855-4275-9a3b-25bf7ab8308b',
  'bd5366a9-751b-444c-b425-db1fc9a49041',
  '747e0eb2-35ec-485e-9f22-f0ac9358fc2c',
  '6bf9d33b-3dfe-48c1-b1e7-6640095f64ba',
  '7797d989-8d68-471d-bffd-ca072ca20f23',
  '240bf03a-fdbf-477b-b47b-63c3a484092d',
  'd458d74c-702d-4162-b0c6-4956da26f866',
  '6a2f25dc-d41f-48bd-823e-db7de9e102fb',
  'a75e56a5-8a29-42d9-9e23-17f81cd2f507',
  '0a61c5a9-16aa-498e-bc55-85d15ab87645',
  '26e7ba8d-bac3-4573-8dd0-028180394b5d',
  '96e412cf-cdcf-40c3-adb8-62870dbcea8f',
  'f181db64-b04b-4d12-a1c6-1dc124196cf5',
  '8bcde58c-7db6-420c-8aad-3ec639ff60f9',
  'd44c4f44-1dad-4951-b7b7-01a9c223c2fa',
  'aa401271-5312-408a-be66-2e35a3a7cffa',
  '9013c9fa-b4c6-47e3-bc1e-36ebb1b2eb77',
  '6dbb7e3f-7fb8-4382-8327-85865a6fb880',
  'b7b3cf16-2e6e-47b3-ab88-

In [8]:
vector_store.similarity_search("What was Vale's revenue?", k=5)

[Document(id='e2c7da74-ea27-4d0a-a33c-b9f10ce3cf3a', metadata={'page': 53, 'total_pages': 60, 'creator': 'Adobe InDesign 20.1 (Macintosh)', 'creationdate': '2025-03-17T16:55:35+00:00', 'chunk_index': 4, 'source': 'C:/Users/User/projects/FinSight-RAG/data/rag/annual-reports-2024/Vale-Management-Report-2024.pdf', 'page_label': '54', 'moddate': '2025-03-17T16:55:52+00:00', 'producer': 'Adobe PDF Library 17.0', 'trapped': '/False'}, page_content='Payments of leasing (1,108) (1,159)\nDividends and interest on capital paid to Vale shareholders (20,662) (27,759)\nVALE\n2024 Management Report\n54'),
 Document(id='51f015c6-5fca-47ac-9f4a-ba7545c4f542', metadata={'moddate': '2025-03-17T16:55:52+00:00', 'total_pages': 60, 'page': 46, 'page_label': '47', 'trapped': '/False', 'creationdate': '2025-03-17T16:55:35+00:00', 'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.1 (Macintosh)', 'source': 'C:/Users/User/projects/FinSight-RAG/data/rag/annual-reports-2024/Vale-Management-Repor