In [9]:
import os, logging, itertools
from pathlib import Path
from dotenv import load_dotenv
from tqdm import tqdm

from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_mistralai import MistralAIEmbeddings
# Fallback: from langchain_experimental.open_clip import OpenCLIPEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [10]:
PDF_DIR           = "/home/gui/rag-knowledge-managment/docs/pdfs"        # folder with many PDFs
PERSIST_DIR       = "./chroma_db"   # where Chroma stores the collection
COLLECTION_NAME   = "my-rag"
CHUNK_SIZE        = 1000
CHUNK_OVERLAP     = 150
TOP_K             = 3              # how many chunks to retrieve
LLM_MODEL_NAME    = "llama-3.3-70b-versatile"

In [11]:
def build_vector_store():
    """Load PDFs → split → embed → store (runs once or when you add new PDFs)."""
    loader = PyPDFDirectoryLoader(PDF_DIR, recursive=True)
    docs = loader.load()
    logging.info("Loaded %d pages from %s", len(docs), PDF_DIR)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
    )
    chunks = splitter.split_documents(docs)
    logging.info("Split into %d chunks", len(chunks))

    embeddings = MistralAIEmbeddings()  # relies on $MISTRAL_API_KEY
    # embeddings = OpenCLIPEmbeddings(model_name="ViT-H-14")  # fallback

    vectordb = Chroma.from_documents(
        chunks,
        embedding=embeddings,
        collection_name=COLLECTION_NAME,
        persist_directory=PERSIST_DIR,
    )
    vectordb.persist()
    logging.info("Vector DB persisted at %s", PERSIST_DIR)
    return vectordb

In [12]:
def load_vector_store():
    """Load an existing Chroma collection without re-embedding."""
    embeddings = MistralAIEmbeddings()
    return Chroma(
        embedding_function=embeddings,
        collection_name=COLLECTION_NAME,
        persist_directory=PERSIST_DIR,
    )

In [13]:
def build_rag_chain_with_validation(vectordb):
    retriever = vectordb.as_retriever(search_kwargs={"k": TOP_K})

    # 1) Prompt original para gerar rascunho
    draft_prompt = ChatPromptTemplate.from_template(
        """Você é um assistente útil. Use somente o contexto abaixo.
=== Contexto ===
{context}

=== Pergunta ===
{question}

=== Resposta ===
"""
    )
    llm = ChatGroq(model_name=LLM_MODEL_NAME)

    # 2) Prompt de validação
    validate_prompt = ChatPromptTemplate.from_template(
        """Eu vou te passar o contexto e uma resposta gerada.
Verifique se a resposta está:
  a) correta segundo o contexto,
  b) não adiciona informação não suportada,
  c) aborda a pergunta de forma completa.

Se estiver OK, retorne exatamente a resposta validada.
Caso contrário, retorne “Não consegui validar a resposta com o contexto.”.

=== Contexto ===
{context}

=== Resposta Gerada ===
{draft}
"""
    )
    validator = ChatGroq(model_name=LLM_MODEL_NAME)

    # 3) Pipeline LCEL
    rag_with_val = (
        {
            "question": RunnablePassthrough(),
            # gera um único string com todos os chunks
            "context": retriever | (lambda docs: "\n\n".join(d.page_content for d in docs))
        }
        # 3.1) Gera o rascunho
        | draft_prompt
        | llm

        # 3.2) Valida o rascunho
        | (  # empacota para validar
            {
                "context": RunnablePassthrough(),  # reutiliza o mesmo contexto
                "draft": RunnablePassthrough()     # pega a saída do llm
            }
            | validate_prompt
            | validator
        )
    )

    return rag_with_val

In [16]:
load_dotenv()
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)

# build once, then comment out and just 'load_vector_store'
#vectordb = build_vector_store()  # first run
vectordb = load_vector_store()  # subsequent runs

rag_chain = build_rag_chain_with_validation(vectordb)

# Simple REPL
print("Pergunte algo (vazio para sair):")
while (q := input("> ").strip()):
    resposta_final = rag_chain.invoke(q)
    print(resposta_final)



Pergunte algo (vazio para sair):


2025-05-22 19:33:40,040 [INFO] HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
2025-05-22 19:33:42,305 [INFO] HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-22 19:33:43,128 [INFO] HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


content='Não consegui validar a resposta com o contexto.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 1119, 'total_tokens': 1130, 'completion_time': 0.04, 'prompt_time': 0.085143679, 'queue_time': 0.27589426100000003, 'total_time': 0.125143679}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_2ddfbb0da0', 'finish_reason': 'stop', 'logprobs': None} id='run--850c00b7-262e-4cbd-a565-a0be587b5ca4-0' usage_metadata={'input_tokens': 1119, 'output_tokens': 11, 'total_tokens': 1130}


2025-05-22 19:34:21,417 [INFO] HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
2025-05-22 19:34:22,739 [INFO] HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
2025-05-22 19:34:23,803 [INFO] HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


content='O objetivo geral da pesquisa de gestão do conhecimento em startups é entender como as startups podem gerenciar, descobrir, mapear, classificar, captar, distribuir, criar, multiplicar e reter conhecimento com eficiência, eficácia e efetividade, considerando os desafios únicos que elas enfrentam, como a escassez de recursos e a falta de processos estruturados, e identificar práticas eficazes de gestão do conhecimento que possam proporcionar um diferencial competitivo significativo para essas empresas.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 128, 'prompt_tokens': 747, 'total_tokens': 875, 'completion_time': 0.465454545, 'prompt_time': 0.055105503, 'queue_time': 0.278438983, 'total_time': 0.520560048}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_3f3b593e33', 'finish_reason': 'stop', 'logprobs': None} id='run--f24b4b5d-2536-4b7e-819f-19f79bbdacdc-0' usage_metadata={'input_tokens': 747, 'output_tokens': 128, 'total_tokens'