In [None]:
import os
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough
from IPython.display import display, Markdown

In [None]:
# Pull variables from a local .env file into the environment, then read the Gemini key.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Directory passed to the vector store as its persist location.
persist_directory = "./chroma_db"

# --- Data ingestion ---
# 1.1 Load the PDF document and split it into chunks
pdf_path = r"sample_apostila_direito_adm.pdf"  # <<-- Change this to the path of your PDF file
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
loader = PyPDFLoader(pdf_path)
# Load the PDF first, then hand the loaded documents to the splitter.
documents = text_splitter.split_documents(loader.load())

In [None]:
# 2. Initialise the embeddings model and the vector database
embeddings_model = GoogleGenerativeAIEmbeddings(google_api_key=GEMINI_API_KEY, model="models/embedding-001")

# Give every chunk a stable id built from the PDF path and the chunk's position.
# When no ids are supplied the store assigns fresh random ones, so each re-run
# of this cell would add another copy of every chunk to the persisted
# collection and retrieval would start returning duplicated context.
# With stable ids a re-run overwrites the existing entries instead.
# NOTE(review): entries already written with random ids by earlier runs are
# not removed by this; delete the persist directory once to start clean.
chunk_ids = [f"{pdf_path}-{index}" for index in range(len(documents))]

vector_store = Chroma.from_documents(
    documents=documents,
    embedding=embeddings_model,
    ids=chunk_ids,
    persist_directory=persist_directory,
)

In [None]:
# Prompt template: the model is told to answer from the supplied context only
# and to admit when it does not know. {context} and {question} are filled in
# when the chain runs.
template = """
Use os seguintes pedaços de contexto para responder à pergunta no final.
Se você não souber a resposta, diga que não sabe.

Contexto: {context}

Pergunta: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Expose the vector store through the retriever interface (default settings).
retriever = vector_store.as_retriever()

In [None]:
# 5. Initialise the Gemini chat model
llm = ChatGoogleGenerativeAI(
    google_api_key=GEMINI_API_KEY,
    model="gemini-2.5-flash",
    temperature=0.7,
)

# 6. Build the RAG chain with LCEL
def format_docs(docs):
    """Concatenate the ``page_content`` of every document in ``docs``.

    Consecutive documents are separated by one blank line (``"\\n\\n"``).
    An empty iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# The leading dict receives the question passed to the chain and builds the
# prompt inputs: "context" pipes it through the retriever and format_docs,
# while "question" forwards it unchanged. The filled prompt then goes to the
# model, and the parser turns the model output into a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Interactive question loop: read a question, run it through the RAG chain and
# render the answer as Markdown. Typing 'sair' (any letter case) ends the loop.
print("Pronto para conversar com seu PDF. Digite 'sair' para encerrar.")
while True:
    # Trim surrounding whitespace so that input such as "sair " is still
    # recognised as the exit command rather than sent to the model.
    question = input("Sua pergunta: ").strip()

    # A blank line carries no question; ask again instead of spending a
    # retrieval and model call on an empty query.
    if not question:
        continue

    if question.lower() == 'sair':
        break

    response = rag_chain.invoke(question)  # already a string (StrOutputParser)
    display(Markdown(response))