In [13]:
import os
from dotenv import load_dotenv
import time
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [14]:
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")

In [15]:
# llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro", temperature=0.7, google_api_key=api_key)
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7, google_api_key=api_key)

In [16]:
pdf_loader = PyPDFLoader('Sistemas Operacionais_ Conceitos e Mecanismos - socm-livro.pdf')
documents = pdf_loader.load()
first_content = documents[0]
first_content.page_content

''

In [17]:
text_splitter = RecursiveCharacterTextSplitter(
    separators=['\n\n', '\n', '.', ' ', ''],
    chunk_size=1200,
    chunk_overlap=200
)

In [18]:
chunks = text_splitter.split_documents(documents)
print([len(chunks) for chunk in chunks])

[1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 1145, 114

In [19]:
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001"
)

In [21]:
batch_size = 100
vector_store = Chroma.from_documents(
    documents=chunks[:batch_size],
    embedding=embedding_model,
)
for i in range(batch_size, len(chunks), batch_size):
    # Pega o próximo lote
    batch_end = min(i + batch_size, len(chunks))
    batch_chunks = chunks[i:batch_end]
    
    print(f"Processando lote de documentos de {i} a {batch_end-1}...")
    
    # Adiciona o lote ao vector store já existente
    vector_store.add_documents(batch_chunks)
    
    # Pausa de 1 segundo para respeitar o limite de requisições por minuto (RPM) da API
    time.sleep(10)

print("\nTodos os lotes foram processados e o Vector Store está completo.")

GoogleGenerativeAIError: Error embedding content: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
]

In [None]:
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}
)

In [None]:
# vector_store = Chroma.from_documents(
#     documents=chunks,
#     embedding=embedding_model,
#     persist_directory="./chroma_db"
# )
# retriever = vector_store.as_retriever(
#     search_type="similarity",
#     search_kwargs={"k":1}
# )

In [None]:
prompt = ChatPromptTemplate.from_template("""
Use o sseguintes contextos paar responder a pergunta final. 
Se não souber a resposta, diga não.
Contexto:{context}
Questão:{question}
""")

In [None]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
response = chain.invoke({"question": "Como funciona o uso de um fork() nos terminais pais e filho de acordo com o documento?"})
response