In [61]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain.retrievers import ParentDocumentRetriever
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain.storage import InMemoryStore
from dotenv import load_dotenv
import os

In [47]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with a readable message when the API key is missing.
# The previous `os.environ[...] = os.getenv(...)` round-trip changed nothing when
# the key was set and raised an opaque `TypeError` (None cannot be stored in
# `os.environ`) when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

In [48]:
# Embedding model; it is handed to the Chroma vector store further down.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
)

# Chat model that writes the final answer, configured with max_tokens=500.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    max_tokens=500,
)

In [49]:
# Load the PDF
pdf_link = "pdf-investimentos-RAG.pdf"

# Image extraction is switched off. `load_and_split()` hands back the document
# already split into pieces (presumably using the loader's default splitter —
# confirm against the installed LangChain version).
loader = PyPDFLoader(file_path=pdf_link, extract_images=False)
pages = loader.load_and_split()

In [50]:
# Sanity check: how many document pieces the loader returned.
len(pages)

25

In [51]:
# Splitters
# Child splitter: produces the small chunks that are embedded and searched.
# `chunk_overlap` is set explicitly. When omitted, the splitter falls back to its
# library default (200 characters — TODO confirm for the installed version),
# which equals `chunk_size` here and makes consecutive child chunks repeat
# almost all of each other, inflating the index with near-duplicates.
child_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)

# Parent splitter: produces the larger chunks that are returned as context.
# Length is measured in characters (`len`).
parent_splitter = RecursiveCharacterTextSplitter(
    chunk_size=4000,
    chunk_overlap=200,
    length_function=len,
    add_start_index=True,
)

In [52]:
# Storages
# Parent chunks are kept in an in-memory docstore, so it starts empty every time
# this cell runs.
store = InMemoryStore()

# Child chunks are embedded into a Chroma collection persisted under 'childVectorDB'.
vectorstore = Chroma(embedding_function=embeddings, persist_directory='childVectorDB')

# Children left on disk by an earlier run point at parent ids that no longer
# exist in the fresh in-memory docstore. They would accumulate as duplicates on
# every re-run and take up similarity-search slots without yielding a parent.
# Remove them so the collection matches the (empty) docstore.
# NOTE(review): this clears whatever is already stored in this collection —
# confirm nothing else shares the 'childVectorDB' directory.
stale_child_ids = vectorstore.get()["ids"]
if stale_child_ids:
    vectorstore.delete(ids=stale_child_ids)

In [53]:
# Wire the retriever to the two stores and the two splitters defined earlier.
parent_document_retriever = ParentDocumentRetriever(
    docstore=store,
    vectorstore=vectorstore,
    parent_splitter=parent_splitter,
    child_splitter=child_splitter,
)

# Index the loaded pages; no explicit ids are supplied (`ids=None`).
parent_document_retriever.add_documents(documents=pages, ids=None)


In [57]:
# Inspect the raw contents of the retriever's vector store.
# NOTE(review): this returns every stored entry, so the output grows with the
# size of the indexed document.
parent_document_retriever.vectorstore.get()

{'ids': ['8f5d1c04-d616-4f64-a1e4-1816e9ed9d36',
  '6feb2777-d1d7-4a13-9bfd-04b95368e116',
  '9956f36b-11ec-4e98-967e-441a83a2bb43',
  '741a42e3-3329-4863-9160-8a69558165f4',
  '0656660b-44cb-43a1-aed7-67edda1d7b5d',
  '498d2615-2fdb-48c5-8f8e-05a03280b1e3',
  '76b89481-f6c3-4839-90cb-4ed1e4b8e820',
  '60e8917e-8de5-4336-b614-0199d502b7d6',
  '61a1d1b4-2016-4a52-bf13-18582db7e252',
  'a2e50f8f-da47-42f3-986e-75871cfcc86a',
  'a78f5592-6e0f-429f-b529-4972b76e32f9',
  '1f530b66-cf82-4b73-9ed4-c922ddf9d539',
  '583ca938-9b57-428a-b878-1aff9b5db20c',
  '13b1c0ac-d785-4de1-9ef0-bef28308bbd0',
  '3c67dae7-cc3f-4b3d-8889-6fad97e89504',
  '705506e8-6375-4dc3-8db9-bccfdc83ff68',
  '5fe5593c-0c1c-4d80-bb71-4696ba5a09b1',
  'ff6ab0e7-7a33-45aa-be16-a65644f1c7d0',
  'a11083b2-42ad-47f7-a277-81f9750ebb7e',
  '4076ea89-554d-49ab-9c75-9ee279c6c97b',
  '8ef60c3d-6d43-4eb2-970d-1a95c157092d',
  '4dbce00e-07f1-45db-af6d-254fcf2d42a7',
  '50b7a615-957f-4cf5-b25d-624f7a8be799',
  'aaa2b69b-7218-4a0a-bf80-

In [62]:
# Prompt text for the RAG chain; `{question}` and `{context}` are placeholders
# filled in when the chain runs.
# The literal must open with exactly three quotes: a fourth one becomes part of
# the string and is sent to the model as a stray `"` at the start of the prompt.
TEMPLATE = """
    Você é um especialista em investimentos e temas do tal. Responda a perguntas utilizando o contexto informado.
    Query: 
    {question}
    
    Context:
    {context}
"""

# Build the chat prompt object from the template text.
rag_prompt = ChatPromptTemplate.from_template(template=TEMPLATE)

In [63]:
def _format_docs(docs):
    """Join the text of the retrieved documents into one plain-text context string.

    Args:
        docs: iterable of retrieved documents, each exposing `page_content`.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Fan the incoming question out into the two inputs the prompt expects:
#   - "question": the user's text, passed through untouched;
#   - "context":  the retrieved documents, reduced to their text.
# Without the formatting step the prompt would receive the Python repr of a list
# of Document objects (metadata and escaped newlines included) instead of the
# plain text.
setup_retrival = RunnableParallel({
    "question": RunnablePassthrough(),
    "context": parent_document_retriever | _format_docs,
})

# Turns the chat model's message into a plain string.
output_parser = StrOutputParser()

In [64]:
# Full pipeline: build the prompt inputs, render the prompt, call the chat
# model, then parse the reply with the output parser.
parent_chain_retrival = (
    setup_retrival
    | rag_prompt
    | llm
    | output_parser
)

In [68]:
# Run a sample question through the full chain; the cell output is the chain's result.
parent_chain_retrival.invoke("Qual a diferença entre renda fixa e renda variável?")

'Na renda fixa, a remuneração ou sua forma de cálculo é previamente definida no momento da aplicação. O investidor sabe exatamente o valor que receberá no vencimento, sendo chamado de renda fixa pré-fixado. Já na renda variável, o investidor não sabe antecipadamente qual será a rentabilidade da aplicação, sendo variável e dependente de inúmeros fatores. Em resumo, na renda fixa o retorno é previsível, enquanto na renda variável o retorno é incerto e dependente do desempenho futuro da empresa.'