# Uso de RAG para recuperar conhecimento organizacional

## Objetivo: Criar uma rag

In [83]:
%%capture --no-stderr
%pip install --quiet -U langchain langchain-community langchain-text-splitters langchain-groq chromadb pypdf langchain-mistralai ipywidgets

In [84]:
import os, getpass

def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"{var}: ")

_set_env("GROQ_API_KEY")
_set_env("MISTRAL_API_KEY")

In [None]:
from langchain_community.document_loaders import PyPDFLoader

file_path = "/home/gui/rag-knowledge-managment/docs/pdfs/a.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()
docs[0]



{'producer': 'GPL Ghostscript 9.27',
 'creator': 'Adobe InDesign 18.1 (Macintosh)',
 'creationdate': '2023-06-29T13:55:24+02:00',
 'moddate': '2023-06-29T13:55:24+02:00',
 'source': '/home/gui/rag-knowledge-managment/docs/pdfs/a.pdf',
 'total_pages': 15,
 'page': 0,
 'page_label': '1'}


In [86]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

def dividir_documentos(documentos):
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return splitter.split_documents(documentos)

In [None]:
from langchain_mistralai import MistralAIEmbeddings

embeddings = MistralAIEmbeddings(model="mistral-embed")

In [None]:
from langchain_community.vectorstores import Chroma

vector_store = Chroma(
    collection_name="km",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

In [88]:
from langchain_groq import ChatGroq

def configurar_llm():
    return ChatGroq(model="llama-3.3-70b-versatile")

In [89]:
def consultar(vetor_store, llm, pergunta):
    retriever = vetor_store.as_retriever(search_kwargs={"k": 3})
    resposta = llm({"input": pergunta, "retriever": retriever})
    return resposta['output']

In [None]:
documentos = carregar_documentos("docs")
documentos_divididos = dividir_documentos(documentos)
vetor_store = criar_e_armazenar_embeddings(documentos_divididos)
llm = configurar_llm()

pergunta = "O que é product Ops?"
resposta = consultar(vetor_store, llm, pergunta)
print(resposta)

ValueError: Expected Embeddings to be non-empty list or numpy array, got [] in upsert.