# Congresso RAG

## Installing packages

In [None]:
%pip install -qU langchain-community langchain-openai langchain-core pypdf openai

Note: you may need to restart the kernel to use updated packages.


## Importing packages

In [2]:
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

## Loading and Splitting Documents

In [3]:
# Path of the PDF to index, relative to the notebook's working directory.
file_path = "./diario-da-camara-dos-deputados-01-05-2025.pdf"

# Chunking configuration: size 1000 with an overlap of 200 between consecutive
# chunks; `add_start_index=True` asks the splitter to record where each chunk
# starts in its source document (per the LangChain splitter API).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)

# Load the PDF into a list of documents and report how many were produced.
loader = PyPDFLoader(file_path)
docs = loader.load()
print(len(docs))

# Split the loaded documents into the chunks used by the rest of the notebook.
all_splits = text_splitter.split_documents(docs)

# Bare last expression: shown as the cell's output (number of chunks).
len(all_splits)

113


358

## Embeddings

In [None]:
# Embedding model shared by this cell and by the vector store built later.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Embed every chunk exactly once. The previous version built `vectors` with a
# comprehension and then threw it away and rebuilt it in a loop, doubling the
# number of embedding requests. `embed_documents` takes the whole list of texts
# in one call and returns one vector per input text, in order.
vectors = embeddings.embed_documents(
    [split.page_content for split in all_splits]
)

# Sanity check: one vector per chunk, plus a peek at the first few components.
print(len(vectors))
print(vectors[0][:10])

358
[0.054630666971206665, 0.005369423422962427, 0.003710131859406829, 0.016308151185512543, 0.011532897129654884, 0.03493821620941162, 0.003159772837534547, 0.0106293223798275, -0.05432399734854698, -0.04779635742306709]


## Vector Store

In [10]:
# In-memory vector store that uses the `embeddings` model defined earlier.
vector_store = InMemoryVectorStore(embedding=embeddings)

# Index all chunks; the returned ids identify the stored documents.
ids = vector_store.add_documents(all_splits)

### Querying

In [None]:
# Question asked against the indexed gazette; reused by the response cell.
query = "O que é a FRENID?"

# Retrieve the chunks most similar to the question (library-default result count).
results = vector_store.similarity_search(query=query)

# Print each retrieved document on its own line for inspection.
for result in results:
    print(result)

page_content='Amom Mandel  Andreia Siqueira  
Aureo Ribeiro  Clarissa Tércio  
Bruno Farias  Danilo Forte  
Daniela Reinehr  Erika Kokay  
Dayany Bittencourt  Felipe Becari  
Dr. Francisco  Flávia Morais  
Duarte Jr. Geraldo Resende 
Márcio Honaiser  Gilberto Nascimento  
Márcio Jerry  Glaustin da Fokus  
Maria Rosas  Katia Dias  
Max Lemosvaga do Fdr PSOL-REDE Marcio Alvino  
Paulo Alexandre Barbosa  Miguel Lombardi  
Paulo Freire Costa  Rodrigo da Zaeli  
Pedro Campos  Rubens Otoni  
Sargento Portugal  Silvia Cristina  
Thiago Flores  Sonize Barbosa  
Weliton Prado  Soraya Santos  
Zé Haroldo Cathedral  2 vagas  
2 vagas   
Fdr PSOL-REDE  
(Dep. do PDT ocupa a vaga)  Talíria Petrone  
 
Secretário(a): Raquel Ferreira de Carvalho Aldigueri  
Local: Anexo II, Térreo, Ala A, sala 5 
Telefones: 3216-6971 a 76 
 
COMISSÃO DE DESENVOLVIMENTO ECONÔMICO  
Presidente: Lafayette de Andrada (REPUBLICANOS) 
1º Vice-Presidente: 
2º Vice-Presidente: 
3º Vice-Presidente: 
 
Titulares Suplentes 
PL/

## Response 

In [24]:
from openai import OpenAI

# Client configuration (API key etc.) is taken from the environment by the SDK.
client = OpenAI()

# System-style instructions for the model live in a separate text file.
with open("instructions.txt", "r", encoding="utf-8") as f:
    instructions = f.read()

# Build the context from the text of each retrieved chunk. Interpolating
# `results` directly would send the Python repr of a list of Document objects
# (escaped "\n" sequences, metadata dicts, `page_content='...'` wrappers),
# which is noise for the model; joining `page_content` sends the actual text.
context = "\n\n".join(doc.page_content for doc in results)

try:
    res = client.responses.create(
        model="gpt-4.1-nano",
        instructions=instructions,
        input=(
            f"Contexto: {context}; "
            f"Pergunta: {query}"
        )
    )
    print(res.output_text)
except Exception as e:
    # Best-effort: report the failure instead of aborting the notebook run.
    print(f"An exception occured: {e}")

Não foi encontrado no contexto qualquer requerimento feito por Amom Mandel.
