In [1]:
import os
from utils import request, create_folder_if_not_exists, generate_dates
from tqdm import tqdm
import json

In [2]:
# Local folders where the raw API responses are stored as JSON files.
VOTACOES_FOLDER = './data/votacoes'
VOTOS_FOLDER = './data/votos'

# Date range (ISO strings) used to build the votações queries.
first_date = "2023-01-01"
LAST_DATE = "2023-11-14"

# Numeric code assigned to each "tipoVoto" value found in the vote records.
TIPOS_VOTOS = dict([
    ("Sim", 1),
    ("Não", -1),
    ("Abstenção", 0),
    ("Artigo 17", 0),
    ("Obstrução", -1),
])

In [3]:
# Make sure both data folders exist before any file is written to them.
for folder in (VOTACOES_FOLDER, VOTOS_FOLDER):
    create_folder_if_not_exists(folder)

Folder './data/votacoes' already exists.
Folder './data/votos' already exists.


# Pegar as votações do período

In [176]:
# Download the votações one date window at a time and store each one as a
# JSON file named after its id.
# The start of the current window lives in a local variable so that the
# configured `first_date` is never overwritten and the cell can be re-run.
window_start = first_date

for window_end in tqdm(generate_dates(first_date, LAST_DATE, 30)[1:]):
    url = (
        "https://dadosabertos.camara.leg.br/api/v2/votacoes"
        f"?dataInicio={window_start}&dataFim={window_end}"
        "&ordem=DESC&ordenarPor=dataHoraRegistro"
    )
    resp = request(url)

    # An empty response leaves the window start untouched, so the next
    # request spans this period again.
    if not resp:
        continue

    for votacao in resp:
        id_vot = votacao['id']
        with open(f'{VOTACOES_FOLDER}/{id_vot}', 'w', encoding='utf8') as f:
            json.dump(votacao, f)

    window_start = window_end

 90%|█████████ | 9/10 [50:27<11:21, 681.71s/it]

# Pegar os votos das votações

In [11]:
# Votação files stored by the previous step, and vote files already on disk.
votacoes_files = os.listdir(VOTACOES_FOLDER)
votos_files = os.listdir(VOTOS_FOLDER)

# Set copy of the listing so the per-file "already downloaded" check below
# does not scan the whole list on every iteration.
votos_ja_baixados = set(votos_files)

for filename in tqdm(votacoes_files):
    # Files are written with encoding='utf8'; read them back the same way
    # instead of relying on the platform default.
    with open(f'{VOTACOES_FOLDER}/{filename}', encoding='utf8') as f:
        votacao = json.load(f)

    id_votacao = votacao['id']

    # Skip votações whose votes file already exists.
    if f"{id_votacao}.json" in votos_ja_baixados:
        continue

    url_voto = f"https://dadosabertos.camara.leg.br/api/v2/votacoes/{id_votacao}/votos"

    resp = request(url_voto)

    # Tag every vote with the votação it belongs to. An empty response is
    # still written (as []), which makes the skip check above apply next time.
    if len(resp) > 0:
        votos = [{'id_votacao': id_votacao, **r} for r in resp]
    else:
        votos = []

    with open(f'{VOTOS_FOLDER}/{id_votacao}.json', 'w', encoding='utf8') as f:
        json.dump(votos, f)

# Load every stored votes file into one flat list of vote records.
votos = []
for filename in tqdm(os.listdir(VOTOS_FOLDER)):
    with open(f'{VOTOS_FOLDER}/{filename}', encoding='utf8') as f:
        votos.extend(json.load(f))

  0%|          | 0/8090 [00:00<?, ?it/s]

100%|██████████| 8090/8090 [00:00<00:00, 9792.02it/s] 
100%|██████████| 8090/8090 [00:01<00:00, 5223.07it/s]


# Tratar e analisar

In [12]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Remove pandas' row and column display limits so frames are shown in full.
for option_name in ('display.max_rows', 'display.max_columns'):
    pd.set_option(option_name, None)

In [13]:
# Flatten each raw vote record into the fields used by the analysis:
# the numeric vote code, the "dataRegistroVoto" value, the deputy id and
# the id of the votação the vote belongs to.
votos_trat = [
    dict(
        tipo_voto=TIPOS_VOTOS[voto["tipoVoto"]],
        data=voto["dataRegistroVoto"],
        id_parl=voto["deputado_"]["id"],
        id_votacao=voto["id_votacao"],
    )
    for voto in votos
]

In [14]:
# Deputies x votações matrix of vote codes. Cells with no recorded vote are
# filled with 0, the same code used for "Abstenção" and "Artigo 17".
df = (
    pd.DataFrame(votos_trat)
    .pivot(columns="id_votacao", values="tipo_voto", index="id_parl")
    .fillna(0)
)

# Thresholds for keeping a votação in the analysis.
MAX_CONVERGENCE = .8  # the larger side must hold less than this share of the 1/-1 votes
MIN_NON_ZERO = .2     # more than this share of the rows must carry a non-zero code

# Per-votação tallies, computed column-wise (one value per column of df).
n_sim = (df == 1).sum()
n_nao = (df == -1).sum()
n_zero = (df == 0).sum()
n_decididos = n_sim + n_nao

# Share of the 1/-1 votes held by the larger side. A votação without any
# 1/-1 vote gives 0/0 = NaN here; NaN fails the "<" comparison below, so the
# column is simply left out instead of raising ZeroDivisionError.
convg = n_sim.where(n_sim >= n_nao, n_nao) / n_decididos
non_0 = 1 - (n_zero / (n_decididos + n_zero))

is_non_cons = (convg < MAX_CONVERGENCE) & (non_0 > MIN_NON_ZERO)
non_cons_votes = df.columns[is_non_cons.to_numpy()].tolist()

print(f"{len(non_cons_votes) / len(df.columns):.2%} das votações não foram consensuais e tiverem mais de 20% de não zeros.")

df_pivot = df.loc[:, non_cons_votes]

36.81% das votações não foram consensuais e tiverem mais de 20% de não zeros.


In [15]:
# Brute-force nearest-neighbour model using cosine distance, fitted on the
# sparse form of the filtered vote matrix (one row per deputy).
model = NearestNeighbors(algorithm='brute', metric='cosine')
csm = csr_matrix(df_pivot.to_numpy())
model.fit(csm)

In [16]:
# Map each deputy id (row label of df_pivot) to its row position, which is
# also its row in the sparse matrix built from df_pivot.
id_parl_to_idx = dict(zip(df_pivot.index, range(len(df_pivot.index))))

Waldemar : 91228
Cap. Augusto: 178829
Reginaldo Lopes: 74161
General Girão: 204473

In [17]:
def find_parl_data_by_id(id_):
    """Fetch one deputy's record from the Câmara open-data API.

    Args:
        id_: Deputy id, interpolated into the ``/deputados/{id}`` URL.

    Returns:
        The first element of whatever ``request`` returns for that URL.
    """
    response = request(f"https://dadosabertos.camara.leg.br/api/v2/deputados/{id_}")
    return response[0]

# Deputy whose voting record is used as the query.
id_parl_ref = 178829
parl_ref = find_parl_data_by_id(id_parl_ref)

print(parl_ref['ultimoStatus']['nome'], parl_ref['ultimoStatus']['siglaPartido'])

# Number of rows requested from the model, query row included.
TOT_NEIGHBORS = 10

ref_idx = id_parl_to_idx[id_parl_ref]
distances, indices = model.kneighbors(csm[ref_idx], n_neighbors=TOT_NEIGHBORS, return_distance=True)

# The query row is part of the fitted data, so it comes back among its own
# neighbours. Drop it by row position rather than assuming it is the first
# hit: a deputy with an identical vote vector ties at the same distance and
# may be ranked ahead of it. The slice keeps the result at most
# TOT_NEIGHBORS - 1 entries even if the query row is not among the hits.
neighbors = [
    (dist, idx)
    for dist, idx in zip(distances[0], indices[0])
    if idx != ref_idx
][:TOT_NEIGHBORS - 1]

# One [id, name, party, distance] entry per neighbour, closest first.
recs = []
for dist, idx in neighbors:
    id_dep = df_pivot.index[idx]
    status = find_parl_data_by_id(id_dep)['ultimoStatus']
    recs.append([id_dep, status['nome'], status['siglaPartido'], dist])

print(recs)

# Vote rows of the reference deputy followed by those of the neighbours.
df_pivot.loc[[id_parl_ref, *[r[0] for r in recs]],:]

Capitão Augusto PL
[[204455, 'Luiz Lima', 'PL', 0.06054984913704953], [220546, 'Rodolfo Nogueira', 'PL', 0.0755464251047877], [220596, 'Amália Barros', 'PL', 0.08182941950531253], [220673, 'Delegado Caveira', 'PL', 0.0827507678683207], [220558, 'Zé Trovão', 'PL', 0.08757887175332213], [220547, 'Marcos Pollon', 'PL', 0.08845988104133651], [220559, 'Julia Zanatta', 'PL', 0.09148647484100336], [74585, 'Lincoln Portela', 'PL', 0.10057670468741986], [220652, 'Delegado Palumbo', 'MDB', 0.10291477285493944]]


id_votacao,1197773-99,2115253-104,2115253-111,2115253-116,2115253-118,2115253-156,2115253-167,2115253-175,2115253-179,2115253-183,...,2387066-91,2387066-94,2393997-9,2397925-16,345311-248,345311-257,345311-270,345311-276,345311-279,503670-99
id_parl,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
178829,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,...,-1.0,1.0,-1.0,0.0,-1.0,1.0,1.0,1.0,1.0,1.0
204455,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
220546,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
220596,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
220673,1.0,1.0,0.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
220558,1.0,1.0,0.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
220547,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,0.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,0.0
220559,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
74585,1.0,1.0,1.0,0.0,-1.0,1.0,1.0,-1.0,0.0,-1.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0
220652,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,...,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,1.0,1.0


In [153]:


# Show the vote rows of three deputies side by side
# (204473: General Girão, 220596: Amália Barros).
df_pivot.loc[[204473, 220596, 220611]]

id_votacao,1301025-111,1570646-104,1570646-109,1738740-144,1738740-150,2146350-85,2149303-50,2160441-82,2160441-90,2168250-55,2191944-53,2224999-104,2224999-107,2224999-67,2224999-77,2224999-88,2224999-96,2226885-52,2277279-64,2277279-68,2309053-157,2309053-167,2309053-173,2309053-174,2309053-176,2309053-179,2323050-47,2325721-41,2348298-27,2355765-31,2358915-39,2358915-53,2358915-67,2376169-40,2376169-46,2376169-55,2376169-62,2383287-23,2383287-30,2383287-41,2383287-43,2383287-47,2383287-50,2383287-53,2383287-61,2387035-44,2387035-55,2394478-15,2400115-8,2400118-7,346155-112,503670-99,559138-201,559138-206
id_parl,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1
204473,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
220596,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
220611,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,0.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
