In [172]:
import os
from utils import request
from tqdm import tqdm
import json

In [None]:
# Folders where the raw API payloads are cached on disk.
VOTACOES_FOLDER = './data/votacoes'
VOTOS_FOLDER = './data/votos'

# Numeric encoding of each vote label: +1 in favour, -1 against, 0 neutral.
_FAVORAVEL, _NEUTRO, _CONTRARIO = 1, 0, -1

TIPOS_VOTOS = {
    "Sim": _FAVORAVEL,
    "Não": _CONTRARIO,
    "Abstenção": _NEUTRO,
    "Artigo 17": _NEUTRO,
    # Obstruction is scored the same as a "Não" vote.
    "Obstrução": _CONTRARIO,
}

# Pegar as votações do período

In [164]:
from datetime import datetime, timedelta

def generate_dates(start_date, end_date, step=1, **kwargs):
    """
    Generate formatted date strings from a start date up to (and including) an end date.

    Parameters:
    - start_date (str or datetime): Start date, as a string in `input_format` or as a datetime object.
    - end_date (str or datetime): End date, as a string in `input_format` or as a datetime object.
    - step (int): Number of days between consecutive dates (default is 1). Must be positive.
    - input_format (str, keyword): strptime format used to parse string inputs (default '%Y-%m-%d').
    - output_format (str, keyword): strftime format of the returned strings (default '%Y-%m-%d').

    Returns:
    - list of str: the generated dates formatted with `output_format`, starting at
      `start_date` and advancing `step` days at a time while not past `end_date`.
      Empty when `start_date` is after `end_date`.

    Raises:
    - ValueError: if `step` is not positive (the loop would never terminate),
      or if a string date does not match `input_format`.
    """
    if step <= 0:
        raise ValueError(f"step must be a positive number of days, got {step!r}")

    input_format = kwargs.get('input_format', '%Y-%m-%d')
    output_format = kwargs.get('output_format', '%Y-%m-%d')

    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, input_format)
    if isinstance(end_date, str):
        end_date = datetime.strptime(end_date, input_format)

    delta = timedelta(days=step)
    current_date = start_date
    dates = []

    while current_date <= end_date:
        dates.append(current_date.strftime(output_format))
        current_date += delta

    return dates

In [176]:
LAST_DATE = "2023-11-14"
first_date = "2023-01-01"
WINDOW_DAYS = 30  # size of each request window, in days

# Make sure the cache folder exists before writing into it.
os.makedirs(VOTACOES_FOLDER, exist_ok=True)

# Build the window edges. The 30-day stepping usually stops short of LAST_DATE,
# so append it explicitly; otherwise the final partial window is never fetched.
window_edges = generate_dates(first_date, LAST_DATE, WINDOW_DAYS)
if not window_edges or window_edges[-1] != LAST_DATE:
    window_edges.append(LAST_DATE)

# Request each window once (start -> next edge) instead of re-downloading
# everything since first_date on every iteration.
windows = list(zip(window_edges, window_edges[1:]))

for window_start, window_end in tqdm(windows):
    url = f"https://dadosabertos.camara.leg.br/api/v2/votacoes?dataInicio={window_start}&dataFim={window_end}&ordem=DESC&ordenarPor=dataHoraRegistro"
    resp = request(url)

    if not resp:
        continue

    # One file per votação, named by its id. Adjacent windows share their
    # boundary day, so a repeated id simply overwrites the same file.
    for votacao in resp:
        id_vot = votacao['id']
        with open(f'{VOTACOES_FOLDER}/{id_vot}', 'w', encoding='utf8') as f:
            json.dump(votacao, f)


 50%|█████     | 5/10 [03:23<05:37, 67.41s/it]

# Pegar os votos das votações

In [None]:
# Make sure the output folder exists before writing into it.
os.makedirs(VOTOS_FOLDER, exist_ok=True)

votacoes_files = os.listdir(VOTACOES_FOLDER)

for filename in tqdm(votacoes_files):
    with open(f'{VOTACOES_FOLDER}/{filename}', encoding='utf8') as f:
        votacao = json.load(f)

    id_votacao = votacao['id']
    url_voto = f"https://dadosabertos.camara.leg.br/api/v2/votacoes/{id_votacao}/votos"

    resp = request(url_voto)

    # Skip votações for which no individual votes came back.
    if not resp:
        continue

    # Tag every vote with the votação it belongs to.
    votos_da_votacao = [{'id_votacao': id_votacao, **r} for r in resp]

    # Open in write mode: the default read mode cannot create or write the file.
    with open(f'{VOTOS_FOLDER}/{id_votacao}.json', 'w', encoding='utf8') as f:
        json.dump(votos_da_votacao, f)

# Load every cached votes file into a single flat list of vote records.
votos = []
# Pass the folder path itself (not a set containing it) to os.listdir;
# sorting makes the load order reproducible across runs.
for filename in tqdm(sorted(os.listdir(VOTOS_FOLDER))):
    with open(f'{VOTOS_FOLDER}/{filename}', encoding='utf8') as f:
        votos.extend(json.load(f))

# Tratar e analisar

In [64]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import pandas as pd

In [65]:
def _tratar_voto(voto):
    """Flatten one raw vote record into the fields used by the analysis."""
    return {
        # Vote label translated to its numeric code via TIPOS_VOTOS.
        "tipo_voto": TIPOS_VOTOS[voto["tipoVoto"]],
        "data": voto["dataRegistroVoto"],
        "id_parl": voto["deputado_"]["id"],
        'id_votacao': voto['id_votacao']
    }


votos_trat = list(map(_tratar_voto, votos))

In [66]:
# Long table of treated votes: one row per (deputy, votação) vote.
df_votos = pd.DataFrame(votos_trat)

# Wide matrix: one row per deputy (id_parl), one column per votação, cells hold
# the numeric vote code. Missing (deputy, votação) pairs are filled with 0.
df_pivot = df_votos.pivot(
    index="id_parl",
    columns="id_votacao",
    values="tipo_voto",
).fillna(0)

In [67]:
# Sparse (CSR) copy of the deputy x votação matrix.
csm = csr_matrix(df_pivot.to_numpy())

# Brute-force nearest-neighbour search using cosine distance between vote vectors.
model = NearestNeighbors(algorithm='brute', metric='cosine')
model.fit(csm)

In [120]:
# Map each deputy id (the df_pivot row label) to its row position in the matrix.
id_parl_to_idx = {
    id_parl: posicao
    for posicao, id_parl in enumerate(df_pivot.index)
}

- Waldemar Oliveira: 91228
- Cap. Augusto: 178829
- Reginaldo Lopes: 74161
- General Girão: 204473


In [154]:
def find_parl_data_by_id(id_):
    """Fetch a deputy's record from the Câmara open-data API.

    Parameters:
    - id_: deputy identifier, interpolated into the `/deputados/{id}` URL.

    Returns:
    - the first element of whatever `request` returns for that URL.
    """
    resposta = request(f"https://dadosabertos.camara.leg.br/api/v2/deputados/{id_}")
    return resposta[0]

id_parl_ref = 91228
N_NEIGHBORS = 5  # neighbours requested from the model, reference deputy included

parl_ref = find_parl_data_by_id(id_parl_ref)

print(parl_ref['ultimoStatus']['nome'], parl_ref['ultimoStatus']['siglaPartido'])

# Query the model with the reference deputy's own vote vector.
idx_ref = id_parl_to_idx[id_parl_ref]
distances, indices = model.kneighbors(csm[idx_ref], n_neighbors=N_NEIGHBORS, return_distance=True)

# Drop the reference deputy by row index rather than by position: when another
# deputy has an identical vote vector the distances tie, and the reference is
# not guaranteed to be the first result.
vizinhos = [
    (dist, idx)
    for dist, idx in zip(distances[0], indices[0])
    if idx != idx_ref
][:N_NEIGHBORS - 1]

# One row per neighbour: [deputy id, name, party, cosine distance to the reference].
recs = []
for dist, idx in vizinhos:
    id_dep = df_pivot.index[idx]
    parl_data = find_parl_data_by_id(id_dep)

    recs.append([id_dep, parl_data['ultimoStatus']['nome'], parl_data['ultimoStatus']['siglaPartido'], dist])
recs

Waldemar Oliveira AVANTE


[[220667, 'Pedro Campos', 'PSB', 0.14145428945178562],
 [204355, 'Da Vitoria', 'PP', 0.15720461910731087],
 [220543, 'Ricardo Ayres', 'REPUBLICANOS', 0.17586351854167648],
 [160535, 'Marcon', 'PT', 0.17739640368116705]]

In [153]:
# Lift pandas' display limits so every row and every votação column is rendered.
for opcao in ('display.max_rows', 'display.max_columns'):
    pd.set_option(opcao, None)

# Side-by-side vote vectors of the deputies being compared (rows selected by id).
ids_comparar = [204473, 220596, 220611]
df_pivot.loc[ids_comparar, :]

id_votacao,1301025-111,1570646-104,1570646-109,1738740-144,1738740-150,2146350-85,2149303-50,2160441-82,2160441-90,2168250-55,2191944-53,2224999-104,2224999-107,2224999-67,2224999-77,2224999-88,2224999-96,2226885-52,2277279-64,2277279-68,2309053-157,2309053-167,2309053-173,2309053-174,2309053-176,2309053-179,2323050-47,2325721-41,2348298-27,2355765-31,2358915-39,2358915-53,2358915-67,2376169-40,2376169-46,2376169-55,2376169-62,2383287-23,2383287-30,2383287-41,2383287-43,2383287-47,2383287-50,2383287-53,2383287-61,2387035-44,2387035-55,2394478-15,2400115-8,2400118-7,346155-112,503670-99,559138-201,559138-206
id_parl,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1
204473,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
220596,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
220611,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,-1.0,0.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0
