<a href="https://colab.research.google.com/github/LucasAlegre/vote-network/blob/master/vote_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install and import libraries

In [1]:
! pip install --user graphistry
! pip install python-igraph

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting graphistry
  Downloading graphistry-0.25.2-py3-none-any.whl (167 kB)
[K     |████████████████████████████████| 167 kB 5.6 MB/s 
Installing collected packages: graphistry
Successfully installed graphistry-0.25.2
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting python-igraph
  Downloading python_igraph-0.9.10-py3-none-any.whl (9.1 kB)
Collecting igraph==0.9.10
  Downloading igraph-0.9.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 5.0 MB/s 
[?25hCollecting texttable>=1.6.2
  Downloading texttable-1.6.4-py2.py3-none-any.whl (10 kB)
Installing collected packages: texttable, igraph, python-igraph
Successfully installed igraph-0.9.10 python-igraph-0.9.10 texttable-1.6.4


In [2]:
import graphistry
import pandas as pd
import urllib.request
import requests
from datetime import date
import numpy as np
from igraph import Graph, summary
from itertools import combinations
import os
# Init graphistry
graphistry.register(api=3, protocol="https", server="hub.graphistry.com", username="LucasAlegre", password="")

## Data Retrieval and Processing

In [3]:
start_date = '2022-01-01' #@param {type:"date"}
end_date = '2022-12-31' #@param {type:"date"}
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)

In [4]:
# Collect all votes between start and end dates
os.makedirs('data', exist_ok=True)
for year in range(start_date.year, end_date.year + 1):
    # Read or download votes
    path = 'data/votacoesVotos-{}.csv'.format(year)
    if not os.path.isfile(path) or year == 2021:
        urllib.request.urlretrieve('https://dadosabertos.camara.leg.br/arquivos/votacoesVotos/csv/votacoesVotos-{}.csv'.format(year), path)
    vote_data = pd.read_csv(path, sep=';')

    # Remove simbolic votes
    vote_data = vote_data[vote_data['voto'] != 'Simbólico']

    # Filter dates
    vote_data['dataHoraVoto'] = pd.to_datetime(vote_data['dataHoraVoto'])    
    vote_data = vote_data.loc[(vote_data['dataHoraVoto'] >= start_date) & (vote_data['dataHoraVoto'] <= end_date)]
    vote_data.sort_values(by=['dataHoraVoto'], inplace=True)

    if year == start_date.year:
        votes = vote_data
    else:
        votes = pd.concat([votes, vote_data])
votes

Unnamed: 0,idVotacao,uriVotacao,dataHoraVoto,voto,deputado_id,deputado_uri,deputado_nome,deputado_siglaPartido,deputado_uriPartido,deputado_siglaUf,deputado_idLegislatura,deputado_urlFoto
50208,2312874-9,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-02-09 16:43:57,Artigo 17,160541,https://dadosabertos.camara.leg.br/api/v2/depu...,Arthur Lira,PP,https://dadosabertos.camara.leg.br/api/v2/part...,AL,56,http://www.camara.gov.br/internet/deputado/ban...
50186,2312874-9,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-02-09 16:44:11,Não,178972,https://dadosabertos.camara.leg.br/api/v2/depu...,Alex Manente,CIDADANIA,https://dadosabertos.camara.leg.br/api/v2/part...,SP,56,http://www.camara.gov.br/internet/deputado/ban...
50580,2312874-9,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-02-09 16:44:12,Não,178887,https://dadosabertos.camara.leg.br/api/v2/depu...,Rubens Pereira Júnior,PCdoB,https://dadosabertos.camara.leg.br/api/v2/part...,MA,56,http://www.camara.gov.br/internet/deputado/ban...
50318,2312874-9,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-02-09 16:44:12,Sim,178871,https://dadosabertos.camara.leg.br/api/v2/depu...,Evair Vieira de Melo,PP,https://dadosabertos.camara.leg.br/api/v2/part...,ES,56,http://www.camara.gov.br/internet/deputado/ban...
50343,2312874-9,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-02-09 16:44:13,Sim,204484,https://dadosabertos.camara.leg.br/api/v2/depu...,General Peternelli,PSL,https://dadosabertos.camara.leg.br/api/v2/part...,SP,56,http://www.camara.gov.br/internet/deputado/ban...
...,...,...,...,...,...,...,...,...,...,...,...,...
89005,2325377-10,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-05-31 20:50:57,Sim,204567,https://dadosabertos.camara.leg.br/api/v2/depu...,Raimundo Costa,PODE,https://dadosabertos.camara.leg.br/api/v2/part...,BA,56,http://www.camara.gov.br/internet/deputado/ban...
88932,2325377-10,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-05-31 20:51:09,Sim,156190,https://dadosabertos.camara.leg.br/api/v2/depu...,Marcel van Hattem,NOVO,https://dadosabertos.camara.leg.br/api/v2/part...,RS,56,http://www.camara.gov.br/internet/deputado/ban...
88719,2325377-10,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-05-31 20:51:33,Sim,204516,https://dadosabertos.camara.leg.br/api/v2/depu...,Alexis Fonteyne,NOVO,https://dadosabertos.camara.leg.br/api/v2/part...,SP,56,http://www.camara.gov.br/internet/deputado/ban...
88865,2325377-10,https://dadosabertos.camara.leg.br/api/v2/vota...,2022-05-31 20:52:01,Sim,73482,https://dadosabertos.camara.leg.br/api/v2/depu...,Henrique Fontana,PT,https://dadosabertos.camara.leg.br/api/v2/part...,RS,56,http://www.camara.gov.br/internet/deputado/ban...


In [5]:
def get_total_expenses(deputy_id, start_date, end_date):
    #expenses = []
    total = 0.0
    years = '&'.join(['ano={}'.format(year) for year in range(start_date.year, end_date.year+1)])
    url = "https://dadosabertos.camara.leg.br/api/v2/deputados/{}/despesas?{}&itens=100000&ordem=ASC&ordenarPor=ano".format(deputy_id, years)
    read_all = False
    while not read_all:
        read_all = True
        page = requests.get(url).json()
        if 'dados' not in page:
            break
        total += sum([e['valorDocumento'] for e in page['dados']])
        #this_expenses = [{'tipo': e['tipoDespesa'], 'valor': e['valorDocumento']} for e in page['dados']]
        #expenses.extend(this_expenses)
        for link in page['links']:
            if link['rel'] == 'next':
                url = link['href']
                read_all = False
                break
    return total

def get_deputy_info(deputy_id):
    url = 'https://dadosabertos.camara.leg.br/api/v2/deputados/{}'.format(deputy_id)
    data = requests.get(url).json()['dados']
    ultimoStatus = data['ultimoStatus']
    today = date.today()
    nascimento = pd.to_datetime(data['dataNascimento'])
    idade = today.year - nascimento.year - ((today.month, today.day) < (nascimento.month, nascimento.day))
    return {'sexo': data['sexo'], 
            'escolaridade': data['escolaridade'], 
            'idade': idade, 
            'e-mail': ultimoStatus['email'], 
            'situação': ultimoStatus['situacao'],
            'cidade natal': data['municipioNascimento']}

In [6]:
#%% Take care of different names for same deputy
for group, df_group in votes.groupby('deputado_id'):
    votes['deputado_nome'].loc[votes['deputado_id'] == group] = sorted(df_group['deputado_nome'].unique())[0]

#%% Partidos que mudaram de nome
votes['deputado_siglaPartido'].replace('PMDB', 'MDB', inplace=True)
votes['deputado_siglaPartido'].replace('PRB', 'REPUBLICANOS', inplace=True)
votes['deputado_siglaPartido'].replace('PR', 'PL', inplace=True)
votes['deputado_siglaPartido'].replace('PATRIOTA', 'PATRI', inplace=True)
votes['deputado_siglaPartido'].replace('PPS', 'CIDADANIA', inplace=True)

# Logo dos partidos
party_logo = {}
for p in votes['deputado_uriPartido'].unique():
    if not pd.isna(p):
        dados = requests.get(p).json()['dados']
        if dados['sigla'] == 'MDB':
            party_logo[p] = 'https://logodownload.org/wp-content/uploads/2018/04/mdb-logo-partido.png'
        elif dados['sigla'] == 'SOLIDARIEDADE':
            party_logo[p] = 'https://upload.wikimedia.org/wikipedia/commons/f/fe/Logomarca_do_Partido_Solidariedade.png'
        elif dados['sigla'] == 'PATRI':
            party_logo[p] = 'https://upload.wikimedia.org/wikipedia/en/8/8e/Patriota_logo.png'
        elif dados['sigla'] == 'REPUBLICANOS':
            party_logo[p] = 'https://upload.wikimedia.org/wikipedia/en/0/0d/Republicanos_logo.png'
        elif dados['sigla'] == 'PL':
            party_logo[p] = 'https://upload.wikimedia.org/wikipedia/commons/0/03/PL-logo.jpg'
        elif dados['sigla'] == 'CIDADANIA':
            party_logo[p] = 'https://upload.wikimedia.org/wikipedia/commons/d/d7/Logo_do_Cidadania_23.png'
        elif dados['sigla'] == 'NOVO':
            party_logo[p] = 'https://upload.wikimedia.org/wikipedia/commons/b/b5/Novo30_AOC.png'
        elif dados['sigla'] == 'UNIÃO':
            party_logo[p] = 'https://upload.wikimedia.org/wikipedia/commons/7/73/Uni%C3%A3o_Brasil_logo.svg'
        else:
            party_logo[p] = dados['urlLogo']

#all_data['deputado_siglaPartido'].replace('PPL', np.nan, inplace=True) # PPL foi incorporado
#all_data['deputado_siglaPartido'].replace('PRP', np.nan, inplace=True) # PRP foi incorporado
#all_data['deputado_siglaPartido'].replace('PHS', np.nan, inplace=True) # PHS foi incorporado
# all_data = pd.merge(all_data, motions_themes, on="idVotacao", how="inner") 
#all_data['deputado_siglaPartido'].fillna('S.PART.', inplace=True)
# all_data.groupby('idVotacao')['voto'].count()

#all_data.to_csv('votos_{}_to_{}.csv'.format(start_date, end_date), index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


## Graph Construction

In [7]:
def generalized_similarity(m, min_eps=0.001, max_iter=1000):
    """ Balázs Kovács, "A generalized model of relational similarity," Social Networks, 32(3), July 2010, pp. 197–211
        Based on: https://github.com/dzinoviev/generalizedsimilarity
    """
    arcs0 = m - m.mean(axis=1)[:, np.newaxis]
    arcs1 = m.T - m.mean(axis=0)[:, np.newaxis]

    eps = min_eps + 1
    N = np.eye(m.shape[1])

    iters = 0
    while (eps > min_eps and iters < max_iter) or np.isnan(N).any():
        M = arcs0.dot(N).dot(arcs0.T)
        m = np.sqrt(M.diagonal())
        M = ((M / (m+1e-8)).T / (m+1e-8)).T
        
        Np = arcs1.dot(M).dot(arcs1.T)
        n = np.sqrt(Np.diagonal())
        Np = ((Np / (n+1e-8)).T / (n+1e-8)).T
        eps = np.abs(Np - N).max()
        N = Np

        iters += 1
    return M
    
def pearson_correlation(m):
    arcs0 = m - m.mean(axis=1)[:, np.newaxis]
    arcs1 = m.T - m.mean(axis=0)[:, np.newaxis]
    M = arcs0.dot(arcs0.T)
    m = np.sqrt(M.diagonal())
    M = ((M / m).T / m).T
    return M

def filter_edges(edges_list, num_nodes, threshold=None, density=0.1):
    edges, weights = [], []
    if threshold is not None:
        for e in edges_list:
            if e[1] >= threshold:
                edges.append(e[0])
                weights.append(e[1])
    else:
        count = int(num_nodes * (num_nodes - 1) * density / 2)
        edges_list.sort(reverse=True, key=lambda e: e[1])
        edges_list = edges_list[:count]
        edges = [e[0] for e in edges_list]
        weights = [e[1] for e in edges_list]
    return edges, weights

In [8]:
# Vote Matrix
reps = votes['deputado_nome'].unique()
rep_to_ind = {reps[i]: i for i in range(len(reps))}
motions = votes['idVotacao'].unique()
motion_to_ind = {motions[i]: i for i in range(len(motions))}
parties = [p for p in votes['deputado_siglaPartido'].unique() if pd.notna(p)]

vote_matrix = np.zeros((len(reps), len(motions)))
df_grouped = votes.groupby(['idVotacao', 'deputado_nome'])
for group, df_group in df_grouped:
    voto = df_group['voto'].values[0]
    i = rep_to_ind[group[1]]
    j = motion_to_ind[group[0]]
    if voto == "Sim":
        vote_matrix[i,j] = 1
    elif voto == "Não":
        vote_matrix[i,j] = -1

In [9]:
# Edges Generation
M = generalized_similarity(vote_matrix)
edges = []
for dep1, dep2 in combinations(range(len(reps)), 2):
    if M[dep1,dep2] > 0:
        edges.append(((dep1,dep2), M[dep1,dep2]))

In [10]:
# i-graph
graph = Graph(graph_attrs={'name': 'Câmara dos Deputados'}, directed=False)
graph.add_vertices(reps)
filer_edges, weights = filter_edges(edges, num_nodes=graph.vcount(), threshold=None, density=0.2) # threshold 0.9998
graph.add_edges(filer_edges)
graph.es['weight'] = weights
graph.es['similarity'] = weights
maxw = max(graph.es['weight'])
minw = min(graph.es['weight'])
graph.es['weight'] = [(e - minw) / (maxw - minw) for e in graph.es['weight']] # Normalize weights to [0,1]

In [11]:
# Community Detection
graph.vs['community'] = graph.community_leiden(weights='weight', objective_function='modularity', resolution_parameter=1, n_iterations=100).membership
graph.vs['community_leiden'] = [str(c) for c in graph.vs['community']]
print(graph.vs['community'])
print(graph.modularity(graph.vs['community'], weights='weight'))
""" graph.delete_edges()
edges, weights = filter_edges(edges, num_nodes=graph.vcount(), threshold=0.0, density=0.1)
graph.add_edges(edges)
graph.es['weight'] = weights
graph.es['similarity'] = weights """

[0, 1, 2, 3, 3, 1, 4, 5, 2, 0, 2, 2, 2, 2, 5, 5, 3, 5, 3, 5, 5, 3, 6, 4, 3, 0, 7, 3, 3, 5, 1, 0, 5, 3, 5, 8, 5, 5, 3, 5, 5, 3, 2, 3, 1, 2, 5, 0, 5, 3, 3, 2, 2, 1, 2, 3, 3, 5, 3, 5, 3, 5, 2, 5, 5, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 1, 5, 2, 2, 5, 1, 2, 3, 2, 1, 3, 0, 0, 9, 2, 3, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 3, 3, 2, 2, 5, 3, 3, 5, 5, 3, 2, 2, 0, 0, 3, 3, 3, 3, 2, 5, 5, 3, 3, 5, 5, 3, 0, 3, 2, 2, 10, 2, 3, 1, 5, 11, 5, 2, 3, 5, 5, 5, 1, 2, 5, 2, 0, 1, 3, 5, 3, 3, 0, 2, 0, 12, 3, 3, 5, 0, 13, 5, 3, 0, 5, 2, 1, 2, 0, 4, 3, 3, 5, 3, 0, 3, 14, 5, 3, 9, 1, 0, 5, 2, 5, 5, 3, 5, 1, 4, 5, 2, 2, 5, 0, 3, 3, 5, 9, 0, 2, 3, 3, 2, 2, 2, 11, 3, 1, 5, 4, 0, 4, 1, 15, 2, 5, 3, 3, 9, 9, 9, 16, 3, 4, 17, 5, 2, 5, 5, 4, 2, 3, 5, 5, 9, 1, 3, 5, 1, 2, 5, 1, 5, 3, 9, 0, 18, 2, 3, 2, 3, 3, 0, 5, 2, 0, 5, 3, 19, 7, 5, 9, 9, 2, 2, 5, 20, 3, 5, 3, 3, 0, 5, 2, 3, 13, 0, 3, 0, 3, 1, 2, 5, 5, 1, 0, 9, 11, 5, 3, 5, 5, 9, 3, 2, 5, 2, 3, 5, 3, 5, 3, 5, 2, 21, 1, 5, 2, 5, 5, 0, 22, 1, 3, 5, 1, 3, 5, 0, 3, 3, 3, 3, 3, 5,

" graph.delete_edges()\nedges, weights = filter_edges(edges, num_nodes=graph.vcount(), threshold=0.0, density=0.1)\ngraph.add_edges(edges)\ngraph.es['weight'] = weights\ngraph.es['similarity'] = weights "

In [12]:
# Add info to nodes
info = [votes[votes['deputado_nome']==dep] for dep in graph.vs['name']]
graph.vs['Foto'] = [x['deputado_urlFoto'].values[-1] for x in info]
graph.vs['UF'] = [x['deputado_siglaUf'].values[-1] for x in info]
graph.vs['Partido'] = [x['deputado_siglaPartido'].values[-1] for x in info]
graph.vs['URL'] =  [x['deputado_uri'].values[-1] for x in info]
graph.vs['Partido URI'] =  [x['deputado_uriPartido'].values[-1] for x in info]
graph.vs['Total Despesas (reais)'] = [get_total_expenses(x['deputado_id'].values[-1], start_date, end_date) for x in info]
for i, x in enumerate(info):
    for k, v in get_deputy_info(x['deputado_id'].values[-1]).items():
        if i == 0:
            graph.vs[k] = ['' for _ in range(len(graph.vs))]
        graph.vs[i][k] = v

graph.vs['betweenness'] = graph.betweenness()
graph.vs['closeness'] = graph.closeness()

summary(graph)

IGRAPH UNW- 541 29214 -- Câmara dos Deputados
+ attr: name (g), Foto (v), Partido (v), Partido URI (v), Total Despesas (reais) (v), UF (v), URL (v), betweenness (v), cidade natal (v), closeness (v), community (v), community_leiden (v), e-mail (v), escolaridade (v), idade (v), name (v), sexo (v), situação (v), similarity (e), weight (e)


## Visualization

In [14]:
g = graphistry.bind(source='src', destination='dst', 
                    point_label='name', 
                    edge_weight='similarity', 
                    edge_label='similarity', 
                    edge_size='similarity',
                    point_color='community',
                    point_size='name')
(e_df, n_df) = g.igraph2pandas(graph)
g = g.nodes(n_df).edges(e_df)
g = g.nodes(lambda g: g._nodes.assign(community=g._nodes['community'].astype('int32')))
g = g.addStyle(bg={'color': 'white'}, page={'title': 'Câmara dos Deputados'}) 
g = g.encode_point_icon('Partido URI', categorical_mapping=party_logo, shape='circle')
g = g.settings(url_params={
    'pageTitle': 'Câmara dos Deputados',
    'play': 2000,
    'menu': True, 
    'info': True,
    'strongGravity': True,
    'showArrows': False,
    'pointSize': 5.0,
    'pointsOfInterestMax': 100,
    'edgeInfluence': 1.0,
    'showLabels': True, 'showLabelOnHover': True,
    'showPointsOfInterest': True, 'showPointsOfInterestLabel': True, 'showLabelPropertiesOnHover': True})
g.plot(render=True)

  
