<a href="https://colab.research.google.com/github/LucasAlegre/vote-network/blob/master/vote_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install and import libraries

In [None]:
! pip install --user graphistry
! pip install python-igraph
! pip install networkx==2.6

Collecting graphistry
  Downloading graphistry-0.19.5-py3-none-any.whl (85 kB)
[?25l[K     |███▉                            | 10 kB 23.5 MB/s eta 0:00:01[K     |███████▋                        | 20 kB 28.5 MB/s eta 0:00:01[K     |███████████▌                    | 30 kB 12.7 MB/s eta 0:00:01[K     |███████████████▎                | 40 kB 9.7 MB/s eta 0:00:01[K     |███████████████████▏            | 51 kB 5.2 MB/s eta 0:00:01[K     |███████████████████████         | 61 kB 4.7 MB/s eta 0:00:01[K     |██████████████████████████▉     | 71 kB 5.1 MB/s eta 0:00:01[K     |██████████████████████████████▋ | 81 kB 5.8 MB/s eta 0:00:01[K     |████████████████████████████████| 85 kB 2.5 MB/s 
Installing collected packages: graphistry
Successfully installed graphistry-0.19.5
Collecting python-igraph
  Downloading python_igraph-0.9.6-cp37-cp37m-manylinux2010_x86_64.whl (3.2 MB)
[K     |████████████████████████████████| 3.2 MB 5.0 MB/s 
[?25hCollecting texttable>=1.6.2
  Downloadi

In [1]:
import graphistry
import networkx
import pandas as pd
import urllib.request
import requests
import numpy as np
from igraph import Graph, plot, summary, read
from itertools import combinations
# Init graphistry
graphistry.register(api=3, protocol="https", server="hub.graphistry.com", username="LucasAlegre", password="")

## Data Retrieval and Processing

In [2]:
start_date = '2019-01-31' #@param {type:"date"}
end_date = '2021-12-30' #@param {type:"date"}
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)

In [3]:
# Collect all votes between start and end dates
for year in range(start_date.year, end_date.year + 1):

    filename = 'votacoesVotos-{}.csv'.format(year)
    urllib.request.urlretrieve('https://dadosabertos.camara.leg.br/arquivos/votacoesVotos/csv/votacoesVotos-{}.csv'.format(year), filename)
    data = pd.read_csv(filename, sep=';')

    # Remove simbolic votes
    data = data[data['voto'] != 'Simbólico']

    # Filter dates
    data['dataHoraVoto'] = pd.to_datetime(data['dataHoraVoto'])    
    mask = (data['dataHoraVoto'] >= start_date) & (data['dataHoraVoto'] <= end_date)
    data = data.loc[mask]
    data.sort_values(by=['dataHoraVoto'], inplace=True)
    
    if year == start_date.year:
        votes = data
    else:
        votes = pd.concat([votes, data])
votes

Unnamed: 0,idVotacao,uriVotacao,dataHoraVoto,voto,deputado_id,deputado_uri,deputado_nome,deputado_siglaPartido,deputado_uriPartido,deputado_siglaUf,deputado_idLegislatura,deputado_urlFoto
25046,2179189-45,https://dadosabertos.camara.leg.br/api/v2/vota...,2019-02-12 16:09:52,Obstrução,160592,https://dadosabertos.camara.leg.br/api/v2/depu...,Zeca Dirceu,PT,https://dadosabertos.camara.leg.br/api/v2/part...,PR,56,http://www.camara.gov.br/internet/deputado/ban...
24899,2179189-45,https://dadosabertos.camara.leg.br/api/v2/vota...,2019-02-12 16:10:00,Não,160674,https://dadosabertos.camara.leg.br/api/v2/depu...,Hugo Motta,PRB,https://dadosabertos.camara.leg.br/api/v2/part...,PB,56,http://www.camara.gov.br/internet/deputado/ban...
24981,2179189-45,https://dadosabertos.camara.leg.br/api/v2/vota...,2019-02-12 16:10:02,Não,194260,https://dadosabertos.camara.leg.br/api/v2/depu...,Nivaldo Albuquerque,PTB,https://dadosabertos.camara.leg.br/api/v2/part...,AL,56,http://www.camara.gov.br/internet/deputado/ban...
24848,2179189-45,https://dadosabertos.camara.leg.br/api/v2/vota...,2019-02-12 16:10:05,Não,141421,https://dadosabertos.camara.leg.br/api/v2/depu...,Eduardo da Fonte,PP,https://dadosabertos.camara.leg.br/api/v2/part...,PE,56,http://www.camara.gov.br/internet/deputado/ban...
24883,2179189-45,https://dadosabertos.camara.leg.br/api/v2/vota...,2019-02-12 16:10:09,Não,74270,https://dadosabertos.camara.leg.br/api/v2/depu...,Gilberto Nascimento,PSC,https://dadosabertos.camara.leg.br/api/v2/part...,SP,56,http://www.camara.gov.br/internet/deputado/ban...
...,...,...,...,...,...,...,...,...,...,...,...,...
187883,531331-340,https://dadosabertos.camara.leg.br/api/v2/vota...,2021-08-17 22:31:13,Não,178912,https://dadosabertos.camara.leg.br/api/v2/depu...,Pedro Cunha Lima,PSDB,https://dadosabertos.camara.leg.br/api/v2/part...,PB,56,http://www.camara.gov.br/internet/deputado/ban...
187893,531331-340,https://dadosabertos.camara.leg.br/api/v2/vota...,2021-08-17 22:31:15,Não,204565,https://dadosabertos.camara.leg.br/api/v2/depu...,Professora Dayane Pimentel,PSL,https://dadosabertos.camara.leg.br/api/v2/part...,BA,56,http://www.camara.gov.br/internet/deputado/ban...
187828,531331-340,https://dadosabertos.camara.leg.br/api/v2/vota...,2021-08-17 22:31:17,Não,178983,https://dadosabertos.camara.leg.br/api/v2/depu...,Marcio Alvino,PL,https://dadosabertos.camara.leg.br/api/v2/part...,SP,56,http://www.camara.gov.br/internet/deputado/ban...
187768,531331-340,https://dadosabertos.camara.leg.br/api/v2/vota...,2021-08-17 22:31:18,Não,178910,https://dadosabertos.camara.leg.br/api/v2/depu...,Joaquim Passarinho,PSD,https://dadosabertos.camara.leg.br/api/v2/part...,PA,56,http://www.camara.gov.br/internet/deputado/ban...


In [4]:
#%% Take care of different names for same deputy
for group, df_group in votes.groupby('deputado_id'):
    votes['deputado_nome'].loc[votes['deputado_id'] == group] = sorted(df_group['deputado_nome'].unique())[0]

#%% Partidos que mudaram de nome
votes['deputado_siglaPartido'].replace('PMDB', 'MDB', inplace=True)
votes['deputado_siglaPartido'].replace('PRB', 'REPUBLICANOS', inplace=True)
votes['deputado_siglaPartido'].replace('PR', 'PL', inplace=True)
votes['deputado_siglaPartido'].replace('PATRIOTA', 'PATRI', inplace=True)
votes['deputado_siglaPartido'].replace('PPS', 'CIDADANIA', inplace=True)

party_logo = {}
for p in votes['deputado_uriPartido'].unique():
    party_logo[p] = requests.get(p).json()['dados']['urlLogo']

#all_data['deputado_siglaPartido'].replace('PPL', np.nan, inplace=True) # PPL foi incorporado
#all_data['deputado_siglaPartido'].replace('PRP', np.nan, inplace=True) # PRP foi incorporado
#all_data['deputado_siglaPartido'].replace('PHS', np.nan, inplace=True) # PHS foi incorporado
# all_data = pd.merge(all_data, motions_themes, on="idVotacao", how="inner") 
#all_data['deputado_siglaPartido'].fillna('S.PART.', inplace=True)
# all_data.groupby('idVotacao')['voto'].count()

#all_data.to_csv('votos_{}_to_{}.csv'.format(start_date, end_date), index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, val

## Graph Construction

In [5]:
def generalized_similarity(m, min_eps=0.001, max_iter=1000):
    """ Balázs Kovács, "A generalized model of relational similarity," Social Networks, 32(3), July 2010, pp. 197–211
        Based on: https://github.com/dzinoviev/generalizedsimilarity
    """
    arcs0 = m - m.mean(axis=1)[:, np.newaxis]
    arcs1 = m.T - m.mean(axis=0)[:, np.newaxis]

    eps = min_eps + 1
    N = np.eye(m.shape[1])

    iters = 0
    while (eps > min_eps and iters < max_iter) or np.isnan(N).any():
        M = arcs0.dot(N).dot(arcs0.T)
        m = np.sqrt(M.diagonal())
        M = ((M / (m+1e-8)).T / (m+1e-8)).T
        
        Np = arcs1.dot(M).dot(arcs1.T)
        n = np.sqrt(Np.diagonal())
        Np = ((Np / (n+1e-8)).T / (n+1e-8)).T
        eps = np.abs(Np - N).max()
        N = Np

        iters += 1
    return M
    
def filter_edges(edges_list, num_nodes, threshold=None, density=0.1):
    edges, weights = [], []
    if threshold is not None:
        for e in edges_list:
            if e[1] >= threshold:
                edges.append(e[0])
                weights.append(e[1])
    else:
        count = int(num_nodes * (num_nodes - 1) * density / 2)
        edges_list.sort(reverse=True, key=lambda e: e[1])
        edges_list = edges_list[:count]
        edges = [e[0] for e in edges_list]
        weights = [e[1] for e in edges_list]
    return edges, weights

In [6]:
reps = votes['deputado_nome'].unique()
rep_to_ind = {reps[i]: i for i in range(len(reps))}
motions = votes['idVotacao'].unique()
motion_to_ind = {motions[i]: i for i in range(len(motions))}
parties = [p for p in votes['deputado_siglaPartido'].unique() if pd.notna(p)]

vote_matrix = np.zeros((len(reps), len(motions)))
df_grouped = votes.groupby(['idVotacao', 'deputado_nome'])
for group, df_group in df_grouped:
    voto = df_group['voto'].values[0]
    i = rep_to_ind[group[1]]
    j = motion_to_ind[group[0]]
    if voto == "Sim":
        vote_matrix[i,j] = 1
    if voto == "Não":
        vote_matrix[i,j] = -1

In [7]:
M = generalized_similarity(vote_matrix)
edges = []
for dep1, dep2 in combinations(range(len(reps)), 2):
    if M[dep1,dep2] > 0:
        edges.append(((dep1,dep2), M[dep1,dep2]))

#plot_similarity_distribution([e[1] for e in edges if e[1] > 0.99], weight_threshold)

graph = Graph(graph_attrs={'name': 'Câmara dos Deputados'}, directed=False)
graph.add_vertices(reps)
edges, weights = filter_edges(edges, num_nodes=graph.vcount(), threshold=0, density=0.1)
graph.add_edges(edges)
graph.es['weight'] = weights
graph.es['similarity'] = weights
# Normalize weights to [0,1]
#maxw = max(graph.es['weight'])
#minw = min(graph.es['weight'])
#graph.es['weight'] = [(e - minw) / (maxw - minw) for e in graph.es['weight']]

info = [votes[votes['deputado_nome']==dep] for dep in graph.vs['name']]
graph.vs['Foto'] = [x['deputado_urlFoto'].values[-1] for x in info]
graph.vs['UF'] = [x['deputado_siglaUf'].values[-1] for x in info]
graph.vs['Partido'] = [x['deputado_siglaPartido'].values[-1] for x in info]
graph.vs['Partido ID'] = [parties.index(p) for p in graph.vs['Partido']]
graph.vs['URL'] =  [x['deputado_uri'].values[-1] for x in info]
graph.vs['Partido URI'] =  [x['deputado_uriPartido'].values[-1] for x in info]
summary(graph)

IGRAPH UNW- 575 102961 -- Câmara dos Deputados
+ attr: name (g), Foto (v), Partido (v), Partido ID (v), Partido URI (v), UF (v), URL (v), name (v), similarity (e), weight (e)


## Visualization

In [8]:
g = graphistry.bind(source='src', destination='dst', point_label='name', point_size='name', edge_weight='similarity')
(e_df, n_df) = g.igraph2pandas(graph)
g = g.nodes(n_df).edges(e_df)
#g = g.nodes(lambda g: g._nodes.assign(community=g._nodes['Partido ID'].astype('int32')))
g = g.addStyle(bg={'color': 'white'}, page={'title': 'Câmara dos Deputados'}) 
g = g.encode_point_icon('Partido URI', categorical_mapping=party_logo, shape='circle')
g = g.settings(url_params={
    'pageTitle': 'Câmara dos Deputados',
    'play': 2000,
    'menu': True, 'info': True,
    'strongGravity': True,
    'showArrows': False,
    'pointSize': 5.0,
    'pointsOfInterestMax': 100,
    'edgeInfluence': 2})
g.plot()