In [3]:
import requests

import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

In [4]:
def get_ine_id(territory_key):
    """Return the integer identifier embedded in a territory key.

    The text after the last '-' of ``territory_key`` is taken, its final two
    characters are discarded, and what remains is converted with ``int``.
    Raises ``ValueError`` when that remainder is not a valid integer literal.
    """
    trailing_segment = territory_key.rsplit('-', 1)[-1]
    # presumably the last two characters are a finer-grained suffix that is
    # not part of the INE id -- TODO confirm against the data source
    id_digits = trailing_segment[:-2]
    return int(id_digits)

def correct_names_encoding_problems(name):
    """Repair a name whose UTF-8 bytes were wrongly decoded as Latin-1.

    Each character of ``name`` is mapped back to the single byte with the same
    code point (a Latin-1 encode) and the resulting byte string is decoded as
    UTF-8.  Works on both Python 2 and Python 3.

    Parameters:
        name: text string to repair; a byte string is also accepted and is
            simply decoded as UTF-8.

    Returns:
        The repaired text string.

    Raises:
        ValueError: (as ``UnicodeEncodeError``/``UnicodeDecodeError``) when
            ``name`` contains a character above U+00FF or the recovered bytes
            are not valid UTF-8.
    """
    if isinstance(name, bytes):
        # Already raw bytes (a plain ``str`` on Python 2): nothing to map back.
        return name.decode('utf8')
    # Latin-1 maps code points 0-255 one-to-one onto bytes, which is exactly
    # the per-character chr(ord(c)) conversion, without the manual loop.
    return name.encode('latin-1').decode('utf8')

def normalize_party_names(party):
    """Map a party name to the spelling used in the election results data.

    Only the spellings listed below are rewritten; any other value is
    returned unchanged.
    """
    known_variants = (
        ('PCP - PEV', 'PCP-PEV'),
        ('PNR', 'P.N.R.'),
    )
    for crawled_spelling, results_spelling in known_variants:
        if party == crawled_spelling:
            return results_spelling
    return party

 ### Objective: obtain the names of all candidates for president of the "Câmara Municipal" in the 2013 Portuguese Local Elections. The data is crawled from http://www.eleicoes.mai.gov.pt/autarquicas2013/candidatos.html

In [5]:
# The request does not succeed with the default User-Agent header,
# so a browser-like one is sent instead.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
}

# The candidate lists are split over numbered JSON pages; `{}` in the
# template is replaced by the page number (pages 1..number_of_pages).
url_template = 'http://www.eleicoes.mai.gov.pt/autarquicas2013/static-data/candidates/PARTIES-CANDIDATES-CM-PAGE-{}.json'
number_of_pages = 28

In [6]:
# Each member of the list is a dict describing the candidates for one municipality.
all_candidates = []

# Download every page (1..number_of_pages) and collect its 'electionCandidates' entries.
for i in range(1, number_of_pages + 1):
    # The timeout keeps a stalled connection from hanging the notebook forever.
    result = requests.get(url_template.format(i), headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    result.raise_for_status()

    all_candidates.extend(result.json()['electionCandidates'])

In [16]:
# The 'parties' entry is read from the first page only.
# The timeout keeps a stalled connection from hanging the notebook forever.
result = requests.get(url_template.format(1), headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
result.raise_for_status()
parties = result.json()['parties']

In [17]:
# Check whether the "alternateCandidates" value is ever not None.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
for municipality in all_candidates:
    for candidate in municipality['candidates']:
        if candidate['alternateCandidates'] is not None:
            print('There is some alternate candidate')

# Ok, it never is

# Check that every candidacy lists exactly one effective candidate.
# The condition also catches an empty list, so the message says
# "not exactly one" rather than "more than one".
for municipality in all_candidates:
    for candidate in municipality['candidates']:
        if len(candidate['effectiveCandidates']) != 1:
            print('There is not exactly one effective candidate')

In [18]:
# Flatten the per-municipality structure into one record per candidate,
# ready to be turned into a DataFrame.
processed_candidates = []

for municipality in all_candidates:
    ine_id = get_ine_id(municipality['territoryKey'])

    # One dict per candidate of this municipality; only the first entry of
    # 'effectiveCandidates' is used as the candidate name.
    processed_candidates.extend(
        {
            'INE_ID': ine_id,
            'party': normalize_party_names(candidate['party']),
            'candidate_name': correct_names_encoding_problems(
                candidate['effectiveCandidates'][0]
            ),
        }
        for candidate in municipality['candidates']
    )

In [19]:
# Build the candidates table: one row per record, one column per dict key.
candidates_df = pd.DataFrame(processed_candidates)

In [20]:
# Correct Party symbols

def correct_party_name(candidate_name, correct_party_name):
    """Overwrite the party of one candidate in the global ``candidates_df``.

    Only the first row whose ``candidate_name`` column equals
    ``candidate_name`` is modified; the frame is updated in place.

    Parameters:
        candidate_name: exact value to look up in the ``candidate_name`` column.
        correct_party_name: value to store in that row's ``party`` column.

    Raises:
        IndexError: if no row matches ``candidate_name``.
    """
    matching_index = candidates_df[candidates_df.candidate_name == candidate_name].index
    if len(matching_index) == 0:
        # Same exception type as the bare ``index[0]`` lookup, but with a
        # message that says which candidate was missing.
        raise IndexError('no candidate named %r in candidates_df' % (candidate_name,))
    # ``.at`` replaces ``DataFrame.set_value``, which was deprecated and then
    # removed from pandas.
    candidates_df.at[matching_index[0], 'party'] = correct_party_name

# Candidates whose party symbol is overridden by hand:
# (exact candidate name, party symbol to store).
party_corrections = [
    (u"Rui de Carvalho de Araújo Moreira", 'RM'),
    (u"Marco Paulo Caldeira de Almeida", 'scma'),
    (u"António Gordinho Trindade", 'gcicn'),
    (u"Aníbal Manuel Guerreiro Cordeiro", 'MIG'),
    (u"José Guilherme Aguiar", 'GAIA'),
    (u"Inácio José Ludovico Esperança", 'MUC'),
    (u"Nuno Miguel da Silva Pinhão Dâmaso Fazenda", 'MICA2013'),
    (u"Avelino Ferreira Torres", 'mccft'),
    (u"Aurélio Pedro Monteiro Ferreira", 'MpM'),
]

# Apply the overrides in the order listed above.
for corrected_candidate, corrected_party in party_corrections:
    correct_party_name(corrected_candidate, corrected_party)

In [21]:
# Tag every row with the election year.
ELECTION_YEAR = 2013
candidates_df['year'] = ELECTION_YEAR

In [22]:
# Index the table by INE_ID and export it as UTF-8 CSV.
# The guard makes the cell safe to re-run: once INE_ID has become the index
# it is no longer a column, and calling set_index again would raise KeyError.
if candidates_df.index.name != 'INE_ID':
    candidates_df = candidates_df.set_index(['INE_ID'])
candidates_df.to_csv('data/processed_data/local_elections_candidates_2013.csv', encoding='utf-8')