In [128]:
import requests

import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

 ### Objective: obtain the names of all candidates for president of the "Câmara Municipal" in the 2013 Portuguese Local Elections. The data is crawled from http://www.eleicoes.mai.gov.pt/autarquicas2013/candidatos.html

In [129]:
# Requests sent with the library's default User-Agent do not succeed, so a
# desktop-browser User-Agent string is sent instead.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/57.0.2987.133 Safari/537.36'
    ),
}

# Number of JSON pages the candidate listing is split into.
number_of_pages = 28

# The `{}` placeholder is filled with the page number via str.format.
url_template = (
    'http://www.eleicoes.mai.gov.pt/autarquicas2013/'
    'static-data/candidates/'
    'PARTIES-CANDIDATES-CM-PAGE-{}.json'
)

In [130]:
# Each member of the list is a dict describing the candidates for one municipality
all_candidates = []

# Pages are numbered from 1 to number_of_pages inclusive.
for i in range(1, number_of_pages + 1):
    # The timeout keeps a stalled connection from hanging the notebook forever.
    result = requests.get(url_template.format(i), headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
    result.raise_for_status()

    all_candidates.extend(result.json()['electionCandidates'])

In [157]:
# The party/list metadata is read from the 'parties' entry of page 1.
result = requests.get(url_template.format(1), headers=headers, timeout=30)
# Surface an HTTP failure here rather than as a confusing JSON/KeyError below.
result.raise_for_status()
parties = result.json()['parties']

In [131]:
# Sanity check 1: is the "alternateCandidates" value ever not None?
# (print is written as a parenthesised single-argument call so the cell runs
# unchanged on both Python 2 and Python 3.)
for municipality in all_candidates:
    for candidate in municipality['candidates']:
        if candidate['alternateCandidates'] is not None:
            print('There is some alternate candidate')

# Ok, it never is

# Sanity check 2: does every entry have exactly one effective candidate?
# NOTE: the `!= 1` test also fires for an empty list, not only for more than one.
for municipality in all_candidates:
    for candidate in municipality['candidates']:
        if len(candidate['effectiveCandidates']) != 1:
            print('There is more than one effective candidate')

In [163]:
def get_ine_id(territory_key):
    """Extract the integer id embedded in a territory key.

    Takes the text after the last '-' in ``territory_key`` (the whole string
    when it contains no '-'), drops its final two characters and converts
    what remains with ``int``.

    Raises:
        ValueError: if the remaining text is not a valid integer literal,
            e.g. when the trailing segment has two characters or fewer.
    """
    _, _, trailing_segment = territory_key.rpartition('-')
    return int(trailing_segment[:-2])

def correct_names_encoding_problems(name):
    """Repair a name whose UTF-8 bytes were decoded one byte per character.

    In the input, every character stands for a single byte of the UTF-8
    encoding of the real name (so 'â' shows up as the two characters
    '\\xc3\\xa2'). Encoding as latin-1 maps each character back to the byte
    with the same value, and decoding those bytes as UTF-8 gives the
    intended text. This form runs on both Python 2 and Python 3, where text
    strings have no ``decode`` method.

    Args:
        name: the garbled text; a byte string is also accepted and is simply
            decoded as UTF-8.

    Returns:
        The decoded text string.

    Raises:
        UnicodeEncodeError: if ``name`` contains a character above U+00FF,
            i.e. one that cannot stand for a single byte.
        UnicodeDecodeError: if the recovered bytes are not valid UTF-8 (for
            example when ``name`` was already correctly decoded and contains
            accented characters).
    """
    if isinstance(name, bytes):
        # Already raw bytes (this is what a Python 2 ``str`` is): just decode.
        return name.decode('utf8')
    return name.encode('latin-1').decode('utf8')

def normalize_party_names(party):
    """Rewrite a party label so it matches the election-results data.

    'PCP - PEV' becomes 'PCP-PEV' and 'II' becomes 'JPP'; any other value is
    returned unchanged.
    """
    renames = (
        ('PCP - PEV', 'PCP-PEV'),
        ('II', 'JPP'),
    )
    for feed_label, results_label in renames:
        if party == feed_label:
            return results_label
    return party

In [164]:
# Flatten the nested per-municipality structure into one flat record per
# candidate, ready to be turned into a DataFrame.
processed_candidates = []

for municipality in all_candidates:
    # The id is the same for every candidate of a municipality, so it is
    # computed once per municipality.
    municipality_id = get_ine_id(municipality['territoryKey'])

    processed_candidates.extend(
        {
            'ine_id': municipality_id,
            'party': normalize_party_names(candidate['party']),
            # Only the first effective candidate is used.
            'name': correct_names_encoding_problems(candidate['effectiveCandidates'][0]),
        }
        for candidate in municipality['candidates']
    )

In [165]:
# Build the candidates table in a single chained expression, indexed by the
# municipality id ('ine_id').
candidates_df = DataFrame(processed_candidates).set_index(['ine_id'])

In [172]:
# Show all candidates of one municipality. `.ix` is deprecated and removed in
# current pandas; `.loc` is the label-based replacement. The index holds the
# integers returned by get_ine_id, so the label is the integer 3101 rather
# than the string '3101'.
candidates_df.loc[3101]

Unnamed: 0_level_0,name,party
ine_id,Unnamed: 1_level_1,Unnamed: 2_level_1
3101,Martinho Gouveia da Câmara,CDS-PP
3101,José Ferdinando Correia da Costa,PCP-PEV
3101,Carlos Manuel Figueira de Ornelas Teles,PPD/PSD
3101,Maria Patrícia Mendes Sumares Nobrega,PS


In [166]:
# List every distinct party label found among the candidates, in sorted order.
# print is written as a parenthesised single-argument call so the cell runs
# unchanged on both Python 2 and Python 3.
for party in np.sort(candidates_df.party.unique()):
    print(party)

B.E.
CDS-PP
CDS-PP.MPT
CDS-PP.MPT.PPM
CDS-PP.PPD/PSD
I
III
IV
IX
JPP
MPT
PAN
PCP-PEV
PCTP/MRPP
PND
PNR
PPD/PSD
PPD/PSD.CDS-PP
PPD/PSD.CDS-PP.MPT
PPD/PSD.CDS-PP.MPT.PPM
PPD/PSD.CDS-PP.PPM
PPD/PSD.CDS-PP.PPM.MPT
PPD/PSD.MPT
PPD/PSD.MPT.PPM
PPD/PSD.PPM
PPD/PSD.PPM.MPT
PPM
PPM-PPV
PPM/PPV/PND
PPV
PS
PS-BE-PND-MPT-PTP-PAN
PS-PTP-PND-BE
PTP
V
VI
VII
VIII
X
XI
XII
XIII
XIX
XV
XVI
XVII
XVIII
XX


In [158]:
# Display the party metadata read from page 1: a list of dicts with
# 'acronym' and 'name' keys. The names are shown as received, i.e. still
# carrying the byte-per-character UTF-8 garbling visible in the output.
parties

[{u'acronym': u'B.E.', u'name': u'Bloco de esquerda'},
 {u'acronym': u'CDS-PP', u'name': u'CDS-PARTIDO POPULAR'},
 {u'acronym': u'CDS-PP.MPT', u'name': u'"POR CONST\xc3\x82NCIA"'},
 {u'acronym': u'CDS-PP.MPT.PPM', u'name': u'"Odivelas Merece Mais"'},
 {u'acronym': u'CDS-PP.PPD/PSD', u'name': u'"DESENVOLVER PORTEL"'},
 {u'acronym': u'I', u'name': u'"Amigos do Po\xc3\xa7o do Canto" - APC'},
 {u'acronym': u'II', u'name': u'JPP - Juntos Pelo Povo Santa Cruz'},
 {u'acronym': u'III',
  u'name': u'Lista Independente de Fornelo e Vair\xc3\xa3o (LIFV)'},
 {u'acronym': u'IV', u'name': u'MIV - Movimento Independente Vilarmourense'},
 {u'acronym': u'IX', u'name': u'S\xc3\x83O MARTINHO SEMPRE - SMS'},
 {u'acronym': u'MPT', u'name': u'PARTIDO DA TERRA'},
 {u'acronym': u'PAN', u'name': u'Partido pelos Animais e pela Natureza'},
 {u'acronym': u'PCP - PEV',
  u'name': u'CDU - Coliga\xc3\xa7\xc3\xa3o Democr\xc3\xa1tica Unit\xc3\xa1ria'},
 {u'acronym': u'PCTP/MRPP',
  u'name': u'PARTIDO COMUNISTA DOS TRA