In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import scipy.stats

import statsmodels.api as sm


%matplotlib inline

# Notebook-wide matplotlib defaults: figure size plus font sizes for axis
# labels, titles and tick labels.
plt.rcParams['figure.figsize'] = (10, 10)
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['axes.titlesize'] = 24
plt.rcParams['xtick.labelsize'] = 13
plt.rcParams['ytick.labelsize'] = 13

In [54]:
# Election results used throughout this notebook; the cells below read its
# `concelho`, `year`, `votes` and `gender` columns.
# NOTE(review): the path is relative, so it resolves against the kernel's
# working directory.
election_results = pd.read_csv(
    'data/processed_data/elections_camaras_municipais_portugal.csv'
)

In [55]:
# Add a boolean `winner` column: True for the candidate with the most votes
# in each (concelho, year) contest.
#
# `idxmax` returns the *index label* of the row holding the maximum, which is
# what the label-based lookup below needs. `Series.argmax` only did that as a
# deprecated alias in old pandas; in current pandas it returns an integer
# position within the filtered subset, which would flag the wrong rows.
#
# sort=False keeps the contests in order of first appearance, so
# `winners_indexes` stays a plain list in the same order as before.
winners_indexes = (
    election_results
    .groupby(['concelho', 'year'], sort=False)
    .votes
    .idxmax()
    .tolist()
)

# Every contest must point at a different row; a repeated label would mean
# the index of election_results is not unique.
assert len(set(winners_indexes)) == len(winners_indexes)

election_results['winner'] = election_results.index.isin(winners_indexes)

In [39]:
def percentage_of_gender_candidates(gender, year, election_results):
    """Return the percentage of rows of `year` whose gender is `gender`.

    Parameters
    ----------
    gender : value looked up in the `gender` column (e.g. 'female').
    year : value compared against the `year` column.
    election_results : DataFrame with at least `year` and `gender` columns.

    Returns
    -------
    float
        Percentage in the range 0-100. Rows with a missing gender are left
        out of both numerator and denominator (`value_counts` drops them).
        0.0 when `gender` does not occur in that year; NaN when the year has
        no rows with a gender at all, since the share is then undefined.
    """
    election_results_year = election_results[election_results.year == year]

    counts_gender_year = election_results_year.gender.value_counts()
    total = counts_gender_year.sum()
    if total == 0:
        return float('nan')

    # .get() instead of [] so an absent gender yields 0 rather than KeyError.
    # float() keeps true division under Python 2.
    return counts_gender_year.get(gender, 0) / float(total) * 100

In [40]:
# Share of women among all candidates, per election year.
# Single-argument print(...) calls behave the same under the Python 2 print
# statement and the Python 3 print function, so this cell runs on either.
print('% of Woman Candidates (2009)')
print(percentage_of_gender_candidates('female', 2009, election_results))
print('')
print('% of Woman Candidates (2013)')
print(percentage_of_gender_candidates('female', 2013, election_results))

% of Woman Candidates (2009)
12.8690386071

% of Woman Candidates (2013)
13.6397331357


In [41]:
def percentage_of_gender_winners(gender, year, election_results):
    """Return the percentage of winners of `year` whose gender is `gender`.

    Parameters
    ----------
    gender : value looked up in the `gender` column (e.g. 'female').
    year : value compared against the `year` column.
    election_results : DataFrame with at least `year`, `gender` and a
        boolean `winner` column.

    Returns
    -------
    float
        Percentage in the range 0-100, computed over rows of `year` where
        `winner` is True. Rows with a missing gender are left out
        (`value_counts` drops them). 0.0 when `gender` does not occur among
        that year's winners; NaN when there are no winners with a gender
        for that year, since the share is then undefined.
    """
    is_winner_of_year = (election_results.year == year) & (election_results.winner)
    election_winners_year = election_results[is_winner_of_year]

    counts_gender_year = election_winners_year.gender.value_counts()
    total = counts_gender_year.sum()
    if total == 0:
        return float('nan')

    # .get() instead of [] so an absent gender yields 0 rather than KeyError.
    # float() keeps true division under Python 2.
    return counts_gender_year.get(gender, 0) / float(total) * 100

In [106]:
# Share of women among the winning candidates, per election year.
# Single-argument print(...) calls behave the same under the Python 2 print
# statement and the Python 3 print function, so this cell runs on either.
print('% of Woman Winners (2009)')
print(percentage_of_gender_winners('female', 2009, election_results))
print('')
print('% of Woman Winners (2013)')
print(percentage_of_gender_winners('female', 2013, election_results))

% of Woman Winners (2009)
7.46753246753

% of Woman Winners (2013)
7.46753246753


In [107]:
# Rows flagged as winners, split by election year.
election_winners_2009 = election_results.loc[
    election_results.winner & (election_results.year == 2009)
]
election_winners_2013 = election_results.loc[
    election_results.winner & (election_results.year == 2013)
]

In [108]:
# Concelhos whose winning candidate is recorded as female, per election year.
# The values go straight into a set, which is unordered, so no sort step is
# needed before building it.
concelhos_with_woman_elected_2009 = set(
    election_winners_2009.loc[election_winners_2009.gender == 'female', 'concelho']
)

concelhos_with_woman_elected_2013 = set(
    election_winners_2013.loc[election_winners_2013.gender == 'female', 'concelho']
)

In [104]:
# Concelhos in the 2009 set that are missing from the 2013 set.
concelhos_with_woman_elected_2009.difference(concelhos_with_woman_elected_2013)

{'ALMADA',
 'ANGRA DO HERO\xc3\x8dSMO',
 'CAMINHA',
 'MIRANDA DO CORVO',
 'MONTIJO',
 'NELAS',
 'PALMELA',
 'PONTA DELGADA',
 'SALVATERRA DE MAGOS',
 'TROFA',
 'VILA DE REI',
 'VILA FRANCA DE XIRA',
 'VILA VELHA DE R\xc3\x93D\xc3\x83O'}

In [105]:
# Concelhos in the 2013 set that are missing from the 2009 set.
concelhos_with_woman_elected_2013.difference(concelhos_with_woman_elected_2009)

{'ALANDROAL',
 'AMADORA',
 'ANADIA',
 'ARRAIOLOS',
 'CONST\xc3\x82NCIA',
 'FREIXO DE ESPADA \xc3\x80 CINTA',
 'LAGOS',
 'MONTEMOR-O-NOVO',
 'MOUR\xc3\x83O',
 'PORTALEGRE',
 'PORTIM\xc3\x83O',
 'TOMAR',
 'VILA DO CONDE'}