In [2]:
import csv
import json
import threading
import statistics
from operator import attrgetter
from itertools import groupby

In [3]:
class Election2016(object):
    """One municipality row of the 2016 electorate data.

    Instances are built from a mapping keyed by column name (for example a
    row produced by ``csv.DictReader``). ``uf`` and ``nome_municipio`` are
    stored as given; every other column is converted with ``int``.
    """

    # Columns converted with int(), in the order they are stored after the
    # three identifying attributes.
    _COUNT_COLUMNS = (
        'total_eleitores',
        'f_16', 'f_17', 'f_18_20', 'f_21_24', 'f_25_34', 'f_35_44',
        'f_45_59', 'f_60_69', 'f_70_79', 'f_sup_79',
        'gen_feminino', 'gen_masculino', 'gen_nao_informado',
    )

    def __init__(self, dict):
        """Populate the record from ``dict``.

        Raises ``KeyError`` if a column is missing and ``ValueError`` if a
        numeric column cannot be parsed by ``int``.
        """
        # The parameter name shadows the builtin ``dict``; it is kept as-is so
        # keyword callers continue to work. Use a local alias for readability.
        row = dict
        self.cod_municipio_tse = int(row['cod_municipio_tse'])
        self.uf = row['uf']
        self.nome_municipio = row['nome_municipio']
        for column in self._COUNT_COLUMNS:
            setattr(self, column, int(row[column]))

    def __str__(self):
        """Return the municipality name."""
        return self.nome_municipio

In [4]:
def read_file(filename='/tmp/BR_eleitorado_2016_municipio.csv'):
    """Load the electorate CSV and return one ``Election2016`` per data row.

    Args:
        filename: Path of the ';'-delimited, UTF-8 encoded CSV file whose
            first line holds the column names. Defaults to the location the
            notebook originally read from.

    Returns:
        A list of ``Election2016`` objects in file order.

    Raises:
        OSError: If the file cannot be opened.
        KeyError / ValueError: Propagated from ``Election2016`` when a row
            lacks a column or holds a non-integer count.
    """
    # newline='' hands raw line endings to the csv module, which is how its
    # documentation says files must be opened so that newlines embedded in
    # quoted fields are parsed correctly.
    with open(filename, 'r', newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f, delimiter=';')
        return [Election2016(row) for row in reader]

In [5]:
def write_file(electionList, filename):
    """Write election records to ``filename`` as a ';'-delimited UTF-8 CSV.

    A header row with the column names is written first. Each record is
    converted to a row through ``vars``, so the values come from the
    record's instance attributes.

    Args:
        electionList: Iterable of records exposing the column attributes.
        filename: Destination path; an existing file is overwritten.
    """
    columns = [
        'cod_municipio_tse', 'uf', 'nome_municipio', 'total_eleitores',
        'f_16', 'f_17', 'f_18_20', 'f_21_24', 'f_25_34', 'f_35_44', 'f_45_59',
        'f_60_69', 'f_70_79', 'f_sup_79',
        'gen_feminino', 'gen_masculino', 'gen_nao_informado',
    ]
    with open(filename, 'w', newline='', encoding='utf-8') as out:
        csv_out = csv.DictWriter(out, fieldnames=columns, delimiter=';')
        csv_out.writeheader()
        # Materialise all rows first, then hand them to the writer in one call.
        rows = [vars(record) for record in electionList]
        csv_out.writerows(rows)

In [6]:
def group_female_by_uf(ufs, grouped_by_uf):
    """Pair each UF with the total of ``gen_feminino`` over its group.

    Args:
        ufs: UF identifiers, positionally aligned with ``grouped_by_uf``.
        grouped_by_uf: One iterable of records per UF; each record exposes
            a ``gen_feminino`` attribute.

    Returns:
        A list of ``(uf, female_total)`` tuples. If the two inputs differ in
        length, the result stops at the shorter one.
    """
    # Total the female voters of every group first ...
    female_totals = []
    for municipalities in grouped_by_uf:
        female_totals.append(sum(m.gen_feminino for m in municipalities))

    # ... then attach each total to its UF.
    return [(uf, total) for uf, total in zip(ufs, female_totals)]

In [7]:
def save_statistics(group_female_uf, filename='pleito_2016_stat.json'):
    """Compute summary statistics of the per-UF totals and save them as JSON.

    Args:
        group_female_uf: Sequence of pairs whose second element is the
            numeric total for one UF (e.g. ``(uf, female_total)``).
        filename: Destination of the JSON report. Defaults to the file name
            the notebook originally wrote to, in the working directory.

    Raises:
        statistics.StatisticsError: If fewer than two totals are supplied;
            the sample variance and standard deviation need at least two
            data points (and the mean/median need at least one).
    """
    sum_by_uf = [s[1] for s in group_female_uf]

    jsonContent = {
        "Media aritmetica": statistics.mean(sum_by_uf),
        "Mediana": statistics.median(sum_by_uf),
        "Variancia": statistics.variance(sum_by_uf),
        "Desvio Padrao": statistics.stdev(sum_by_uf)
    }

    # Explicit encoding keeps the output independent of the platform default,
    # matching how the CSV files in this notebook are opened.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(jsonContent, f, indent=4)


In [8]:
def save_election_by_uf(grouped_by_uf):
    """Write one CSV per UF, ``/tmp/<UF>.csv``, using one thread per file.

    The function returns only after every writer thread has finished, so the
    files are complete when the caller continues.

    Args:
        grouped_by_uf: Iterable of non-lazy sequences of records, one
            sequence per UF; the UF is taken from the first record's ``uf``
            attribute. Empty groups are skipped because they carry no UF to
            name the file after and have nothing to write.

    Note:
        An exception raised inside a writer thread is not re-raised here; it
        is reported through the ``threading`` module's exception hook.
    """
    workers = []
    for election_uf in grouped_by_uf:
        if not election_uf:
            continue
        worker = threading.Thread(
            target=write_file,
            args=(election_uf, '/tmp/' + election_uf[0].uf + '.csv'),
        )
        worker.start()
        workers.append(worker)

    # Start everything first, then wait: the writes overlap, but none is left
    # running in the background once this function returns.
    for worker in workers:
        worker.join()

In [9]:
# Load every municipality record from the source CSV.
election = read_file()

# Order a copy by UF and then by TSE municipality code, and persist it.
sorted_election = list(election)
sorted_election.sort(key=attrgetter('uf', 'cod_municipio_tse'))
write_file(sorted_election, '/tmp/pleito_2016_classificado.csv')

# Split the ordered records into one list per UF; every group is non-empty,
# so its UF can be read from the first record.
grouped_by_uf = [list(members) for _, members in groupby(sorted_election, key=attrgetter('uf'))]
ufs = [members[0].uf for members in grouped_by_uf]

# Female-voter totals per UF and their summary statistics.
group_female = group_female_by_uf(ufs, grouped_by_uf)
save_statistics(group_female)

# One CSV file per UF.
save_election_by_uf(grouped_by_uf)