# TSE - Candidates from 2018 and 2022 in brazilian politics

## Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Loading the dataset

In [3]:
tse_data = pd.read_csv('br_tse_eleicoes_2018_2022.csv', low_memory=False)

In [4]:
tse_data.dtypes

ano                       int64
tipo_eleicao             object
sigla_uf                 object
id_municipio            float64
id_municipio_tse        float64
id_candidato_bd         float64
cpf                      object
titulo_eleitoral          int64
sequencial                int64
numero                    int64
nome                     object
nome_urna                object
numero_partido            int64
sigla_partido            object
cargo                    object
situacao                 object
ocupacao                 object
data_nascimento          object
idade                   float64
genero                   object
instrucao                object
estado_civil             object
nacionalidade            object
sigla_uf_nascimento      object
municipio_nascimento     object
email                    object
raca                     object
dtype: object

In [5]:
tse_data.columns

Index(['ano', 'tipo_eleicao', 'sigla_uf', 'id_municipio', 'id_municipio_tse',
       'id_candidato_bd', 'cpf', 'titulo_eleitoral', 'sequencial', 'numero',
       'nome', 'nome_urna', 'numero_partido', 'sigla_partido', 'cargo',
       'situacao', 'ocupacao', 'data_nascimento', 'idade', 'genero',
       'instrucao', 'estado_civil', 'nacionalidade', 'sigla_uf_nascimento',
       'municipio_nascimento', 'email', 'raca'],
      dtype='object')

## Change the column names (pt-br -> en-us)

In [49]:
tse_data = tse_data.rename(columns={'ano':'year',
                        'tipo_eleicao':'election_type',
                        'sigla_uf':'state',
                        'id_municipio':'id_city',
                        'id_municipio_tse':'id_city_tse',
                        'id_candidato_bd':'id_candidate_bd',
                        'titulo_eleitoral':'voter_registration',
                        'sequencial':'sequential',
                        'numero':'number',
                        'nome':'name',
                        'nome_urna':'ballot_name',
                        'numero_partido':'party_number',
                        'sigla_partido':'party_acronym',
                        'cargo':'office',
                        'situacao':'situation',
                        'ocupacao':'occupation',
                        'data_nascimento':'birth_date',
                        'idade':'age',
                        'genero':'gender',
                        'instrucao':'education',
                        'estado_civil':'marital_status',
                        'nacionalidade':'nationality',
                        'sigla_uf_nascimento':'birth_state_acronym',
                        'municipio_nascimento':'birth_city',
                        'raca':'race'})


## Questions about the Dataset

1. Number of parties per state
2. Average age per party
3. Five parties with more candidates
4. Occupation more present in each party
5. Party with more white people
6. Five parties with more "deputados federais" from São Paulo
7. Average age per marital status
8. Party with more women in Brazil

In [63]:
## 1. Number of parties per state

tse_data.groupby('state')['party_acronym'].apply(lambda x: x.value_counts().shape[0])

state
AC    35
AL    37
AM    36
AP    36
BA    38
CE    38
DF    38
ES    37
GO    36
MA    37
MG    38
MS    35
MT    35
PA    38
PB    37
PE    38
PI    38
PR    37
RJ    38
RN    37
RO    34
RR    35
RS    38
SC    37
SE    37
SP    38
TO    34
Name: party_acronym, dtype: int64

In [14]:
## 2. Average age per party

tse_data.groupby('party_acronym')['age'].mean()

party_acronym
AGIR             48.905561
AVANTE           45.475749
CIDADANIA        45.779419
DC               46.844702
DEM              46.267040
MDB              46.990251
NOVO             44.648613
PATRIOTA         45.935545
PC do B          45.173917
PCB              42.836576
PCO              44.424242
PDT              46.201688
PHS              47.170191
PL               45.828519
PMB              47.175061
PMN              46.765380
PODE             45.611245
PP               46.088904
PPL              48.117057
PROS             45.405947
PRP              48.279214
PRTB             46.454408
PSB              45.881629
PSC              45.552717
PSD              45.735165
PSDB             46.950754
PSL              45.047843
PSOL             44.221874
PSTU             48.127586
PT               46.810104
PTB              46.771712
PTC              46.117502
PV               46.497960
REDE             45.048718
REPUBLICANOS     45.401987
SOLIDARIEDADE    45.409172
UNIÃO         

In [18]:
## 3. Five parties with more candidates

tse_data.groupby('party_acronym')['name'].count().sort_values(ascending=False).head(5)

party_acronym
MDB     47594
PSD     41574
PP      40583
PSDB    35069
PT      34203
Name: name, dtype: int64

In [42]:
## 4. Occupation more present in each party

tse_data.groupby('party_acronym')['occupation'].apply(lambda x: x.value_counts(ascending=False).index[0] 
                                                      if str(x.value_counts(ascending=False).index[0]) != 'outros' 
                                                      else x.value_counts(ascending=False).index[1])

## I prefered picking the occupations different of 'outros' (others, in english)

party_acronym
AGIR                                               empresario
AVANTE                                             empresario
CIDADANIA                                          empresario
DC                                                 empresario
DEM                                servidor publico municipal
MDB                                                agricultor
NOVO                                               empresario
PATRIOTA                                           empresario
PC do B                                            agricultor
PCB              estudante bolsista estagiario e assemelhados
PCO                                 professor de ensino medio
PDT                                                agricultor
PHS                                                empresario
PL                                                 empresario
PMB                                                empresario
PMN                                                empre

In [46]:
## 5. Party with more white people

tse_data[tse_data['race'] == 'branca'].groupby('party_acronym')['name'].count().sort_values(ascending=False).head(1)

party_acronym
MDB    26075
Name: name, dtype: int64

In [52]:
## 6. Five parties with more "deputados federais" from São Paulo

tse_data[tse_data['office'] == 'deputado federal'].groupby('party_acronym')['name'].count().sort_values(ascending=False).head()

party_acronym
PATRIOTA        888
PSOL            846
MDB             836
REPUBLICANOS    790
AVANTE          788
Name: name, dtype: int64

In [53]:
## 7. Average age per marital status

tse_data.groupby('marital_status')['age'].mean()

marital_status
casado(a)        48.590926
divorciado(a)    51.001848
solteiro(a)      40.566264
viuvo(a)         58.513366
Name: age, dtype: float64

In [62]:
## 8. Party with more women in Brazil

tse_data[tse_data['gender']=='feminino'].groupby('party_acronym')['name'].count().sort_values(ascending=False).index[0]

'MDB'