In [1]:
import pandas as pd
import numpy as np
from Fraction import Fraction

In [2]:
# Read the primary-results CSV (path is relative to the notebook's working
# directory) into the DataFrame used by every cell below.
df = pd.read_csv(filepath_or_buffer='data/primary_results.csv')

# Helper object from the local `Fraction` module; only its
# `fract_votes_filter` attribute is used later in this notebook.
fract = Fraction()

In [3]:
# Preview the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,state,state_abbreviation,county,fips,party,candidate,votes,fraction_votes
0,Alabama,AL,Autauga,1001.0,Democrat,Bernie Sanders,544,0.182
1,Alabama,AL,Autauga,1001.0,Democrat,Hillary Clinton,2387,0.8
2,Alabama,AL,Baldwin,1003.0,Democrat,Bernie Sanders,2694,0.329
3,Alabama,AL,Baldwin,1003.0,Democrat,Hillary Clinton,5290,0.647
4,Alabama,AL,Barbour,1005.0,Democrat,Bernie Sanders,222,0.078


In [4]:
# Group the rows by candidate and aggregate the `votes` column, so each
# candidate gets the smallest, the average and the largest vote count found
# among its rows. This answers questions such as "what is the lowest count a
# candidate received anywhere?" or "which candidate has the highest peak?".
#
# The aggregations are named by string ('min', 'mean', 'max') instead of
# passing the builtins min/max and np.mean: pandas >= 2.1 emits a
# FutureWarning for those callables because it will stop translating them to
# the optimized groupby methods. The string aliases select those methods
# explicitly and yield the same column labels.
#
# The earlier bare `df.groupby('candidate')` statement was dropped: it was not
# the last expression of the cell, so its result was built and discarded
# without ever being displayed.
df.groupby('candidate').aggregate({'votes': ['min', 'mean', 'max']})

Unnamed: 0_level_0,votes,votes,votes
Unnamed: 0_level_1,min,mean,max
candidate,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
No Preference,0,23.225071,580
Uncommitted,0,0.434343,16
Ben Carson,0,338.258238,9945
Bernie Sanders,0,2844.019501,434656
Carly Fiorina,0,139.366972,3612
Chris Christie,1,223.422018,7144
Donald Trump,0,3709.576408,179130
Hillary Clinton,0,3731.85541,590502
Jeb Bush,2,609.103226,9575
John Kasich,0,1160.052705,101217


In [5]:
# Keep only the rows whose `votes` value is greater than 50,000.
df[df['votes'].gt(50000)]

Unnamed: 0,state,state_abbreviation,county,fips,party,candidate,votes,fraction_votes
73,Alabama,AL,Jefferson,1073.0,Democrat,Hillary Clinton,67357,0.816
763,Arizona,AZ,Maricopa,4013.0,Democrat,Bernie Sanders,86942,0.398
764,Arizona,AZ,Maricopa,4013.0,Democrat,Hillary Clinton,126988,0.581
770,Arizona,AZ,Pima,4019.0,Democrat,Hillary Clinton,56317,0.573
800,Arizona,AZ,Maricopa,4013.0,Republican,Donald Trump,144522,0.462
802,Arizona,AZ,Maricopa,4013.0,Republican,Ted Cruz,72216,0.231
1349,California,CA,Alameda,6001.0,Democrat,Bernie Sanders,91324,0.458
1350,California,CA,Alameda,6001.0,Democrat,Hillary Clinton,107102,0.537
1362,California,CA,Contra Costa,6013.0,Democrat,Hillary Clinton,69809,0.594
1385,California,CA,Los Angeles,6037.0,Democrat,Bernie Sanders,434656,0.420


In [6]:
# Keep only the rows whose `votes` value is greater than 500,000.
# Note: even with such a large raw count, the share reported in
# `fraction_votes` is well short of unanimous.
df.query('votes > 500000')

Unnamed: 0,state,state_abbreviation,county,fips,party,candidate,votes,fraction_votes
1386,California,CA,Los Angeles,6037.0,Democrat,Hillary Clinton,590502,0.57


In [7]:
# Summarize `fraction_votes` per state (minimum, mean and maximum over all
# rows of that state) to spot the states with the highest vote shares.
#
# String aliases are used instead of the builtins min/max and np.mean:
# pandas >= 2.1 raises a FutureWarning for those callables, and the strings
# select the optimized groupby methods explicitly with the same column labels.
df.groupby('state').aggregate({'fraction_votes': ['min', 'mean', 'max']})

Unnamed: 0_level_0,fraction_votes,fraction_votes,fraction_votes
Unnamed: 0_level_1,min,mean,max
state,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Alabama,0.005,0.275793,0.93
Alaska,0.0,0.285729,1.0
Arizona,0.038,0.361693,0.674
Arkansas,0.0,0.269971,0.896
California,0.042,0.388228,0.847
Colorado,0.174,0.481016,0.811
Connecticut,0.065,0.391453,0.754
Delaware,0.116,0.3912,0.711
Florida,0.016,0.318236,0.764
Georgia,0.011,0.281687,0.91


In [8]:
# Same summary of `fraction_votes` (minimum, mean and maximum), this time
# computed per candidate.
#
# String aliases are used instead of the builtins min/max and np.mean:
# pandas >= 2.1 raises a FutureWarning for those callables, and the strings
# select the optimized groupby methods explicitly with the same column labels.
df.groupby('candidate').aggregate({'fraction_votes': ['min', 'mean', 'max']})

Unnamed: 0_level_0,fraction_votes,fraction_votes,fraction_votes
Unnamed: 0_level_1,min,mean,max
candidate,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
No Preference,0.0,0.006484,0.03
Uncommitted,0.0,0.000455,0.013
Ben Carson,0.0,0.058941,0.415
Bernie Sanders,0.0,0.493316,1.0
Carly Fiorina,0.0,0.022097,0.117
Chris Christie,0.002,0.017773,0.087195
Donald Trump,0.0,0.466217,0.915
Hillary Clinton,0.0,0.461302,1.0
Jeb Bush,0.004,0.044524,0.121
John Kasich,0.0,0.122869,0.639


In [9]:
# Some candidates took the entire vote share (fraction_votes exactly 1.0) in
# certain rows; the original note names Ted Cruz, Hillary Clinton and Bernie
# Sanders. List those rows to see where it happened.
df.loc[df['fraction_votes'].eq(1.0)]

Unnamed: 0,state,state_abbreviation,county,fips,party,candidate,votes,fraction_votes
475,Alaska,AK,State House District 12,90200112.0,Democrat,Bernie Sanders,10,1.0
499,Alaska,AK,State House District 23,90200123.0,Democrat,Bernie Sanders,13,1.0
513,Alaska,AK,State House District 3,90200103.0,Democrat,Bernie Sanders,7,1.0
517,Alaska,AK,State House District 31,90200131.0,Democrat,Bernie Sanders,16,1.0
519,Alaska,AK,State House District 32,90200132.0,Democrat,Bernie Sanders,13,1.0
531,Alaska,AK,State House District 38,90200138.0,Democrat,Bernie Sanders,17,1.0
537,Alaska,AK,State House District 40,90200140.0,Democrat,Bernie Sanders,12,1.0
539,Alaska,AK,State House District 5,90200105.0,Democrat,Bernie Sanders,15,1.0
541,Alaska,AK,State House District 6,90200106.0,Democrat,Bernie Sanders,12,1.0
545,Alaska,AK,State House District 8,90200108.0,Democrat,Bernie Sanders,7,1.0


In [10]:
# Restrict the previous selection to Hillary Clinton: rows where she holds
# the entire vote share (fraction_votes exactly 1.0).
df.loc[
    df['fraction_votes'].eq(1.0)
    & df['candidate'].eq('Hillary Clinton')
]

Unnamed: 0,state,state_abbreviation,county,fips,party,candidate,votes,fraction_votes
8142,Maine,ME,Amity,92300012.0,Democrat,Hillary Clinton,1,1.0
8160,Maine,ME,Atkinson,92300020.0,Democrat,Hillary Clinton,1,1.0
8168,Maine,ME,Avon,92300024.0,Democrat,Hillary Clinton,1,1.0
8186,Maine,ME,Beaver Cove,92300033.0,Democrat,Hillary Clinton,1,1.0
8188,Maine,ME,Beddington,92300034.0,Democrat,Hillary Clinton,1,1.0
8292,Maine,ME,Caswell,92300088.0,Democrat,Hillary Clinton,1,1.0
8334,Maine,ME,Crawford,92300111.0,Democrat,Hillary Clinton,1,1.0
8400,Maine,ME,Edinburg,92300144.0,Democrat,Hillary Clinton,1,1.0
8576,Maine,ME,Lakeville,92300235.0,Democrat,Hillary Clinton,1,1.0
8650,Maine,ME,Masardis,92300274.0,Democrat,Hillary Clinton,1,1.0


In [15]:
# Group by state and keep all rows of every state group for which
# `fract.fract_votes_filter` returns True (groupby.filter semantics).
# NOTE(review): the predicate lives in the local `Fraction` module and is not
# visible here -- presumably it tests `fraction_votes`; confirm its criterion.
df.groupby(by='state').filter(func=fract.fract_votes_filter)

Unnamed: 0,state,state_abbreviation,county,fips,party,candidate,votes,fraction_votes
0,Alabama,AL,Autauga,1001.0,Democrat,Bernie Sanders,544,0.182
1,Alabama,AL,Autauga,1001.0,Democrat,Hillary Clinton,2387,0.800
2,Alabama,AL,Baldwin,1003.0,Democrat,Bernie Sanders,2694,0.329
3,Alabama,AL,Baldwin,1003.0,Democrat,Hillary Clinton,5290,0.647
4,Alabama,AL,Barbour,1005.0,Democrat,Bernie Sanders,222,0.078
5,Alabama,AL,Barbour,1005.0,Democrat,Hillary Clinton,2567,0.906
6,Alabama,AL,Bibb,1007.0,Democrat,Bernie Sanders,246,0.197
7,Alabama,AL,Bibb,1007.0,Democrat,Hillary Clinton,942,0.755
8,Alabama,AL,Blount,1009.0,Democrat,Bernie Sanders,395,0.386
9,Alabama,AL,Blount,1009.0,Democrat,Hillary Clinton,564,0.551


In [None]:
# Sanity check: total number of votes over all rows whose state abbreviation
# is 'AL'. Row mask and column are selected in a single .loc step.
df.loc[df['state_abbreviation'] == 'AL', 'votes'].sum()

In [16]:
# Total votes for each (state, candidate) pair, returned as a Series with a
# two-level index.
df.groupby(['state', 'candidate'])['votes'].agg('sum')

state          candidate      
Alabama        Ben Carson           87517
               Bernie Sanders       76399
               Donald Trump        371735
               Hillary Clinton     309928
               John Kasich          37970
               Marco Rubio         159802
               Ted Cruz            180608
Alaska         Ben Carson            2401
               Bernie Sanders         440
               Donald Trump          7346
               Hillary Clinton         99
               John Kasich            892
               Marco Rubio           3318
               Ted Cruz              7973
Arizona        Bernie Sanders      163400
               Donald Trump        249916
               Hillary Clinton     235697
               John Kasich          53040
               Ted Cruz            132147
Arkansas       Ben Carson           23173
               Bernie Sanders       64868
               Donald Trump        133144
               Hillary Clinton     144580
   