In [1]:
import requests as r
import pandas as pd
from bs4 import BeautifulSoup

%matplotlib inline

In [2]:
# Address of the page whose HTML is downloaded and parsed by the cells below.
url = 'https://results.thecaucuses.org/'

In [3]:
# Download the page. requests applies no timeout by default, so set one to keep
# the notebook from hanging on an unresponsive server, and fail loudly on a
# 4xx/5xx response instead of handing an error page to the parser.
resp = r.get(url, timeout=30)
resp.raise_for_status()

In [4]:
# Name the parser explicitly. Without it BeautifulSoup emits a warning and uses
# whichever parser happens to be installed, so the resulting tree (and the
# scrape below) could differ from one machine to another.
soup = BeautifulSoup(resp.text, 'html.parser')

In [5]:
# Container <div> that holds the per-county precinct rows.
precinct_table = soup.find('div', class_='precinct-table')

# Text of every <li> in the two header lists (ul.thead and ul.sub-head).
thead = [item.text for item in soup.find('ul', class_='thead').find_all('li')]
sub_head = [item.text for item in soup.find('ul', class_='sub-head').find_all('li')]

In [6]:
# Take every third header entry, starting at index 2, as the candidate list.
candidates = thead[2::3]

In [7]:
# One div.precinct-rows element per county block inside the precinct table.
counties = precinct_table.find_all('div', class_='precinct-rows')

In [8]:
# Scratch cell: binds the first county block, presumably for interactive
# inspection. The parsing loop in the next cell rebinds `county`, so the
# visible code does not depend on this assignment.
county = counties[0]

In [9]:
# Flatten the scraped table into one record per (county, precinct, candidate).
rows = []
for county in counties:
    # The county label is the text of div.precinct-county > div.wrap.
    county_name = county.find('div', class_='precinct-county').find('div', class_='wrap').text
    # Keep every <ul> under div.precinct-data whose class is not 'total-row'.
    county_data = county.find('div', class_='precinct-data').find_all(
        'ul', class_=lambda css_class: css_class != 'total-row')
    for row in county_data:
        row_data = [cell.text for cell in row.find_all('li')]
        precinct_name = row_data[0]
        # After the precinct name, cells are consumed three at a time:
        # first expression, final expression, SDE for one candidate.
        for slot, start in enumerate(range(1, len(row_data), 3)):
            first_exp, final_exp, sde = row_data[start:start + 3]
            candidate = candidates[slot]
            rows.append({
                'county': county_name,
                'precinct': precinct_name,
                'candidate': candidate,
                'first_exp': int(first_exp),
                'final_exp': int(final_exp),
                'sde': float(sde),
            })

In [10]:
# Build the frame from the collected records and order it by county,
# then precinct, then candidate.
df = (
    pd.DataFrame(rows)
    .sort_values(by=['county', 'precinct', 'candidate'])
)

In [11]:
# Sum the three numeric columns for each (candidate, county) pair.
(
    df
    .groupby(by=['candidate', 'county'])[['first_exp', 'final_exp', 'sde']]
    .sum()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,first_exp,final_exp,sde
candidate,county,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bennet,Adair,0,0,0.0000
Bennet,Adams,0,0,0.0000
Bennet,Allamakee,1,0,0.0000
Bennet,Appanoose,0,0,0.0000
Bennet,Audubon,0,0,0.0000
...,...,...,...,...
Yang,Winnebago,34,29,0.4500
Yang,Winneshiek,168,49,0.5120
Yang,Woodbury,163,40,0.6375
Yang,Worth,27,23,0.4000


In [12]:
# Write the per-(county, precinct, candidate) table to iowa_data.csv
# (relative path), omitting the DataFrame index column.
df.to_csv('iowa_data.csv', index=False)

In [17]:
def highlight_max(s, color='yellow'):
    '''
    Build CSS that highlights the maximum value(s) of a Series.

    Parameters
    ----------
    s : pandas.Series
        The values to inspect (one column or row when used via Styler.apply).
    color : str, default 'yellow'
        CSS colour used as the background of the highlighted entries.
        Extra keyword arguments given to Styler.apply are forwarded here.

    Returns
    -------
    list of str
        One entry per element of ``s``: ``'background-color: <color>'`` where
        the element equals ``s.max()``, and ``''`` everywhere else. Ties are
        all highlighted.
    '''
    is_max = s == s.max()
    css = 'background-color: ' + color
    return [css if v else '' for v in is_max]

# Totals per candidate over all rows, ordered from highest to lowest SDE.
summary = (
    df
    .groupby(['candidate'])[['first_exp', 'final_exp', 'sde']]
    .sum()
    .sort_values(by='sde', ascending=False)
)
# Render the table with highlight_max applied through the Styler.
summary.style.apply(highlight_max)

Unnamed: 0_level_0,first_exp,final_exp,sde
candidate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Buttigieg,36718,42235,550.339
Sanders,42672,44753,546.912
Warren,32007,34312,381.258
Biden,25699,23051,331.351
Klobuchar,21896,20525,255.216
Yang,8660,1752,21.6426
Steyer,3001,407,6.6719
Uncommitted,955,1410,3.9571
Other,158,204,0.6931
Bloomberg,214,20,0.2096
