# Parsing Butler County Election results

The following notebook parses the archived 2018 precinct-level election results from Butler County from plain text into a dataframe (a spreadsheet-like data structure) to be used for matching.

Steps:

1. Copy the plain text data from Butler County's [website](https://www2.co.butler.pa.us/election/2018gen/home.htm)
2. Store the plain text locally at `butler_county_raw.txt`
3. Use Excel's column parsing feature to create `butler_county.csv`
4. Use this script to parse the primitive CSV into a CSV matching the Open Elections style for its statewide precinct-level results.
5. Store the resulting file locally at `butler_county_parsed.csv`

In [1]:
import pandas as pd
import numpy as np
import math
import os
import re
# Display the current working directory; the relative file names used in the
# cells below ('butler_county.csv', 'butler_county_parsed.csv') resolve against it.
os.getcwd()

'/Users/baxterdemers/pa-2018/parsing_election_results/butler'

In [2]:
# Load the column-split CSV (the file described in step 3 above) into a dataframe.
df = pd.read_csv(filepath_or_buffer='butler_county.csv')

# Preview the first five rows to check the columns were read as expected.
df.head(n=5)

Unnamed: 0,candidate,votes,election_day,absentee,provisional
0,0001 ADAMS TOWNSHIP 1,,,,
1,TOTAL,,,,
2,REGISTERED VOTERS - TOTAL . . . . . .,3957.0,,,
3,BALLOTS CAST - TOTAL. . . . . . . .,2446.0,2323.0,122.0,1.0
4,VOTER TURNOUT - TOTAL . . . . . . .,,,,


In [3]:
# Empty frame that fixes the column names and their order for the parsed results.
output = pd.DataFrame(
    columns=[
        'county',
        'precinct',
        'office',
        'district',
        'candidate',
        'party',
        'votes',
        'absentee',
        'election_day',
    ]
)

In [4]:
# Lookup from an office heading as it appears in the raw report to the office
# name written to the output. Headings are looked up after being stripped.
d = dict([
    ('STRAIGHT PARTY VOTING', 'Straight Party'),
    ('UNITED STATES SENATOR', 'U.S. Senate'),
    ('GOVERNOR', 'Governor'),
    ('REPRESENTATIVE IN CONG', 'U.S. House'),
    ('REPRESENTATIVE IN THE', 'State House'),
])

### Main parsing script

In [5]:
# Parse the raw rows into one record per (precinct, office, candidate).
#
# The loop is a small state machine driven by the `candidate` column:
#   * a blank cell arms `prev_blank`, meaning the next non-boilerplate row is
#     an office heading rather than a candidate;
#   * a cell starting with '00' is a precinct heading and becomes `prec`;
#   * report boilerplate and summary rows (see `skip_tokens`) are ignored;
#   * every remaining row is a candidate result for the current precinct/office.

# First words of rows that carry no candidate result.
skip_tokens = {'TOTAL', 'REGISTERED', 'BALLOTS', 'VOTER', 'DISTRICT', 'VOTE', 'WRITE-IN.', 'PREC', 'RUN'}

# Headings containing one of these keywords also carry a district number.
district_offices = (
    ('CONGRESS', 'U.S. House'),
    ('GENERAL ASSEMBLY', 'State House'),
)

# Party for candidate rows that have no "(PARTY)" suffix in the raw text.
missing_party = {'LISA BOEVING-LEARNED': 'DEM'}

# Reset all parser state up front so that re-running this cell can never attach
# a precinct/office left over in the kernel from a previous run.
prev_blank = False
prec = None
office = None
district = np.nan
lst = []

for idx, row in df.iterrows():
    can = row.candidate

    # Blank (NaN or whitespace-only) cell: separator before an office heading.
    if pd.isna(can) or not str(can).strip():
        prev_blank = True
        continue
    elif str(can).startswith('00'):
        # Precinct heading, e.g. '0001 ADAMS TOWNSHIP 1'. `prev_blank` is left
        # untouched on purpose: a pending office heading is still expected.
        prec = can
    elif can.startswith('REPORT') or can.split()[0] in skip_tokens:
        continue
    elif prev_blank:
        # Office heading. Congressional / General Assembly headings embed the
        # district number after the keyword; other headings are looked up in `d`.
        for keyword, office_name in district_offices:
            if keyword in can:
                office = office_name
                district = int(re.findall(r'\d+', can.split(keyword)[1])[0])
                break
        else:
            district = np.nan
            office = d[can.strip()]
        prev_blank = False
    else:
        # Candidate row: 'NAME (PARTY) ...'.
        if prec is None or office is None:
            raise ValueError(
                f'row {idx}: candidate row {can!r} appears before a precinct and office heading'
            )
        splits = can.split('(')
        can_name = splits[0].strip()
        if len(splits) > 1:
            party = splits[1].split(')')[0]
        elif can_name in missing_party:
            # Compare the stripped name so stray whitespace cannot defeat the override.
            party = missing_party[can_name]
        else:
            raise ValueError(f'row {idx}: no party found for candidate row {can!r}')
        if can_name == 'EBERT G BILL BEEMAN':
            # Override whatever the raw text gives for this candidate.
            party = 'LIB'
        lst.append({
            'county': 'Butler',
            'precinct': prec,
            'office': office,
            'district': district,
            'candidate': can_name,
            'party': party,
            'votes': row.votes,
            'absentee': row.absentee,
            'election_day': row.election_day,
        })

# Echo the first parsed record as a quick sanity check.
if lst:
    print(lst[0])

# Build the result in one step. DataFrame.append was removed in pandas 2.0, and
# rebuilding from `lst` (instead of appending to `output`) keeps the cell
# idempotent: re-running it no longer duplicates every row.
output = pd.DataFrame(lst, columns=output.columns)

{'county': 'Butler', 'precinct': '0001 ADAMS TOWNSHIP 1', 'office': 'Straight Party', 'district': nan, 'candidate': 'DEMOCRATIC', 'party': 'DEM', 'votes': '521', 'absentee': '29', 'election_day': '491'}


### Validation

In [6]:
# Candidates whose parsed party is an empty string; an empty array means none.
output.loc[output['party'] == '', 'candidate'].unique()

array([], dtype=object)

In [7]:
# Distinct party labels that were parsed, to spot unexpected values.
output['party'].unique()

array(['DEM', 'REP', 'GRN', 'IND', 'LIB', 'DEM/REP'], dtype=object)

In [8]:
# Eyeball the first 20 parsed rows.
output.head(n=20)

Unnamed: 0,county,precinct,office,district,candidate,party,votes,absentee,election_day
0,Butler,0001 ADAMS TOWNSHIP 1,Straight Party,,DEMOCRATIC,DEM,521,29,491
1,Butler,0001 ADAMS TOWNSHIP 1,Straight Party,,REPUBLICAN,REP,811,53,758
2,Butler,0001 ADAMS TOWNSHIP 1,Straight Party,,GREEN,GRN,1,0,1
3,Butler,0001 ADAMS TOWNSHIP 1,Straight Party,,INDEPENDENT,IND,0,0,0
4,Butler,0001 ADAMS TOWNSHIP 1,Straight Party,,LIBERTARIAN,LIB,1,0,1
5,Butler,0001 ADAMS TOWNSHIP 1,U.S. Senate,,"BOB CASEY, JR",DEM,1047,50,996
6,Butler,0001 ADAMS TOWNSHIP 1,U.S. Senate,,LOU BARLETTA,REP,1369,72,1297
7,Butler,0001 ADAMS TOWNSHIP 1,U.S. Senate,,NEAL GALE,GRN,6,0,6
8,Butler,0001 ADAMS TOWNSHIP 1,U.S. Senate,,"DALE R KERNS, JR",LIB,15,0,15
9,Butler,0001 ADAMS TOWNSHIP 1,Governor,,TOM WOLF,DEM,1115,48,1066


In [9]:
# Write the parsed results next to the notebook, without the dataframe index.
output.to_csv(path_or_buf='butler_county_parsed.csv', index=False)