-
Notifications
You must be signed in to change notification settings - Fork 0
/
ac-election-cruncher.py
213 lines (160 loc) · 7.42 KB
/
ac-election-cruncher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
from io import StringIO
from styleframe import StyleFrame
import re
import pandas as pd
import numpy as np
# paths
electionResultText = '/home/andy/Desktop/2019PrimaryRecanvass.txt'
enrollmentXLS = '/home/andy/Documents/GIS.Data/enroll/feb19-Enrollment/AlbanyED_feb19.xlsx'
outputPath = '/tmp/2019_primary_albany_county_races.xlsx'
yr = '2019'
# number to excel column letter
def letter(colNum):
import math
if colNum > 26:
return chr(ord('@')+math.floor(colNum/26))+chr(ord('@')+colNum%26)
else:
return chr(ord('@')+colNum)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None) # show everything when previewing
with open(electionResultText) as f:
data = f.read()
enroll = pd.read_excel(enrollmentXLS, header=4)
enroll = enroll[(enroll['STATUS']=='Active')].dropna()
enroll['ELECTION DIST'] = enroll['ELECTION DIST'].astype(str).str.replace(' ',' ')
# array containing election result dataframes
er = {}
# array containing election result dataframes
er = {}
# split each race which is divided by ten or more equal signs
for raceData in re.split('={10,}',data):
# blank out old data frame
df = None
# split lines
rows = raceData.split('\n')
# if this is a primary, then pull the party
# from the primary
party = rows[2].strip()[0:3]
race = ""
candidates = {}
startAt = 0
for i, line in enumerate(rows):
# find race name
if re.search('VOTES\s*?PERCENT', line):
if party in enroll.columns:
race = party + ' ' + rows[i+1]
race = race.replace(' ',' ')
race = race.rstrip()
# find candidate names
for result in re.findall('(\d\d)\s*?=\s*?(\w.*?)\d', line):
if party in enroll.columns:
candidates[int(result[0])] = result[1].rstrip() + \
" (" + party + ")"
else:
candidates[int(result[0])] = result[1].rstrip()
# find start at location for CSV reader
if re.findall('-{5,}', line):
startAt = i+3
break
# skips enrollment stats, as the don't have candidates
if not candidates or not 2 in candidates:
continue
if not race:
continue
#print(race)
df = pd.read_csv(
StringIO(raceData),
header=None,
skiprows=startAt,
sep='(?<=\d)\s{1,}(?=\d)',
engine='python',
on_bad_lines='warn')
df=df[df[0].str.contains('^\d{4}').fillna(False)] # ONLY ROWS STARTING WITH 4-DIGIT ED CODE
df.reset_index(drop=True, inplace=True) # reset index so we can use it in formulas
df=df.rename(candidates,axis=1) # rename columns
df.iloc[:,1:]=df.iloc[:,1:].apply(pd.to_numeric) # make sure all columns are numeric
df.iloc[:,1:]=df.iloc[:,1:].convert_dtypes('int32') # cast columns to int32
# crunching
df.insert(1, 'Ballot', df.iloc[:,1:].convert_dtypes('int32').sum(axis=1)) # add total column
try:
df.insert(2, 'Blanks', df['OVER VOTES'].convert_dtypes('int32')) # add blank
df.drop(labels=['OVER VOTES'], axis=1, inplace=True)
except:
print('No over votes/void')
df.insert(2, 'Blanks', 0)
try:
df['Blanks']+=df['UNDER VOTES'].convert_dtypes('int32') # add under votes
df.drop(labels=['UNDER VOTES'], axis=1, inplace=True)
except:
print('No blanks/under vote')
df.insert(3, 'Canvas', '=F'+(df.index+2).map(str)+'-G'+(df.index+2).map(str))
df.insert(4, 'TO %', '=(F'+(df.index+2).map(str)+'/E'+(df.index+2).map(str)+')')
df.insert(5, 'DO %', '=(G'+(df.index+2).map(str)+'/F'+(df.index+2).map(str)+')')
# create check columns
df['CHECK'] = '=F'+(df.index+2).map(str)+'-G'+(df.index+2).map(str)
df['CHECK %'] = '=0'
# add percent columns
for i, col in enumerate(df.columns[6:-2]):
try:
# check columns
if (len(df.columns)-i-2) > 0:
# add to check column
df['CHECK'] += '-'+letter(i+i+11)+(df.index+2).map(str)
# add check percent
df['CHECK %'] += '+'+letter(i+i+12)+(df.index+2).map(str)
# add percent
df.insert(i+i+7, col+' %', '=('+letter(i+i+11)+(df.index+2).map(str)+'/H'+(df.index+2).map(str)+')')
except:
pass
# add T W E Columns, drop combined field
df[['ED Code','Municipality','String','Ward','ED']]=df[0].str.extract('(\d\d\d\d)\s*?(.*?)(\s*?WARD\s*(\d*))?\s*?ED\s*(\d*)')
# temporary string used for data merge with enrollments
df['ELECTION DIST'] = df['Municipality'].str.strip().replace(' ',' ') + \
' '+df['Ward'].fillna(0).astype(str).str.zfill(3)+df['ED'].str.zfill(3).astype('str')
# merge on ED Key
if party in enroll.columns:
enrollParty = enroll[['ELECTION DIST',party]]
enrollParty = enrollParty.rename(columns={ party : 'Feb '+yr+ ' '+ party.title() + ' Enroll'})
else:
enrollParty = enroll[['ELECTION DIST', 'TOTAL']]
enrollParty = enrollParty.rename(columns={'TOTAL': 'Nov '+yr+ ' Enrollment'})
df=df.merge(enrollParty, on='ELECTION DIST', how='left')
df=df.drop(0, axis=1)
df=df.drop(labels=['ELECTION DIST','String'],axis=1)
# move columns to proper order
cols = list(df.columns)
df = df[cols[-5:]+cols[:-5]]
df['Municipality']=df['Municipality'].str.title()
df['Municipality'] = df['Municipality'].str.strip().replace(' ',' ') # remove whitespace around muni column
# array with election result dataframes
er[race]=df.fillna(0)
# write file
ew = pd.ExcelWriter(outputPath)
for i, race in enumerate(er):
raceStr = str(i+1)+' '+race
if (len(race)>28):
raceStr = str(i+1)+' '+race[:10] + '...' + race[-10:]
#print(raceStr)
# disble header, manually write, as pre-defined headers can't be formatted
er[race].fillna(0).to_excel(ew,sheet_name=raceStr, index=False)
headForm = ew.book.add_format({'text_wrap': 1, 'font_family': 'Arial', 'bold': True ,
'valign': 'vcenter', 'align': 'center', 'bg_color': '#CCCCCC'})
for colnum, value in enumerate(er[race].columns.values):
ew.sheets[raceStr].write(0, colnum, value, headForm)
ew.sheets[raceStr].set_row(0,50)
# set column width for all columns to 20, freeze panes
bodyForm = ew.book.add_format({'text_wrap': 1, 'font_name': 'Arial', 'num_format': '#,##0', 'valign': 'vcenter', 'align': 'right'})
bodyPerForm = ew.book.add_format({'text_wrap': 1, 'font_name': 'Arial', 'num_format': '0.0%', 'valign': 'vcenter', 'align': 'right'})
muniForm = ew.book.add_format({'text_wrap': 1, 'font_name': 'Arial', 'valign': 'vcenter', 'align': 'center'})
ew.sheets[raceStr].set_column('A1:A9999', 6, bodyForm)
ew.sheets[raceStr].set_column('B1:B9999', 15, muniForm)
ew.sheets[raceStr].set_column('C1:D9999', 4, bodyForm)
ew.sheets[raceStr].set_column('E1:H9999', 10, bodyForm)
ew.sheets[raceStr].set_column('I1:J9999', 10, bodyPerForm)
for colNum in range(11,40):
if colNum%2:
ew.sheets[raceStr].set_column(letter(colNum)+'1:'+letter(colNum)+'9999', 12, bodyForm)
else:
ew.sheets[raceStr].set_column(letter(colNum)+'1:'+letter(colNum)+'9999', 12, bodyPerForm)
ew.sheets[raceStr].freeze_panes('E2')
ew.save()