Skip to content

Commit

Permalink
Merge pull request #1 from mlambright/master
Browse files Browse the repository at this point in the history
update fork
  • Loading branch information
stwlam committed Nov 2, 2016
2 parents 8010217 + c2c823f commit 40ac3e5
Show file tree
Hide file tree
Showing 13 changed files with 984 additions and 75 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# EA-Tools 2014
This repository contains a flask app which managed processes for VIP QA and DECC through the 2014 Election. VIP-specific scripts are stored in the vip folder, and decc-specific scripts are stored in the decc folder. Each folder, except app, contains its own README detailing the included scripts.


## For this app to run, the following environment variables must be defined:
### For DECC:
+ *DECCINPUT:* the directory to use to input to decc scripts
+ *DECCOUTPUT:* the directory to output decc files after processing
+ *PGHOST:* the URL or IP of the DECC database server
+ *PGUSER:* the username to connect to the DECC database
+ *PGPASSWORD:* the password associated with PGUSER
+ *PGDB:* the DECC database name

### For VIP QA:
+ *GOOGLE_NATIVE_APP_CLIENT_ID:* The client ID associated with the VIP QA app
+ *GOOGLE_NATIVE_APP_CLIENT_SECRET:* The client secret associated with the VIP QA app
+ *GOOGLE_PUBLIC_API_KEY:* The API key used to query the Google civicInfo API
+ *GOOGLE_GEOCODE_API_KEY:* The API key used to query the Google geocode API
+ *VIPQADATA:* The directory containing TargetSmart PII spreadsheets used to QA election-day voting sites
+ *EVIPQADATA:* The directory containing TargetSmart PII spreadsheets used to QA early voting sites
31 changes: 18 additions & 13 deletions config.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,34 @@
from oauth2client.client import OAuth2WebServerFlow
import os

api_id = os.getenv('GOOGLE_NATIVE_APP_CLIENT_ID')
api_secret = os.getenv('GOOGLE_NATIVE_APP_CLIENT_SECRET')
api_key = os.getenv('GOOGLE_PUBLIC_API_KEY')
geokey = os.getenv('GOOGLE_GEOCODE_API_KEY')
vip_qa_data = os.getenv('VIPQADATA')
ev_qa_data = os.getenv('EVIPQADATA')
#This import is needed for VIP
from oauth2client.client import OAuth2WebServerFlow

#These are generally useful
CSRF_ENABLED = True
SECRET_KEY = os.getenv('SECRET_KEY')

#These variables configure the DECC scripts
deccinputdir = os.getenv('DECCINPUT')
deccoutputdir = os.getenv('DECCOUTPUT')
HOST = os.getenv('PGHOST')
USER = os.getenv('PGUSER')
DB = os.getenv('PGDB')
PASSWORD = os.getenv('PGPASSWORD')

CSRF_ENABLED = True
SECRET_KEY = os.getenv('SECRET_KEY')

#These are all VIP Variables
api_id = os.getenv('GOOGLE_NATIVE_APP_CLIENT_ID')
api_secret = os.getenv('GOOGLE_NATIVE_APP_CLIENT_SECRET')
api_key = os.getenv('GOOGLE_PUBLIC_API_KEY')
geokey = os.getenv('GOOGLE_GEOCODE_API_KEY')
vip_qa_data = os.getenv('VIPQADATA')
ev_qa_data = os.getenv('EVIPQADATA')
states = {'AL': 'Alabama', 'AR': 'Arkansas', 'AZ': 'Arizona', 'ME': 'Maine',
'NH': 'New Hampshire', 'TN': 'Tennessee', 'LA': 'Louisiana',
'IL': 'Illinois', 'IN': 'Indiana', 'ID': 'Idaho', 'GA': 'Georgia',
'MA': 'Massachusetts', 'SD': 'South Dakota', 'VT': 'Vermont',
'FL': 'Florida', 'MS': 'Mississippi'}

'FL': 'Florida', 'MS': 'Mississippi', 'KY': 'Kentucky',
'TX': 'Texas', 'SC': 'South Carolina', 'WV': 'West Virginia',
'NM': 'New Mexico'}
scope1 = 'https://spreadsheets.google.com/feeds'
scope2 = 'https://www.googleapis.com/auth/drive'
scope = '{0} {1}'.format(scope1, scope2)
Expand All @@ -31,6 +37,5 @@
client_secret=api_secret,
scope=scope,
redirect_uri=redirect)

vipTemplateKey = '1qcqHBizQeFJwXsORMS_QS59gywuT9TRifwQe4BM_G3E'
evTemplateKey = '1_uEKMFrFxfu69Ws-2QbmUPm1kFNMY5txGJzG8bfzK4s'
34 changes: 34 additions & 0 deletions decc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# DECC Processing Scripts

These scripts handle DECC processing as managed by the Flask app. Each script is detailed below.

+ *processScans.py* processes newly received digital or physical orders.
+ *findClients()* accepts a psycopg2 cursor object and queries the DECC database to list clients.
+ *getProject()* accepts a client ID and psycopg2 cursor object. It queries the DECC database and returns a list of projects associated with the provided client ID.
+ *findOrders()* accepts a project ID and psycopg2 cursor object. It queries the DECC database and returns a list of orders associated with the provided project ID.
+ *createOrder()* accepts a project ID and psycopg2 cursor object. It inserts a new order record into the DECC database.
+ *findTypes()* accepts a project ID and psycopg2 cursor object. It queries the DECC database and returns a list of form types associated with the provided project ID.
+ *createPart()* accepts an order ID, type ID, state, booleans indicating whether the order is rush, will be uploaded to van, matched to vendor, or sent to quad, a psycopg2 cursor object, and a psycopg2 db connection object. It inserts a new part record into the DECC database and returns the part ID.
+ *obtainStartNum()* accepts a client ID and psycopg2 cursor object. It queries the DECC database and returns the next batch number associated with the given client.
+ *processPDF()* is run for digitally transmitted orders. It accepts an input directory, output directory, starting batch number, part ID, psycopg2 cursor object, and psycopg2 database connection object. It iterates over every file listed recursively in the input directory and inserts a new batch record (including total pages) in the DECC database for each. It returns the ending batch number and the total number of pages processed.
+ *processPhysical()* is run for physically shipped orders. It accepts an input file, output file, part ID, starting batch number, psycopg2 database connection object, psycopg2 cursor object, and order ID. It reads the input file and creates a new batch record for each row using the 'Batch Name' column in the input file. It then writes out all batches created with name and ID.
+ *getCursor()* accepts a host, database, username, and password, and returns a psycopg2 cursor object and a psycopg2 database connection object.


+ *processXLSX.py* processes returned data from the data-entry vendor.
+ *getBatches()* accepts a psycopg2 cursor object and returns a dictionary listing all DECC batch information from the DECC database.
+ *writeFile()* accepts a list of row dictionaries to be written, an output filename, and a list of headers. It writes out the list of dictionaries with the given headers to the output filename.
+ *processXLSX()* accepts an input filename referencing an Excel file, a psycopg2 database connection object, and a psycopg2 cursor object. It reads in the excel file, and iterates over each row matching to its original batch name. It then updates batch entries with the final number of records.
+ *main()* accepts a boolean indicating whether a file contains VR records, an input filename, and an output filename. It connects to the DECC database, reads in the input file, runs processXLSX, and calls vrqc.py if the file is voter registration. It then outputs to the output file.

+ *vrqc.py* runs quality checks on returned voter registration data.
+ *readCSV()* accepts a filename and returns a list of dictionaries containing data for each row.
+ *writeCSV()* accepts a list of dictionaries containing row data, an output filename, and a list of headers. It writes the list of dictionaries out to the output filename using the list of headers.
+ *getFIPS()* accepts a url containing FIPS code translation data. It returns a dictionary mapping FIPS codes to county names, and a dictionary mapping county names to state abbreviations.
+ *getZipURL()* accepts the URL of the page listing HUD zip-FIPS code mapping files, and obtains the URL of the most recent HUD file mapping Zip Codes to county FIPS codes.
+ *getZips()* accepts a URL of a HUD file mapping zip codes to county FIPS codes. It returns a list of dictionaries with keys ZIP and FIPS.
+ *buildZipTranslator()* accepts the FIPS dictionary created in getFIPS(), and the list created by getZips() and creates a single dictionary with zip codes as keys, and as values, a list of dictionaries with STATE and COUNTY as keys
+ *inspectRows()* accepts the list of row dictionaries, the zip translator, and the stateDict. It iterates over each row of voter registration data and checks whether the data included make any sense. It returns an updated list of row dictionaries, and an aggregate report.
+ *report()* writes out as JSON the object passed to it as an argument.
+ *concatenateFields()* concatenates the values for addresses and dates to create values that are more acceptable to VAN.
+ *run()* accepts as argument the list of Dictionaries from processXLSX.py, and returns a final QC'd version of that list.
10 changes: 5 additions & 5 deletions decc/processScans.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ def findClients(cursor):

def getProject(clientID, cursor):
    """Return the project ID linked to a DECC client.

    clientID -- value matched against decc_form_client.id
    cursor   -- DB-API cursor (psycopg2) used to run the query

    Raises IndexError when the query returns no rows.
    """
    # Bind clientID as a query parameter instead of formatting it into
    # the SQL text, so the driver handles quoting and the value cannot
    # alter the statement.
    cursor.execute('''SELECT project_id
                      FROM decc_form_client
                      WHERE id = %s
                   ''', (clientID,))
    return cursor.fetchall()[0][0]
Expand Down Expand Up @@ -67,9 +67,9 @@ def createPart(orderID, typeID, state, rush, van, match, quad, cursor, db):
match))
db.commit()
cursor.execute('''SELECT MAX(id)
FROM decc_form_part
WHERE order_id = {0}
'''.format(orderID))
FROM decc_form_part
WHERE order_id = {0}
'''.format(orderID))
result = cursor.fetchall()[0][0]
return result

Expand Down
99 changes: 59 additions & 40 deletions vip/FL.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@ def getValues(row):
city = row['vf_reg_cass_city']
zipcode = row['vf_reg_cass_zip']
county = row['vf_county_name']
return num, predir, name, suffix, postdir, city, zipcode, county
date = str(row['voterbase_dob'])
lastName = row['tsmart_last_name']
if len(date) == 8:
dob = '{0}/{1}/{2}'.format(date[4:6], date[6:8], date[:4])
return num, predir, name, suffix, postdir, city, zipcode, county, dob, lastName


def getHiddenValues(form):
Expand All @@ -24,15 +28,6 @@ def getHiddenValues(form):
return fields


def getCounties(soup):
    """Map upper-cased county names to their <option> values.

    Finds the county drop-down in the parsed page and returns a dict keyed
    by each option's stripped, upper-cased text.
    """
    dropdown = soup.find(
        'select',
        {'name': 'ctl00$ContentPlaceHolder1$usrCounty$cboCounty'})
    return {option.text.strip().upper(): option.get('value')
            for option in dropdown.find_all('option')}


def matchString(string, stringList):
maximum = 0
string = str(string.strip().upper())
Expand Down Expand Up @@ -160,35 +155,59 @@ def getLee(num, predir, name, suffix, postdir, zipcode):
return ppid, name, address


def electionsFL(lastName, dob, num, county):
    """Look up a voter's polling place through the electionsfl.org service.

    lastName -- voter's last name
    dob      -- birth date string sent as 'BirthDate'
    num      -- street number sent as 'StNumber'
    county   -- county name; spaces are removed and it is lower-cased
                before being sent

    The voter is first located with FindVoter, and the returned voter ID
    is then passed to GetElectionInfo.

    Returns a (ppid, name, address) tuple; ppid is always ''. All three
    are '' when either call returns an empty result list.
    """
    url = 'https://www.electionsfl.org/VoterInfo/asmx/service1.asmx/'
    county = county.replace(' ', '').lower()
    header = {'Content-Type': 'application/json; charset=UTF-8'}
    session = Session()
    payload = {'LastName': lastName, 'BirthDate': dob, 'StNumber': num,
               'County': county, 'FirstName': '', 'challengeValue': '',
               'responseValue': ''}
    response = session.post(url + 'FindVoter', data=json.dumps(payload),
                            headers=header)
    # NOTE(review): this echoes the raw voter record to stdout; confirm
    # that is still wanted. Single-argument print(...) behaves the same
    # on Python 2 and Python 3.
    print(response.text)
    # The service wraps a JSON-encoded string inside the 'd' member,
    # hence the double decode.
    voters = json.loads(json.loads(response.text)['d'])
    if not voters:
        # No matching voter: report "no data" instead of raising on [0].
        return '', '', ''
    voterID = str(voters[0]['FVRSVoterIdNumber'])
    payload = {'FVRSVoterIDNumber': voterID, 'CurCounty': county}
    response = session.post(url + 'GetElectionInfo', data=json.dumps(payload),
                            headers=header)
    print(response.text)
    results = json.loads(json.loads(response.text)['d'])
    if not results:
        return '', '', ''
    data = results[0]
    ppid = ''
    name = data['place_name']
    address = data['office_location']
    return ppid, name, address


def run(row):
    """Return (ppid, name, address) polling info for a Florida voter row.

    Dispatches on the row's county to the matching county lookup.
    Counties without a lookup, and lookups that raise, yield ('', '', '').
    """
    (num, predir, name, suffix, postdir, city, zipcode, county, dob,
     lastName) = getValues(row)
    try:
        # Evaluated inside the try so a county value without .upper() is
        # still reported and treated as "no data".
        countyName = county.upper()
        if countyName == 'PALM BEACH':
            url = 'https://www.pbcelections.org/'
            eid = '139'
            pollingInfo = precinctFinder(url, num, predir, name, suffix,
                                         postdir, city, zipcode, eid)
        elif countyName == 'SARASOTA':
            url = 'https://www.sarasotavotes.com/'
            eid = '82'
            pollingInfo = precinctFinder(url, num, predir, name, suffix,
                                         postdir, city, zipcode, eid)
        elif countyName == 'VOLUSIA':
            fullcounty = 'volusia'
            pollingInfo = voterFocus(num, predir, name, suffix, postdir,
                                     city, zipcode, fullcounty)
        elif countyName == 'OSCEOLA':
            fullcounty = 'osceola'
            pollingInfo = voterFocus(num, predir, name, suffix, postdir,
                                     city, zipcode, fullcounty)
        elif countyName == 'LEE':
            pollingInfo = getLee(num, predir, name, suffix, postdir,
                                 zipcode)
        elif countyName in ('ST LUCIE', 'LAKE'):
            pollingInfo = electionsFL(lastName, dob, num, county)
        else:
            return '', '', ''
        return pollingInfo
    except Exception as inst:
        # Best-effort: a failed lookup is reported on stdout and treated
        # as "no data". Single-argument print(...) behaves the same on
        # Python 2 and Python 3.
        print(type(inst))
        print(inst)
        return '', '', ''
127 changes: 127 additions & 0 deletions vip/KY.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from bs4 import BeautifulSoup
from requests import Session
import Levenshtein
import json


def getValues(row):
    """Extract the street-address parts and county name from a row.

    Returns a 6-tuple: (num, predir, name, suffix, postdir, county).
    """
    columns = (
        'vf_reg_cass_street_num',
        'vf_reg_cass_pre_directional',
        'vf_reg_cass_street_name',
        'vf_reg_cass_street_suffix',
        'vf_reg_cass_post_directional',
        'vf_county_name',
    )
    return tuple(row[column] for column in columns)


def getHiddenValues(form):
    """Collect the name/value pairs of every hidden <input> in a form."""
    hiddenInputs = form.find_all('input', {'type': 'hidden'})
    return {field.get('name'): field.get('value') for field in hiddenInputs}


def getCounties(soup):
    """Map upper-cased county names to their <option> values.

    Finds the county drop-down in the parsed page and returns a dict keyed
    by each option's stripped, upper-cased text.
    """
    dropdown = soup.find(
        'select',
        {'name': 'ctl00$ContentPlaceHolder1$usrCounty$cboCounty'})
    return {option.text.strip().upper(): option.get('value')
            for option in dropdown.find_all('option')}


def matchString(string, stringList):
    """Return the entry of stringList most similar to string.

    Similarity is Levenshtein.ratio computed on stripped, upper-cased
    copies of both strings. On a tie the earliest entry wins. Returns
    None when stringList is empty.
    """
    target = str(string.strip().upper())
    scored = [(Levenshtein.ratio(target, str(entry.strip().upper())), entry)
              for entry in stringList]
    best = None
    for candidate in scored:
        # Strict '>' keeps the first entry that reaches the top score.
        if best is None or candidate[0] > best[0]:
            best = candidate
    if best is None:
        return None
    return str(best[1])


def processBlanks(value, replacement):
    """Return replacement when value is the empty string, else value."""
    return replacement if value == '' else value


def getJefferson(num, predir, name, suffix, postdir):
    """Look up the polling place for a Jefferson County, KY address.

    The street address is first sent to the clerk site's GetAddress
    endpoint, and the closest returned suggestion is then submitted
    through the "Where Do I Vote" form.

    Returns a (ppid, name, address) tuple; ppid is always ''. name or
    address is '' when the result page lacks the matching label, and all
    three are '' when the site returns no address suggestions.
    """
    url = 'http://www.jeffersoncountyclerk.org/WhereDoIVote/Default.aspx'
    addrStr = '{0} {1} {2} {3} {4}'.format(num, predir, name, suffix, postdir)
    # Collapse every run of whitespace so blank directionals or suffixes
    # cannot leave doubled or trailing spaces in the search text.
    addrStr = ' '.join(addrStr.split())
    session = Session()
    payload = {'count': 20, 'prefixText': addrStr}
    header = {'Content-Type': 'application/json; charset=UTF-8'}
    response = session.post(url + '/GetAddress', data=json.dumps(payload),
                            headers=header)
    suggestions = json.loads(response.text)['d']
    if not suggestions:
        # Nothing to submit to the form; treat as "no data".
        return '', '', ''
    street = matchString(addrStr, suggestions)
    html = session.get(url).text
    # The form's hidden inputs are echoed back together with the search
    # fields when posting.
    fields = getHiddenValues(BeautifulSoup(html).find('form'))
    fields['txtStreet'] = street
    fields['cmdDisplay'] = 'Search'
    response = session.post(url, data=fields)
    soup = BeautifulSoup(response.text)
    ppid = ''
    name = ''
    address = ''
    nameLabel = soup.find('span', {'id': 'lblLocation'})
    addressLabel = soup.find('span', {'id': 'lblAddress'})
    # get_text() is used instead of .string, which is None for a tag
    # with more than one child.
    if nameLabel is not None:
        name = nameLabel.get_text().strip()
    if addressLabel is not None:
        address = addressLabel.get_text().strip()
        # NOTE(review): assumes the city/state suffix belongs only on a
        # found address; confirm against the original file.
        address += ' LOUISVILLE, KY'
    return ppid, name, address


def getFayette(num, predir, name, suffix):
    """Look up the polling place for a Fayette County, KY address.

    Posts the address components to the county clerk's voting-location
    search and reads the label/value rows of the results table.

    Returns a (ppid, name, address) tuple. Each element is '' when its
    row is missing, and all three are '' when the page has no results
    table.
    """
    url = 'https://www.fayettecountyclerk.com/web/elections/votingLocationsResults.htm'
    session = Session()
    fields = {'streetInNumber': num, 'streetInDir': predir,
              'streetInName': name, 'streetInType': suffix}
    response = session.post(url, data=fields)
    soup = BeautifulSoup(response.text)
    table = soup.find('table', {'cellpadding': '2'})
    if table is None:
        # No results table on the page: report "no data" instead of
        # raising AttributeError on None.
        return '', '', ''
    precinctDict = {}
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        # Rows without a label/value pair of cells would raise
        # IndexError and discard the whole lookup; skip them.
        if len(cells) < 2:
            continue
        label = cells[0].get_text().strip()
        precinctDict[label] = cells[1].get_text().strip()
    ppid = precinctDict.get('Precinct Code:', '').strip()
    name = precinctDict.get('Voting Location:', '')
    name = name.replace('- View on Map', '').strip()
    address = precinctDict.get('Precinct Address:', '').strip()
    if 'Precinct Zip Code:' in precinctDict:
        address += ' LEXINGTON, KY '
        address += precinctDict['Precinct Zip Code:'].strip()
    return ppid, name, address


def run(row):
    """Return (ppid, name, address) polling info for a Kentucky voter row.

    Dispatches on the row's county to the matching county scraper.
    Counties without a scraper, and lookups that raise, yield ('', '', '').
    """
    num, predir, name, suffix, postdir, county = getValues(row)
    try:
        # Evaluated inside the try so a county value without .upper() is
        # still reported and treated as "no data".
        countyName = county.upper()
        if countyName == 'JEFFERSON':
            pollingInfo = getJefferson(num, predir, name, suffix, postdir)
        elif countyName == 'FAYETTE':
            pollingInfo = getFayette(num, predir, name, suffix)
        else:
            return '', '', ''
        return pollingInfo
    except Exception as inst:
        # Best-effort: a failed scrape is reported on stdout and treated
        # as "no data". Single-argument print(...) behaves the same on
        # Python 2 and Python 3.
        print(type(inst))
        print(inst)
        return '', '', ''

0 comments on commit 40ac3e5

Please sign in to comment.