Merge pull request #1 from mlambright/master

update fork
CTCL · Nov 2, 2016 · 40ac3e5 · 40ac3e5
2 parents 8010217 + c2c823f
commit 40ac3e5
Show file tree

Hide file tree

Showing 13 changed files with 984 additions and 75 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,20 @@
+# EA-Tools 2014
+This repository contains a Flask app that managed processes for VIP QA and DECC through the 2014 election. VIP-specific scripts are stored in the vip folder, and DECC-specific scripts are stored in the decc folder. Each folder, except app, contains its own README detailing the included scripts.
+
+
+## For this app to run, the following environment variables must be defined:
+### For DECC:
++  *DECCINPUT:* the directory the DECC scripts read input files from
++  *DECCOUTPUT:* the directory where DECC files are written after processing
++  *PGHOST:* the URL or IP of the DECC database server
++  *PGUSER:* the username to connect to the DECC database
++  *PGPASSWORD:* the password associated with PGUSER
++  *PGDB:* the DECC database name
+
+### For VIP QA:
++  *GOOGLE_NATIVE_APP_CLIENT_ID:* The client ID associated with the VIP QA app
++  *GOOGLE_NATIVE_APP_CLIENT_SECRET:* The client secret associated with the VIP QA app
++  *GOOGLE_PUBLIC_API_KEY:* The API key used to query the Google civicInfo API
++  *GOOGLE_GEOCODE_API_KEY:* The API key used to query the Google geocode API
++  *VIPQADATA:* The directory containing TargetSmart PII spreadsheets used to QA election-day voting sites 
++  *EVIPQADATA:* The directory containing TargetSmart PII spreadsheets used to QA early voting sites
diff --git a/config.py b/config.py
@@ -1,28 +1,34 @@
-from oauth2client.client import OAuth2WebServerFlow
 import os
 
-api_id = os.getenv('GOOGLE_NATIVE_APP_CLIENT_ID')
-api_secret = os.getenv('GOOGLE_NATIVE_APP_CLIENT_SECRET')
-api_key = os.getenv('GOOGLE_PUBLIC_API_KEY')
-geokey = os.getenv('GOOGLE_GEOCODE_API_KEY')
-vip_qa_data = os.getenv('VIPQADATA')
-ev_qa_data = os.getenv('EVIPQADATA')
+#This import is needed for VIP
+from oauth2client.client import OAuth2WebServerFlow
+
+#These are generally useful
+CSRF_ENABLED = True
+SECRET_KEY = os.getenv('SECRET_KEY')
+
+#These variables configure the DECC scripts
 deccinputdir = os.getenv('DECCINPUT')
 deccoutputdir = os.getenv('DECCOUTPUT')
 HOST = os.getenv('PGHOST')
 USER = os.getenv('PGUSER')
 DB = os.getenv('PGDB')
 PASSWORD = os.getenv('PGPASSWORD')
 
-CSRF_ENABLED = True
-SECRET_KEY = os.getenv('SECRET_KEY')
-
+#These are all VIP Variables
+api_id = os.getenv('GOOGLE_NATIVE_APP_CLIENT_ID')
+api_secret = os.getenv('GOOGLE_NATIVE_APP_CLIENT_SECRET')
+api_key = os.getenv('GOOGLE_PUBLIC_API_KEY')
+geokey = os.getenv('GOOGLE_GEOCODE_API_KEY')
+vip_qa_data = os.getenv('VIPQADATA')
+ev_qa_data = os.getenv('EVIPQADATA')
 states = {'AL': 'Alabama', 'AR': 'Arkansas', 'AZ': 'Arizona', 'ME': 'Maine',
           'NH': 'New Hampshire', 'TN': 'Tennessee', 'LA': 'Louisiana',
           'IL': 'Illinois', 'IN': 'Indiana', 'ID': 'Idaho', 'GA': 'Georgia',
           'MA': 'Massachusetts', 'SD': 'South Dakota', 'VT': 'Vermont',
-          'FL': 'Florida', 'MS': 'Mississippi'}
-
+          'FL': 'Florida', 'MS': 'Mississippi', 'KY': 'Kentucky',
+          'TX': 'Texas', 'SC': 'South Carolina', 'WV': 'West Virginia',
+          'NM': 'New Mexico'}
 scope1 = 'https://spreadsheets.google.com/feeds'
 scope2 = 'https://www.googleapis.com/auth/drive'
 scope = '{0} {1}'.format(scope1, scope2)
@@ -31,6 +37,5 @@
                            client_secret=api_secret,
                            scope=scope,
                            redirect_uri=redirect)
-
 vipTemplateKey = '1qcqHBizQeFJwXsORMS_QS59gywuT9TRifwQe4BM_G3E'
 evTemplateKey = '1_uEKMFrFxfu69Ws-2QbmUPm1kFNMY5txGJzG8bfzK4s'
diff --git a/decc/README.md b/decc/README.md
@@ -0,0 +1,34 @@
+# DECC Processing Scripts
+
+These scripts handle DECC processing as managed by the Flask app. Each script is detailed below.
+
++  *processScans.py* processes newly received digital or physical orders.
+  +  *findClients()* accepts a psycopg2 cursor object and queries the DECC database to list clients.
+  +  *getProject()* accepts a client ID and psycopg2 cursor object. It queries the DECC database and returns a list of projects associated with the provided client ID.
+  +  *findOrders()* accepts a project ID and psycopg2 cursor object. It queries the DECC database and returns a list of orders associated with the provided project ID.
+  +  *createOrder()* accepts a project ID and psycopg2 cursor object. It inserts a new order record into the DECC database.
+  +  *findTypes()* accepts a project ID and psycopg2 cursor object. It queries the DECC database and returns a list of form types associated with the provided project ID.
+  +  *createPart()* accepts an order ID, type ID, state, booleans indicating whether the order is rush, will be uploaded to van, matched to vendor, or sent to quad, a psycopg2 cursor object, and a psycopg2 db connection object. It inserts a new part record into the DECC database and returns the part ID.
+  +  *obtainStartNum()* accepts a client ID and psycopg2 cursor object. It queries the DECC database and returns the next batch number associated with the given client.
+  +  *processPDF()* is run for digitally transmitted orders. It accepts an input directory, output directory, starting batch number, part ID, psycopg2 cursor object, and psycopg2 database connection object. It iterates over every file listed recursively in the input directory and inserts a new batch record (including total pages) in the DECC database for each. It returns the ending batch number and the total number of pages processed.
+  +  *processPhysical()* is run for physically shipped orders. It accepts an input file, output file, part ID, starting batch number, psycopg2 database connection object, psycopg2 cursor object, and order ID. It reads the input file and creates a new batch record for each row using the 'Batch Name' column in the input file. It then writes out all batches created, with name and ID.
+  +  *getCursor()* accepts a host, database, username, and password, and returns a psycopg2 cursor object and a psycopg2 database connection object.
+
+
++  *processXLSX.py* processes returned data from the data-entry vendor.
+  +  *getBatches()* accepts a psycopg2 cursor object and returns a dictionary listing all DECC batch information from the DECC database.
+  +  *writeFile()* accepts a list of row dictionaries to be written, an output filename, and a list of headers. It writes out the list of dictionaries with the given headers to the output filename.
+  +  *processXLSX()* accepts an input filename referencing an Excel file, a psycopg2 database connection object, and a psycopg2 cursor object. It reads in the Excel file and iterates over each row, matching it to its original batch name. It then updates batch entries with the final number of records.
+  +  *main()* accepts a boolean indicating whether a file contains VR records, an input filename, and an output filename. It connects to the DECC database, reads in the input file, runs processXLSX, and calls vrqc.py if the file is voter registration. It then outputs to the output file.
+
++  *vrqc.py* runs quality checks on returned voter registration data.
+  +  *readCSV()* accepts a filename and returns a list of dictionaries containing data for each row.
+  +  *writeCSV()* accepts a list of dictionaries containing row data, an output filename, and a list of headers. It writes the list of dictionaries out to the output filename using the list of headers.
+  +  *getFIPS()* accepts a url containing FIPS code translation data. It returns a dictionary mapping FIPS codes to county names, and a dictionary mapping county names to state abbreviations.
+  +  *getZipURL()* accepts the URL of the page listing HUD zip-FIPS code mapping files, and obtains the URL of the most recent HUD file mapping Zip Codes to county FIPS codes.
+  +  *getZips()* accepts a URL of a HUD file mapping zip codes to county FIPS codes. It returns a list of dictionaries with keys ZIP and FIPS.
+  +  *buildZipTranslator()* accepts the FIPS dictionary created in getFIPS() and the list created by getZips(), and creates a single dictionary with zip codes as keys and, as values, lists of dictionaries with STATE and COUNTY as keys.
+  +  *inspectRows()* accepts the list of row dictionaries, the zip translator, and the stateDict. It iterates over each row of voter registration data and checks whether the data included make any sense. It returns an updated list of row dictionaries, and an aggregate report.
+  +  *report()* writes out as JSON the object passed to it as an argument.
+  +  *concatenateFields()* concatenates the values for addresses and dates to create values that are more acceptable to VAN.
+  +  *run()* accepts as its argument the list of dictionaries from processXLSX.py, and returns a final QC'd version of that list.
diff --git a/decc/processScans.py b/decc/processScans.py
@@ -19,8 +19,8 @@ def findClients(cursor):
 
 def getProject(clientID, cursor):
     cursor.execute('''SELECT project_id
-                    FROM decc_form_client
-                    WHERE id = {0}
+                      FROM decc_form_client
+                      WHERE id = {0}
                     '''.format(clientID))
     value = cursor.fetchall()[0][0]
     return value
@@ -67,9 +67,9 @@ def createPart(orderID, typeID, state, rush, van, match, quad, cursor, db):
                                  match))
     db.commit()
     cursor.execute('''SELECT MAX(id)
-                                            FROM decc_form_part
-                                            WHERE order_id = {0}
-                                            '''.format(orderID))
+                      FROM decc_form_part
+                      WHERE order_id = {0}
+                      '''.format(orderID))
     result = cursor.fetchall()[0][0]
     return result
 

diff --git a/vip/FL.py b/vip/FL.py
@@ -14,7 +14,11 @@ def getValues(row):
     city = row['vf_reg_cass_city']
     zipcode = row['vf_reg_cass_zip']
     county = row['vf_county_name']
-    return num, predir, name, suffix, postdir, city, zipcode, county
+    date = str(row['voterbase_dob'])
+    lastName = row['tsmart_last_name']
+    if len(date) == 8:
+        dob = '{0}/{1}/{2}'.format(date[4:6], date[6:8], date[:4])
+    return num, predir, name, suffix, postdir, city, zipcode, county, dob, lastName
 
 
 def getHiddenValues(form):
@@ -24,15 +28,6 @@ def getHiddenValues(form):
     return fields
 
 
-def getCounties(soup):
-    counties = {}
-    selectName = 'ctl00$ContentPlaceHolder1$usrCounty$cboCounty'
-    select = soup.find('select', {'name': selectName})
-    for item in select.find_all('option'):
-        counties[item.text.strip().upper()] = item.get('value')
-    return counties
-
-
 def matchString(string, stringList):
     maximum = 0
     string = str(string.strip().upper())
@@ -160,35 +155,59 @@ def getLee(num, predir, name, suffix, postdir, zipcode):
     return ppid, name, address
 
 
+def electionsFL(lastName, dob, num, county):
+    url = 'https://www.electionsfl.org/VoterInfo/asmx/service1.asmx/'
+    county = county.replace(' ', '').lower()
+    header = {'Content-Type': 'application/json; charset=UTF-8'}
+    session = Session()
+    payload = {'LastName': lastName, 'BirthDate': dob, 'StNumber': num,
+               'County': county, 'FirstName': '', 'challengeValue': '',
+               'responseValue': ''}
+    response = session.post(url + 'FindVoter', data=json.dumps(payload),
+                            headers=header)
+    print response.text
+    voterID = str(json.loads(json.loads(response.text)['d'])[0]['FVRSVoterIdNumber'])
+    payload = {'FVRSVoterIDNumber': voterID, 'CurCounty': county}
+    response = session.post(url + 'GetElectionInfo', data=json.dumps(payload),
+                            headers=header)
+    print response.text
+    data = json.loads(json.loads(response.text)['d'])[0]
+    ppid = ''
+    name = data['place_name']
+    address = data['office_location']
+    return ppid, name, address
+
+
 def run(row):
-    num, predir, name, suffix, postdir, city, zipcode, county = getValues(row)
-    while True:
-        try:
-            if county.upper() == 'PALM BEACH':
-                url = 'https://www.pbcelections.org/'
-                eid = '139'
-                pollingInfo = precinctFinder(url, num, predir, name, suffix,
-                                             postdir, city, zipcode, eid)
-            elif county.upper() == 'SARASOTA':
-                url = 'https://www.sarasotavotes.com/'
-                eid = '82'
-                pollingInfo = precinctFinder(url, num, predir, name, suffix,
-                                             postdir, city, zipcode, eid)
-            elif county.upper() == 'VOLUSIA':
-                fullcounty = 'volusia'
-                pollingInfo = voterFocus(num, predir, name, suffix, postdir,
-                                         city, zipcode, fullcounty)
-            elif county.upper() == 'OSCEOLA':
-                fullcounty = 'osceola'
-                pollingInfo = voterFocus(num, predir, name, suffix, postdir,
-                                         city, zipcode, fullcounty)
-            elif county.upper() == 'LEE':
-                pollingInfo = getLee(num, predir, name, suffix, postdir,
-                                     zipcode)
-            else:
-                return '', '', ''
-            return pollingInfo
-        except Exception as inst:
-            print type(inst)
-            print inst
+    num, predir, name, suffix, postdir, city, zipcode, county, dob, lastName = getValues(row)
+    try:
+        if county.upper() == 'PALM BEACH':
+            url = 'https://www.pbcelections.org/'
+            eid = '139'
+            pollingInfo = precinctFinder(url, num, predir, name, suffix,
+                                         postdir, city, zipcode, eid)
+        elif county.upper() == 'SARASOTA':
+            url = 'https://www.sarasotavotes.com/'
+            eid = '82'
+            pollingInfo = precinctFinder(url, num, predir, name, suffix,
+                                         postdir, city, zipcode, eid)
+        elif county.upper() == 'VOLUSIA':
+            fullcounty = 'volusia'
+            pollingInfo = voterFocus(num, predir, name, suffix, postdir,
+                                     city, zipcode, fullcounty)
+        elif county.upper() == 'OSCEOLA':
+            fullcounty = 'osceola'
+            pollingInfo = voterFocus(num, predir, name, suffix, postdir,
+                                     city, zipcode, fullcounty)
+        elif county.upper() == 'LEE':
+            pollingInfo = getLee(num, predir, name, suffix, postdir,
+                                 zipcode)
+        elif county.upper() == 'ST LUCIE' or county.upper() == 'LAKE':
+            pollingInfo = electionsFL(lastName, dob, num, county)
+        else:
             return '', '', ''
+        return pollingInfo
+    except Exception as inst:
+        print type(inst)
+        print inst
+        return '', '', ''
diff --git a/vip/KY.py b/vip/KY.py
@@ -0,0 +1,127 @@
+from bs4 import BeautifulSoup
+from requests import Session
+import Levenshtein
+import json
+
+
+def getValues(row):
+    num = row['vf_reg_cass_street_num']
+    predir = row['vf_reg_cass_pre_directional']
+    name = row['vf_reg_cass_street_name']
+    suffix = row['vf_reg_cass_street_suffix']
+    postdir = row['vf_reg_cass_post_directional']
+    county = row['vf_county_name']
+    return num, predir, name, suffix, postdir, county
+
+
+def getHiddenValues(form):
+    fields = {}
+    for item in form.find_all('input', {'type': 'hidden'}):
+        fields[item.get('name')] = item.get('value')
+    return fields
+
+
+def getCounties(soup):
+    counties = {}
+    selectName = 'ctl00$ContentPlaceHolder1$usrCounty$cboCounty'
+    select = soup.find('select', {'name': selectName})
+    for item in select.find_all('option'):
+        counties[item.text.strip().upper()] = item.get('value')
+    return counties
+
+
+def matchString(string, stringList):
+    maximum = 0
+    string = str(string.strip().upper())
+    optionList = []
+    for text in stringList:
+        newstring = str(text.strip().upper())
+        score = Levenshtein.ratio(string, newstring)
+        maximum = max(maximum, score)
+        optionList.append((score, text))
+    for option in optionList:
+        if maximum == option[0]:
+            return str(option[1])
+
+
+def processBlanks(value, replacement):
+    if value == '':
+        value = replacement
+    return value
+
+
+def getJefferson(num, predir, name, suffix, postdir):
+    url = 'http://www.jeffersoncountyclerk.org/WhereDoIVote/Default.aspx'
+    addrStr = '{0} {1} {2} {3} {4}'.format(num, predir, name, suffix, postdir)
+    addrStr = addrStr.strip().replace('   ', ' ').replace('  ', ' ')
+    session = Session()
+    data = {'count': 20, 'prefixText': addrStr}
+    header = {'Content-Type': 'application/json; charset=UTF-8'}
+    response = session.post(url + '/GetAddress', data=json.dumps(data),
+                            headers=header)
+    data = json.loads(response.text)
+    addresses = data['d']
+    address = matchString(addrStr, addresses)
+    html = session.get(url).text
+    fields = getHiddenValues(BeautifulSoup(html).find('form'))
+    fields['txtStreet'] = address
+    fields['cmdDisplay'] = 'Search'
+    response = session.post(url, data=fields)
+    soup = BeautifulSoup(response.text)
+    name = ''
+    address = ''
+    ppid = ''
+    nameLabel = soup.find('span', {'id': 'lblLocation'})
+    addressLabel = soup.find('span', {'id': 'lblAddress'})
+    if nameLabel is not None:
+        name = nameLabel.string.strip()
+    if addressLabel is not None:
+        address = addressLabel.string.strip()
+        address += ' LOUISVILLE, KY'
+    return ppid, name, address
+
+
+def getFayette(num, predir, name, suffix):
+    url = 'https://www.fayettecountyclerk.com/web/elections/votingLocationsResults.htm'
+    session = Session()
+    fields = {'streetInNumber': num, 'streetInDir': predir,
+              'streetInName': name, 'streetInType': suffix}
+    response = session.post(url, data=fields)
+    soup = BeautifulSoup(response.text)
+    table = soup.find('table', {'cellpadding': '2'}).find_all('tr')
+    precinctDict = {}
+    for row in table:
+        cells = row.find_all('td')
+        label = cells[0].get_text().strip()
+        value = cells[1].get_text().strip()
+        precinctDict[label] = value
+    ppid = ''
+    name = ''
+    address = ''
+    if 'Precinct Code:' in precinctDict:
+        ppid = precinctDict['Precinct Code:'].strip()
+    if 'Voting Location:' in precinctDict:
+        name = precinctDict['Voting Location:']
+        name = name.replace('- View on Map', '').strip()
+    if 'Precinct Address:' in precinctDict:
+        address = precinctDict['Precinct Address:'].strip()
+    if 'Precinct Zip Code:' in precinctDict:
+        address += ' LEXINGTON, KY '
+        address += precinctDict['Precinct Zip Code:'].strip()
+    return ppid, name, address
+
+
+def run(row):
+    num, predir, name, suffix, postdir, county = getValues(row)
+    try:
+        if county.upper() == 'JEFFERSON':
+            pollingInfo = getJefferson(num, predir, name, suffix, postdir)
+        elif county.upper() == 'FAYETTE':
+            pollingInfo = getFayette(num, predir, name, suffix)
+        else:
+            return '', '', ''
+        return pollingInfo
+    except Exception as inst:
+        print type(inst)
+        print inst
+        return '', '', ''