In [1]:
#re-imports modules that have changed
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../")

In [3]:
import os
import logging
import json
import datetime as dt
import time
import pandas as pd
from src.candidate_fetcher import VoterInfo
from src.utils_cloud_storage import CloudStorageClient

In [4]:
# Emit INFO-level (and above) records from the root logger so job progress shows in cell output.
logging.basicConfig(level="INFO")

In [5]:
# Constants

In [6]:
# Retrieve county-level address data and use the FIPS code as the identifier

In [7]:
def run_voter_info(max_elections=2): 
    """
    Fetch voter information for current elections from the Google Civic
    Information API and hand each response to ``VoterInfo.save_voter_info``.

    For every processed election the county-level address table is subset to
    the election's division (all rows for a national election, rows whose
    ``state_abbr`` matches for a state election). Each remaining row is queried
    by address and the response is tagged with the row's fips code (``geoid``)
    before being saved with ``bucket_name="current_contests"``.

    Author note carried over from the original docstring:
    "FAILED - 500 - no explanation - FULL PROCESS".

    Args:
        max_elections: Number of elections (from the start of the loaded list)
            to process. Defaults to 2, which matches the previous hard-coded
            ``elections[0:2]`` slice. Pass ``None`` to process every election.

    Returns:
        None. Failures for an election are logged and the job moves on to the
        next election.
    """

    # Job status
    logging.info("Starting job fetch voter information")

    # Initiate job
    civic = VoterInfo()

    # Load elections, address data
    logging.info("Load list of current elections.")
    elections = civic.load_current_elections("current_elections", "current_elections.json")

    logging.info("Load addreses by locale")
    locales = civic.load_address_locales("address_locales", "addresses_county.csv")

    # Retrieve voter information data
    for election in elections[:max_elections]:
        election_id = election['id']
        election_name = election['name']

        # Keep only the last OCD-ID segment, upper-cased:
        # "ocd-division/country:us/state:sc" -> "SC", "ocd-division/country:us" -> "US"
        election_ocdid = election['ocdDivisionId'].split("/")[-1].split(":")[-1].upper()

        logging.debug(f"election_id: {election_id}")
        logging.debug(f"election_ocdid: {election_ocdid}")
        logging.debug(f"election_name: {election_name}")

        # Subset data by OCDid, skipping the test election
        if election_name == 'VIP Test Election':
            continue
        if election_ocdid == 'US':
            # National election: every locale is in scope
            active = locales.copy()
        else:
            # Statewide election: only locales in that state
            active = locales.loc[locales['state_abbr'] == election_ocdid, :].copy()

        # Explicit check instead of `assert` (asserts are removed under `python -O`).
        # Nothing to fetch for this election, so move on rather than falling through.
        if active.empty:
            logging.error("Unable to subset data by OCDid.")
            continue

        # Get voter information for election
        try:
            logging.info(f"Start election: {election_id}:{election_ocdid}")
            for _, row in active.iterrows():
                address = row['address']
                geo_id = row["fips"]
                response = civic.fetch_voter_info(address, election_id)
                response['geoid'] = {"fips": geo_id}
                civic.save_voter_info(geo_id, response, bucket_name="current_contests")
                # Pause between requests -- presumably to stay under the API rate limit (confirm).
                time.sleep(1)
            logging.debug(f"Completed election: {election_id}:{election_ocdid}")
        except Exception as error:
            # One failing locale aborts the rest of this election (unchanged behaviour);
            # logging.exception keeps the traceback that logging.error(error) dropped.
            logging.error(f"Failed to retrieve data for {election_id}:{election_ocdid}")
            logging.exception(error)

    # Job status
    logging.info("Completed job fetch voter info.")

In [8]:
#run_voter_info()

In [9]:
#len(active)

In [10]:
#election_id

In [11]:
#active.head(3)

In [12]:
# Initiate job
civic = VoterInfo() 
    
# Load elections, address data
logging.info("Load list of current elections.")
elections = civic.load_current_elections("current_elections",  "current_elections.json")

INFO:root:Load list of current elections.
INFO:root:Successfully loaded current elections data.


In [13]:
# Show how many elections were loaded, then keep the second entry as a sample `election`.
print(len(elections))
election = elections[1]
election

17


{'id': '4897',
 'name': 'South Carolina Presidential Primary',
 'electionDay': '2020-02-29',
 'ocdDivisionId': 'ocd-division/country:us/state:sc'}

In [100]:
# Alias the sample election as `event` -- presumably a stand-in payload for manually
# calling functions that take an `event` argument (confirm).
event = election


In [70]:
def publish_active_elections(context=None, event=None):
    """
    Publishes every current election to the "active-elections" Pub/Sub topic
    and blocks until each publish has either succeeded or failed.

    Each message carries the election id as its (UTF-8 encoded) data and the
    following attributes:
            - election_id=election['id'],
            - name=election['name'],
            - electionDay=election['electionDay'],
            - ocdDivisionId=election['ocdDivisionId']

    Args:
        context: Unused by this function. Defaults to None so the function can
            be called without arguments from the notebook.
        event: Unused by this function. Defaults to None for the same reason.

    Returns:
        None. Publish failures are logged; they do not raise.
    """
    # Imported locally because the notebook only imports pubsub_v1 in a later cell,
    # so this function would otherwise hit a NameError on a fresh kernel.
    from google.cloud import pubsub_v1

    def get_callback(data):
        def callback(f):
            try:
                logging.info(f.result())
            except Exception as exc:
                logging.error("Please handle {} for {}.".format(exc, data))
            finally:
                # Always drop the future, even on failure. Previously a failed
                # publish stayed in `futures` and the wait loop below never ended.
                futures.pop(data, None)

        return callback

    # Job status
    logging.info("Starting job to publish elections.")

    # Initiate job
    civic = VoterInfo()
    date = dt.datetime.now().date()

    # Load elections data
    logging.info("Load list of current elections.")
    elections = civic.load_current_elections("current_elections", "current_elections.json")

    project_id = "election-tracker-268319"
    topic_name = "active-elections"

    publisher = pubsub_v1.PublisherClient()
    topic_path = publisher.topic_path(project_id, topic_name)

    # Outstanding publish futures, keyed by the message data (election id).
    futures = dict()

    for election in elections:
        data = str(election['id'])

        # When you publish a message, the client returns a future. Data must be a bytestring.
        future = publisher.publish(
            topic_path,
            data=data.encode("utf-8"),
            election_id=election['id'],
            name=election['name'],
            electionDay=election['electionDay'],
            ocdDivisionId=election['ocdDivisionId']
        )

        # Register the future before attaching the callback so the callback
        # always finds (and removes) its own entry.
        futures[data] = future
        # Publish failures are handled in the callback function.
        future.add_done_callback(get_callback(data))

    # Wait for all the publish futures to resolve before exiting.
    while futures:
        time.sleep(5)

    print(f"Published active elections for current elections as of {str(date)}")
    

In [41]:
# The function signature is (context, event); neither value is read inside it,
# so pass a placeholder context and the sample `event` defined above.
publish_active_elections(context=None, event=event)

INFO:root:Starting job to publish elections.
INFO:root:Load list of current elections.
INFO:root:Successfully loaded current elections data.
INFO:root:443326984175662
INFO:root:443326984175663
INFO:root:443326984175664
INFO:root:443326984175665
INFO:root:443326984175666
INFO:root:443326984175667
INFO:root:443326984175668
INFO:root:443326984175669
INFO:root:443326984175670
INFO:root:443326984175671
INFO:root:443326984175672
INFO:root:443326984175673
INFO:root:443326984175674
INFO:root:443326984175675
INFO:root:443326984175676
INFO:root:443326984175677
INFO:root:443326984175678


Published active elections for current elections as of 2020-02-21


In [71]:
import time
from google.cloud import pubsub_v1


def publish_active_divisions(election):
    """
    Publishes one message per geographic division of an election to the
    "active-divisions" Pub/Sub topic and blocks until every publish has either
    succeeded or failed.

    For each division associated with the election, the message data is the
    division's fips code (UTF-8 encoded) and the attributes are:
        - election_id=election_id, # As returned by Civic Information API
        - address=address, # Address of geo division associated with election parsed from locales data.
        - geo_id=geo_id # Fips code or similar geodivision identifier as parsed from locales data

    Args:
        election: Mapping with the keys 'election_id', 'name' and
            'ocdDivisionId' (the attribute set published by
            publish_active_elections).

    Returns:
        None. Returns early, publishing nothing, for 'VIP Test Election'.

    Raises:
        AssertionError: If no locale rows match the election's division.
    """

    def get_callback(data):
        def callback(f):
            try:
                logging.info(f.result())
            except Exception as exc:
                logging.error("Please handle {} for {}.".format(exc, data))
            finally:
                # Always drop the future, even on failure. Previously a failed
                # publish stayed in `futures` and the wait loop below never ended.
                futures.pop(data, None)

        return callback

    # Job status
    logging.info("Starting job to parse election.")

    # Initiate job
    civic = VoterInfo()

    logging.info("Load addreses by locale")
    locales = civic.load_address_locales("address_locales", "addresses_county.csv")

    project_id = "election-tracker-268319"
    topic_name = "active-divisions"

    publisher = pubsub_v1.PublisherClient()
    topic_path = publisher.topic_path(project_id, topic_name)

    # Outstanding publish futures, keyed by the message data (fips code).
    futures = dict()

    # Parse election
    election_id = election['election_id']  # renamed to avoid conflict
    election_name = election['name']

    # Keep only the last OCD-ID segment, upper-cased:
    # "ocd-division/country:us/state:ri" -> "RI", "ocd-division/country:us" -> "US"
    election_ocdid = election['ocdDivisionId'].split("/")[-1].split(":")[-1].upper()

    logging.debug(f"election_id: {election_id}")
    logging.debug(f"election_ocdid: {election_ocdid}")
    logging.debug(f"election_name: {election_name}")

    # Subset data by OCDid, skipping the test election
    if election_name == 'VIP Test Election':
        logging.debug("Election name 'VIP Test Election' excluded.")
        return
    if election_ocdid == 'US':
        # National election: every locale is in scope
        active = locales.copy()
    else:
        # Statewide election: only locales in that state
        active = locales.loc[locales['state_abbr'] == election_ocdid, :].copy()

    # Explicit check instead of `assert` (asserts are removed under `python -O`);
    # the exception type is kept so existing callers see the same error.
    if active.empty:
        logging.error("Unable to subset data by OCDid.")
        raise AssertionError(f"No locales matched OCD division '{election_ocdid}'")

    # Publish active divisions
    for _, row in active.iterrows():
        data = str(row["fips"])

        # When you publish a message, the client returns a future. Data must be a bytestring.
        future = publisher.publish(
            topic_path,
            data=data.encode("utf-8"),
            election_id=election_id,
            address=row['address'],
            geo_id=data
        )

        # Register the future before attaching the callback so the callback
        # always finds (and removes) its own entry.
        futures[data] = future
        # Publish failures are handled in the callback function.
        future.add_done_callback(get_callback(data))

    # Wait for all the publish futures to resolve before exiting.
    while futures:
        time.sleep(5)

    logging.info(f"Published active divisions for election {election_id}")

In [72]:
# Pull a small batch from the 'test_elections' subscription to inspect what was published.
# `subscriber` was never created anywhere in this notebook, so build the client here.
subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path("election-tracker-268319", 'test_elections')
response = subscriber.pull(subscription_path, max_messages=5)

# NOTE: the loop variable `msg` (the last message printed) is read by the next cell.
for msg in response.received_messages:
    print("Received message:", msg.message.data)

# Acknowledge what was pulled; skip the call when the batch is empty (nothing to ack).
ack_ids = [msg.ack_id for msg in response.received_messages]
if ack_ids:
    subscriber.acknowledge(subscription_path, ack_ids)

Received message: b'4912'
Received message: b'4914'
Received message: b'4924'
Received message: b'4930'
Received message: b'4939'


In [74]:
# `msg` is the loop variable left over from the pull cell above, i.e. the last
# message that was printed. Its attributes are used as the `election` payload.
election = msg.message.attributes
election

{'election_id': '4939', 'name': 'Rhode Island House District 56 Special Election', 'electionDay': '2020-03-03', 'ocdDivisionId': 'ocd-division/country:us/state:ri'}

In [75]:
# Publish one message per division for the election pulled above.
publish_active_divisions(election)

INFO:root:Starting job to parse election.
INFO:root:Load addreses by locale
INFO:root:Successfully loaded address lookup data.
INFO:root:443327314103440
INFO:root:443327314103441
INFO:root:443327314103442
INFO:root:443327314103443
INFO:root:443327314103444
INFO:root:Published active divisions for election 4939


In [76]:
# Pull a small batch from the 'test_divisions' subscription to inspect the published divisions.
# NOTE(review): reuses the `subscriber` client from the kernel session -- expected to be a
# pubsub_v1.SubscriberClient; confirm it is created before this cell runs.
subscription_path = subscriber.subscription_path("election-tracker-268319", 'test_divisions')
response = subscriber.pull(subscription_path, max_messages=5)

# The loop variable `msg` (the last message printed) is read by the next cell.
for msg in response.received_messages:
    print("Received message:", msg.message.data)

# Acknowledge every pulled message.
ack_ids = [msg.ack_id for msg in response.received_messages]
subscriber.acknowledge(subscription_path, ack_ids)

Received message: b'44001'
Received message: b'44003'
Received message: b'44005'
Received message: b'44007'
Received message: b'44009'


In [78]:
# `msg` is the loop variable left over from the pull cell above, i.e. the last
# message that was printed. Its attributes are used as the `division` payload.
division = msg.message.attributes
division

{'address': '79 Cherokee Bend, Charlestown, RI 02813, USA', 'geo_id': '44009', 'election_id': '4939'}

In [98]:
# Def fetch election information
def run_voter_info(division): 
    """
    Fetch voter information for a single division and pass it on for storage.

    Note: this definition reuses the name of the zero-argument
    ``run_voter_info`` defined earlier in the notebook and replaces it once
    this cell has run.

    Args:
        division: Mapping with the keys 'address', 'geo_id' and 'election_id'
            (the attribute set of an active-divisions topic message):
            - election_id: as returned by the Civic Information API
            - address: address of the geo division associated with the election
            - geo_id: fips code or similar geodivision identifier

    Returns:
        None. Any exception raised while fetching or saving is logged at
        ERROR level and swallowed.
    """
    # Read the three required fields in a fixed order (address, geo_id, election_id).
    address, geo_id, election_id = (
        division[key] for key in ('address', 'geo_id', 'election_id')
    )

    fetcher = VoterInfo()

    try:
        logging.info("Start VoterInfo call: {}:{}".format(election_id, geo_id))
        payload = fetcher.fetch_voter_info(address, election_id)
        # Tag the response with the division's fips code before saving.
        payload['geoid'] = dict(fips=geo_id)
        fetcher.save_voter_info(geo_id, payload, bucket_name="current_contests")
        time.sleep(1)
        logging.debug("Completed VoterInfo call.")
    except Exception as exc:
        # Two ERROR records: a summary line, then the exception itself.
        for message in (f"Failed to retrieve data for {election_id}:{geo_id}", exc):
            logging.error(message)

In [99]:
# Fetch and save voter information for the division pulled above.
run_voter_info(division)

INFO:root:Start VoterInfo call: 4939:44009
ERROR:root:Successfully saved data for 44009 to: gs://current_contests/44009_2020-02-21.json
