In [88]:
# This notebook runs the preprocessing pipeline interactively; it is mainly useful when developing or debugging the preprocessing helpers.
# The Google API key and the API login (email and password) are omitted from this notebook and must be supplied before running it.
import requests
import os
import re
import googlemaps
# Prefer the GOOGLE_MAPS_API_KEY environment variable so the key does not have to be
# pasted into (and accidentally committed with) the notebook. The 'OMITTED' placeholder
# is kept as the fallback so editing the key in by hand still works.
gmaps = googlemaps.Client(key=os.environ.get('GOOGLE_MAPS_API_KEY', 'OMITTED'))

In [89]:
# Base URL of the locally running API that serves the inbound rows.
API_BASE_URL = "http://localhost:5000/api/v1"
# Upper bound (seconds) on connecting / waiting for data so a dead server cannot hang the kernel.
REQUEST_TIMEOUT_SECONDS = 30

# Credentials are read from PREPROCESS_API_EMAIL / PREPROCESS_API_PASSWORD when set;
# the 'OMITTED' placeholders remain as fallbacks for manual editing.
authResponse = requests.post(
    API_BASE_URL + "/auth/login",
    data={
        "email": os.environ.get("PREPROCESS_API_EMAIL", "OMITTED"),
        "password": os.environ.get("PREPROCESS_API_PASSWORD", "OMITTED"),
    },
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Surface a failed login as an HTTP error here rather than as a KeyError on 'token' below.
authResponse.raise_for_status()
authToken = authResponse.json()['token']
headers = {'Authorization': 'Bearer ' + authToken}

freshInboundRows = requests.get(
    API_BASE_URL + "/internal/inbound/",
    headers=headers,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Do not try to treat an error payload as the list of rows.
freshInboundRows.raise_for_status()
rows = freshInboundRows.json()

In [90]:
def get_formatted_address(full_address):
    """Geocode a free-form address through the module-level ``gmaps`` client.

    Args:
        full_address: Raw address text; may be None.

    Returns:
        A dict with the keys 'formatted_address', 'lat_lng' (the result's
        ``geometry.location``) and 'google_place_id', all taken from the first
        geocoding result, or None when the address is missing/blank or the
        geocoder returns no results.
    """
    # A request without an address is rejected by the Geocoding API (the client
    # reports that as an error rather than as an empty result list), so skip the
    # call entirely for missing or whitespace-only input.
    if full_address is None or not str(full_address).strip():
        return None

    geocode_result = gmaps.geocode(full_address)
    if not geocode_result:
        return None

    # Only the first (primary) candidate is used.
    primary_result = geocode_result[0]

    # NOTE: We attempted to use google's 'vicinity' (via gmaps.place) for a shortened
    # address, but it doesn't work as advertised.
    return {
        'formatted_address': primary_result['formatted_address'],
        'lat_lng': primary_result['geometry']['location'],
        'google_place_id': primary_result['place_id'],
    }

In [91]:
# Returns empty string if phone is invalid
def get_formatted_phone(phone_field):
    """Normalise a phone field to ``(AAA)BBB CCCC`` with an optional `` Ext N`` suffix.

    The field must *start* with a 10-character North-American style number,
    optionally preceded by a leading ``1``. The last four characters may be
    digits or four upper-case letters (a vanity number). Anything after the
    number, other than an ``ext``/``Ext``/``EXT`` extension, is ignored.

    Args:
        phone_field: Raw phone text; may be None.

    Returns:
        The formatted phone string, or an empty string when no phone number is
        recognised at the start of the field.
    """
    if phone_field is None:
        return ""

    # Raw strings keep the regex escapes (\s, \-, \(, ...) from being treated as
    # invalid Python string escapes.
    contains_phone = re.search(
        r"^([1]?[\s]?[\-]?[\s]?)"                              # group 1: optional leading country code
        r"([\(]?[\s]?[0-9]{3}[\s]?[\)]?[\s]?[\-]{0,1}[\s]?"    # group 2 starts: area code
        r"[0-9]{3}[\s]?[\-]{0,1}[\s]?"                         # exchange
        r"([0-9]{4}|[A-Z]{4})[\s]?[\-]{0,1}[\s]?)"             # group 3: last four digits or vanity letters
        r"((ext|Ext|EXT)[\s]?[.]?[\s]?[0-9]{1,6})?",           # group 4: optional extension (up to 6 digits)
        phone_field,
    )
    if not contains_phone:
        return ""

    # Drop punctuation/whitespace but keep vanity letters; stripping every
    # non-digit would discard the letters the pattern deliberately accepts.
    number_chars = re.sub(r"[^0-9A-Z]", "", contains_phone.group(2))
    clean_phone_digits = '({}){} {}'.format(number_chars[0:3], number_chars[3:6], number_chars[6:])

    if contains_phone.group(4):
        ext_segment = 'Ext {}'.format(re.sub(r"\D", "", contains_phone.group(4)))
        clean_phone_digits = '{} {}'.format(clean_phone_digits, ext_segment)

    return clean_phone_digits

In [92]:
def is_likely_drive_thru(description_field):
    """Heuristically decide whether a description indicates a drive-thru site.

    The description is lower-cased and searched for drive-thru style phrases
    ("curbside", "drive thru", "drive-up", "wait in your car", ...).

    Args:
        description_field: Free-text description; may be None.

    Returns:
        True when any of the phrases is found, otherwise False.
    """
    if description_field is None:
        return False

    drive_thru_flag = re.search(
        r'curbside'
        # "drive thru", "drive-thru", "drive by", "drive-by", "drive up", "drive-up",
        # plus the spelled-out "drive through" / "drive-through".
        r'|drive[- ](?:thru|through|by|up)'
        # The trailing \b stops "in car" from matching inside longer words such as
        # "in care" or "in Carrboro".
        r'|inside your cars?\b'
        r'|in (?:the )?cars?\b'
        r'|wait in [a-z]* cars?\b'
        r'|remain in [a-z]* vehicle',
        description_field.lower(),
    )
    return drive_thru_flag is not None
            

In [93]:
def is_likely_appointment_required(description_field):
    """Heuristically decide whether a description mentions needing an appointment.

    Args:
        description_field: Free-text description; None is treated as empty text.

    Returns:
        True when the lower-cased text contains 'appointment', 'appt',
        'schedule' or 'pre-registration'; otherwise False.
    """
    text = '' if description_field is None else description_field
    return re.search('appointment|appt|schedule|pre-registration', text.lower()) is not None

In [94]:
def is_likely_screen_required(description_field):
    """Heuristically decide whether a description says screening is required first.

    The description is lower-cased and searched for phrases such as "screen",
    "referral", "call ahead", "doctor's order" or "call ... prior/before".

    Args:
        description_field: Free-text description; may be None.

    Returns:
        True when any screening phrase is found, otherwise False.
    """
    if description_field is None:
        return False

    # Zero to five short filler words, each optionally followed by whitespace.
    filler_words = r"([a-z']{1,14}\s?){0,5}"
    screening_phrases = (
        r"screen",
        r"referral",
        r"referred",
        r"referal",
        r"medical order",
        r"order from a medical provider",
        r"pre-registration",
        r"qualify",
        r"call in advance",
        r"call ahead",
        r"calling ahead",
        r"call to be approved",
        r"call first",
        r"doctor's order",
        # The separator after the filler words is optional so that the zero-word
        # case ("call before arriving") matches; a mandatory space there would
        # demand two consecutive spaces whenever no filler word is present.
        r"call " + filler_words + r"\s?(prior|before)",
        r"required " + filler_words + r"\s?virtual visits",
        r"online assessment",
        r"if your doctor",
        r"need telemedicine appointment",
        r"must receive guidance",
    )
    screen_flag = re.search('|'.join(screening_phrases), description_field.lower())
    return screen_flag is not None

In [95]:
# A very minimal check to weed-out obvious junk
def is_valid_URL(description_field=""):
    """Minimal sanity check that a value looks like a URL.

    Args:
        description_field: Candidate URL text; None is treated as empty text.

    Returns:
        True when the text, compared case-insensitively, begins with 'http'
        or 'www'; otherwise False.
    """
    candidate = '' if description_field is None else description_field
    return candidate.lower().startswith(('http', 'www'))

In [96]:
for row in rows:
    # Run every preprocessing helper against the raw inbound row.
    address_components = get_formatted_address(row['full_address'])
    formatted_phone = get_formatted_phone(row['phone'])
    drive_thru = is_likely_drive_thru(row['description'])
    app_required = is_likely_appointment_required(row['description'])
    screen_required = is_likely_screen_required(row['description'])
    valid_url_flag = is_valid_URL(row['url'])

    # Show the derived values, then the raw row they were derived from.
    print(address_components, formatted_phone, sep='\n')
    for caption, verdict in (
        ('drive thru? ', drive_thru),
        ('appt req? ', app_required),
        ('screen? ', screen_required),
        ('is valid URL? ', valid_url_flag),
    ):
        print(caption, verdict)
    print(row)

    # Only the first row is inspected; this cell is a quick smoke test.
    break

{'formatted_address': '320 W Pumping Station Rd #3, Quakertown, PA 18951, USA', 'lat_lng': {'lat': 40.4582455, 'lng': -75.36824279999999}, 'google_place_id': 'EjYzMjAgVyBQdW1waW5nIFN0YXRpb24gUmQgIzMsIFF1YWtlcnRvd24sIFBBIDE4OTUxLCBVU0EiHRobChYKFAoSCdegZ_BcIsSJERfa5kCtcJEXEgEz'}
(555)123 4567
drive thru?  False
appt req?  False
screen?  True
is valid URL?  False
{'id': 1, 'full_address': '320 W Pumping Station Road Suite 3 , Quakertown PA 18951', 'name': 'DEV COVID-19 Testing Center Richland Township', 'phone': '(555) - 123-4567', 'url': None, 'description': 'You must receive guidance via the MyLVHN Nurse Information line at 1-800-555-1234 or complete an LVHN Video Visit by downloading the MyLVHN app to be assessed and possibly tested. When you arrive at the Assess and Test office, go inside and check in with the registration desk. You will be asked to provide your mobile phone number. Then you can return to your car and wait until the Assess and Test office calls your mobile phone, lett