In [1]:
# Install geopy with the %pip magic so the package lands in the environment
# of the running kernel (a shell `!pip` may resolve to a different Python).
%pip install geopy



In [14]:
import pandas as pd
import time
from functools import reduce
from geopy.geocoders import Nominatim

In [15]:
# Nominatim geocoder client (from geopy) shared by every address lookup below.
geolocator = Nominatim(
    user_agent="preprocessing_affordable_units",
)

# Bounding box used to throw away lookups that resolve to the wrong place.
# Source of the values:
# https://anthonylouisdagostino.com/bounding-boxes-for-all-us-states/
x_bound = [-73.508142, -69.928393]  # longitude limits: [lower, upper]
y_bound = [41.237964, 42.886589]  # latitude limits: [lower, upper]

In [20]:
def get_zip(addr: str):
    """Extract a 5-digit ZIP code from a comma-separated address string.

    The fields are scanned from the END of the string towards the start, so
    that a 5-digit house number in a leading field is not mistaken for the
    ZIP code. (Assumes the postal code appears after the street fields, as in
    the geocoder addresses this notebook passes in -- TODO confirm for other
    address sources.)

    Args:
        addr: Address whose fields are separated by commas. Whitespace around
            each field is ignored.

    Returns:
        The last field made of exactly five ASCII digits, as a string, or
        ``None`` when ``addr`` is empty/``None`` or has no such field.
    """
    if not addr:
        return None

    for field in reversed(addr.split(',')):
        candidate = field.strip()
        # Exactly five characters, all of them ASCII digits: probably a ZIP code.
        if len(candidate) == 5 and all(ch in '0123456789' for ch in candidate):
            return candidate

    return None


def find_coordinates(df_row, min_delay_seconds: float = 1.0):
    """Geocode the 'Building Address' of one row.

    Args:
        df_row: Mapping-like row (e.g. a DataFrame row from
            ``df.apply(..., axis=1)``) with a 'Building Address' entry.
        min_delay_seconds: Pause inserted before each geocoder request. The
            default of one second follows the public Nominatim usage policy
            (at most one request per second); lower it only when talking to
            a private geocoding server.

    Returns:
        ``(latitude, longitude, zip)`` when the lookup succeeds and falls
        inside the module-level bounding box (``x_bound`` / ``y_bound``);
        ``zip`` is whatever ``get_zip`` extracts from the geocoder's address.
        ``(pd.NA, pd.NA, pd.NA)`` when the address is missing or blank, the
        lookup raises, nothing is found, or the result is out of bounds.
    """
    missing = (pd.NA, pd.NA, pd.NA)

    raw_addr = df_row['Building Address']
    # Missing cells come out of pandas as NaN/NA (not str); there is nothing
    # to look up, so skip the request instead of failing on `.replace`.
    if not isinstance(raw_addr, str) or not raw_addr.strip():
        print("No address to look up:", raw_addr)
        return missing

    addr = raw_addr.replace('\n', ' ')

    # prevent too many requests from being sent in one short time interval
    if min_delay_seconds > 0:
        time.sleep(min_delay_seconds)

    try:
        location = geolocator.geocode(addr)
    except Exception as e:
        # Deliberately best-effort: one failed lookup must not abort the
        # whole batch, so report it and mark the row as missing.
        print("Error", e, "while getting", addr)
        return missing

    if location is None:
        print("No location for", addr)
        return missing

    print("Found location for", addr)
    lat_ok = y_bound[0] < location.latitude < y_bound[1]
    lon_ok = x_bound[0] < location.longitude < x_bound[1]
    if lat_ok and lon_ok:
        return (location.latitude, location.longitude, get_zip(location.address))

    print("Address lookup returned a bad result: discarding")
    return missing

In [21]:
# Load the affordable-units table, geocode every row with geopy, and save the
# result. Each 'coordinates' entry is the tuple returned by find_coordinates
# for that row.
df = (
    pd.read_csv('../data/affordable_units.csv')
    .assign(coordinates=lambda units: units.apply(find_coordinates, axis=1))
)
df.to_csv('../data/preprocessed_affordable_units_withZIP.csv')

Found location for 8 Griggs Street
Found location for 31 North Beacon Street
Found location for 40 Malvern Street
Found location for 450 Cambridge Street
Found location for 1505 Commonwealth Avenue
No location for Continuum 219 Western Avenue
No location for The Eco Allston 75 Brainerd Road
No location for The Edge 60–66 Brainerd Road
No location for The Element 65 Brainerd Road
No location for The Gateway 1298 & 1302 Commonwealth Ave
No location for The Radius Apartments 530 Western Avenue 
Found location for Trac 75 75 Braintree Street
Found location for 30 Dalton 30 Dalton Street
Found location for Avalon @ Exeter 77 Exeter Street
Found location for Avalon @ Prudential 780 Boylston Street
No location for Mandarin Oriental 778 Boylston Street
Found location for 150 Chestnut Hill Avenue
Found location for 375 Market Street
No location for Hichblock 24-26 Hichborn Street
No location for St. Gabriel's Apartments 159-201 Washington Street
No location for Waterstone at the Circle 385 Ches