In [3]:
import json
from geopy.geocoders import Nominatim
import re
import time

In [4]:
# Load the hospital -> address mapping. JSON text is UTF-8, so the encoding is
# pinned rather than left to the platform default (which can mis-decode
# non-ASCII hospital names on some systems).
with open('./hospital_to_add.json', 'r', encoding='utf-8') as file:
    hospital_to_add = json.load(file)

# hospital name -> (latitude, longitude) for every successfully geocoded address
hospital_to_coords = {}

# geopy's default request timeout is 1 second, which the public Nominatim
# server can exceed; a longer timeout avoids spurious timeout failures.
# NOTE(review): Nominatim's usage policy asks for a User-Agent that identifies
# the application -- consider replacing this generic value.
geolocator = Nominatim(user_agent="geocoding_app", timeout=10)

# hospital name -> human-readable reason the full address could not be geocoded
failures = {}

# First pass: geocode each hospital's full address.
for hospital, address in hospital_to_add.items():
    try:
        location = geolocator.geocode(address)
    except Exception as e:
        # Deliberately broad: one bad request must not abort the whole batch.
        # Store text (not the exception object) so the reason stays printable
        # and serialisable, and no traceback is kept alive.
        failures[hospital] = f"{type(e).__name__}: {e}"
    else:
        if location:
            hospital_to_coords[hospital] = (location.latitude, location.longitude)
        else:
            # geocode() returned nothing for this address.
            failures[hospital] = 'No geocoding result'

    time.sleep(1)  # Be nice to the Nominatim API: add a short delay

In [5]:
# Number of hospitals whose full address could not be geocoded in the first
# pass (either no result was returned or the request raised an exception).
len(failures)

89

In [6]:
def _fallback_query(address):
    """Build a coarser geocoding query from a full street address.

    Returns:
        "City, ST 12345" when the address contains a
        "<city>, <two-letter state> <5-digit ZIP>" fragment; otherwise the
        first standalone 5-digit number in the address; otherwise None.
        Non-string input (e.g. a JSON null) also yields None.
    """
    if not isinstance(address, str):
        return None

    # Try to extract city, state, and zip.
    # NOTE(review): the city class only allows word characters and whitespace,
    # so names containing '.', '-' or an apostrophe are captured only partially.
    match = re.search(r'([\w\s]+),\s*([A-Z]{2})\s*(\d{5})', address)
    if match:
        city, state, zip_code = (part.strip() for part in match.groups())
        return f"{city}, {state} {zip_code}"

    # If city/state not found, fall back to just the zip.
    zip_match = re.search(r'\b\d{5}\b', address)
    return zip_match.group() if zip_match else None


# Second pass: retry every first-pass failure with a less specific query.
# Coordinates found here are city/ZIP-level approximations, so distinct
# hospitals in the same area can end up with identical coordinates.
second_pass_coords = {}
still_failed = {}

for hospital in failures:
    query = _fallback_query(hospital_to_add[hospital])
    if query is None:
        # Can't even get a zip code
        still_failed[hospital] = 'No parsable location info'
        continue  # move to next hospital (no request was made, so no delay)

    # Try geocoding the new query. The queries are built from US-style
    # "ST 12345" fragments, so restrict results to the US: a bare 5-digit
    # number would otherwise be searched worldwide.
    try:
        location = geolocator.geocode(query, country_codes="us")
        if location:
            second_pass_coords[hospital] = (location.latitude, location.longitude)
            print(f"Second pass success for {hospital}: {location.latitude}, {location.longitude}")
        else:
            print(f"Second pass failed for {hospital}")
            still_failed[hospital] = 'Geocoding failed'
    except Exception as e:
        # Best-effort batch: record the error text and keep going.
        still_failed[hospital] = str(e)

    time.sleep(1)  # Respect rate limits

Second pass success for Helen Hayes Hospital: 41.2098301, -73.9857513
Second pass success for Arnot Ogden Medical Center: 42.0897965, -76.8077338
Second pass success for Bellevue Hospital Center: 40.7127281, -74.0060152
Second pass success for Canton-Potsdam Hospital: 44.5956163, -75.1690942
Second pass success for Coney Island Hospital: 40.7127281, -74.0060152
Second pass success for Brookdale Hospital Medical Center: 40.6526006, -73.9497211
Second pass success for Carthage Area Hospital Inc: 44.152159, -75.321236
Second pass success for Catskill Regional Medical Center: 41.6556465, -74.6893282
Second pass success for Catskill Regional Medical Center - G. Hermann Site: 41.7142564, -74.726273
Second pass success for Clifton-Fine Hospital: 44.1601506, -75.034694
Second pass success for Cobleskill Regional Hospital: 42.677853, -74.4854172
Second pass success for Cohen Children's Medical Center: 40.7308451, -73.6805499
Second pass success for Columbia Memorial Hospital: 42.2540699, -73.79

In [7]:
# Hospitals that could not be geocoded even with the coarser second-pass query,
# mapped to the failure reason; an empty dict means every fallback succeeded.
still_failed

{}

In [11]:
# Fold the second-pass results into the main mapping; on a key collision the
# second-pass coordinates replace the existing entry.
hospital_to_coords.update(second_pass_coords)

In [12]:
# Persist the merged hospital -> (latitude, longitude) mapping as
# pretty-printed JSON; coordinate tuples are written as JSON arrays.
with open("hospital_to_coord.json", "w") as out_file:
    serialized = json.dumps(hospital_to_coords, indent=4)
    out_file.write(serialized)