In [1]:
import uszipcode
from uszipcode import SearchEngine
import geopy
from geopy.geocoders import Nominatim
import pandas as pd
import os
import csv
from pathlib import Path
import datetime as dt

In [2]:
# load zip code dictionary created in previous file
zipcode_file = Path('./Resources/zipcodes.csv')
zipcodes_df = pd.read_csv(zipcode_file)
zipcodes_df

Unnamed: 0,RegionID,City,State,Zip Code
0,17426,Chicago,Illinois,60601
1,18959,Las Vegas,Nevada,89101
2,38128,Dallas,Texas,75201
3,10920,Columbus,Ohio,43085
4,12455,Louisville,Kentucky,40202
...,...,...,...,...
811,27950,Wildwood,New Jersey,8260
812,36173,Miramar Beach,Florida,32550
813,21021,Vail,Arizona,85641
814,25643,Longboat Key,Florida,34228


## CREATE GEOCODING API SEARCH TO ADD COORDINATE DATA

### We'll use the Nominatim API through GeoPy

In [3]:
# Create empty dictionaries to store our results
zipcodes_coords_dict = {"RegionID": [], "City": [], "State": [], "Zip Code": [], "Latitude": [], "Longitude": []}
bad_coords = {"RegionID": [], "City": [], "State": [], "Zip Code": []}
geolocator = Nominatim(user_agent="coords_locator")
for i in zipcodes_df.index:
    try:
        country= "United States"
        place = zipcodes_df["Zip Code"].iloc[i]
        city=zipcodes_df['City'].iloc[i]
        state=zipcodes_df['State'].iloc[i]
        regionId=zipcodes_df['RegionID'].iloc[i]
        location = geolocator.geocode({"postalcode": place, "country": country})
        for loc in location:
            if regionId not in zipcodes_coords_dict["RegionID"]:
                zipcodes_coords_dict['RegionID'].append(regionId)
                zipcodes_coords_dict['City'].append(city)
                zipcodes_coords_dict['State'].append(state)
                zipcodes_coords_dict['Zip Code'].append(place)
                zipcodes_coords_dict['Latitude'].append(location.latitude)
                zipcodes_coords_dict['Longitude'].append(location.longitude)
                print((int(i)+1, city, state, location.latitude, location.longitude))
    except:
        # Add locations with errors to separate dictionary
        bad_coords['RegionID'].append(regionId)
        bad_coords['City'].append(city)
        bad_coords['State'].append(state)
        bad_coords['Zip Code'].append(place)
        print(f"--------No coords found for {city}, {state}--------")
        pass

(1, 'Chicago', 'Illinois', 41.84393511914893, -87.78635165265958)
(2, 'Las Vegas', 'Nevada', 36.16714483982684, -115.13964383593073)
(3, 'Dallas', 'Texas', 32.78546690563107, -96.79829482601941)
(4, 'Columbus', 'Ohio', 40.09538501587436, -83.0375997279287)
(5, 'Louisville', 'Kentucky', 38.25122790183851, -85.7494003126708)
(6, 'Orlando', 'Florida', 28.54301509869281, -81.3764156993464)
(7, 'Denver', 'Colorado', 39.750888708885945, -104.99662415053051)
(8, 'Washington', 'District of Columbia', 38.915410882917115, -77.01782625863444)
(9, 'Portland', 'Oregon', 45.508517232669156, -122.69277027160646)
(10, 'Nashville', 'Tennessee', 36.16435977416667, -86.7747828575)
(11, 'Omaha', 'Nebraska', 41.26104168238994, -95.93493489979036)
(12, 'Oklahoma City', 'Oklahoma', 35.45273474888179, -97.5166614683706)
(13, 'Raleigh', 'North Carolina', 35.77797953110774, -78.6341786013657)
(14, 'Colorado Springs', 'Colorado', 38.756039618181816, -104.80601142727272)
(15, 'Minneapolis', 'Minnesota', 44.982154

In [4]:
# Create a dataframe of locations with no coordinates, to see how many we're losing from our dataset
bad_coords_df = pd.DataFrame(bad_coords, columns=["RegionID", "City", "State", "Zip Code"])
bad_coords_df

Unnamed: 0,RegionID,City,State,Zip Code
0,11722,Greensboro,North Carolina,27395
1,17759,Des Moines,Iowa,50307
2,50779,Asheville,North Carolina,28801
3,33058,New Bedford,Massachusetts,2740
4,39558,Lynn,Massachusetts,1901
...,...,...,...,...
85,37287,Barrington,Rhode Island,2806
86,5027,Hackettstown,New Jersey,7840
87,21366,Woodbury,New Jersey,8096
88,54461,South Amboy,New Jersey,8879


In [5]:
# create dataframe from viable locations
zip_coords_df_cleaned = pd.DataFrame(zipcodes_coords_dict, columns=['RegionID', 'City', 'State', 'Zip Code', 'Latitude', 'Longitude'])
zip_coords_df_cleaned

Unnamed: 0,RegionID,City,State,Zip Code,Latitude,Longitude
0,17426,Chicago,Illinois,60601,41.843935,-87.786352
1,18959,Las Vegas,Nevada,89101,36.167145,-115.139644
2,38128,Dallas,Texas,75201,32.785467,-96.798295
3,10920,Columbus,Ohio,43085,40.095385,-83.037600
4,12455,Louisville,Kentucky,40202,38.251228,-85.749400
...,...,...,...,...,...,...
721,26869,Royersford,Pennsylvania,19468,40.206897,-75.529549
722,36173,Miramar Beach,Florida,32550,30.384095,-86.349680
723,21021,Vail,Arizona,85641,31.955139,-110.762746
724,25643,Longboat Key,Florida,34228,27.389929,-82.639538


In [6]:
# Export zip_coords_df to CSV, which will serve as our finalized dictionary of location data to unique RegionID
zip_coords_df_cleaned.to_csv("./Resources/zipcodes_coordinates.csv", index=False)