# I. Experimentation with API

In [1]:
import requests

In [2]:
# Nominatim free-form search endpoint; requests appends the query string.
base_url = 'https://nominatim.openstreetmap.org/search?'

# Query parameters: JSON output and the free-form search text.
params = dict(format='json', q='Eiffel tower')

In [3]:
# Send the search request with the parameters defined above; the bare
# last expression displays the HTTP status code of the response.
result = requests.get(base_url, params=params)
result.status_code

200

In [4]:
result.json()[:2]

[{'place_id': '69121935',
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
  'osm_type': 'way',
  'osm_id': '5013364',
  'boundingbox': ['48.8574753', '48.8590465', '2.2933084', '2.2956897'],
  'lat': '48.8582602',
  'lon': '2.29449905431968',
  'display_name': 'Tour Eiffel, 5, Avenue Anatole France, Gros-Caillou, 7e, Paris, Île-de-France, France métropolitaine, 75007, France',
  'class': 'tourism',
  'type': 'attraction',
  'importance': 0.653772102971417,
  'icon': 'https://nominatim.openstreetmap.org/images/mapicons/poi_point_of_interest.p.20.png'},
 {'place_id': '28224069',
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
  'osm_type': 'node',
  'osm_id': '2689315641',
  'boundingbox': ['51.33355', '51.33365', '-116.23505', '-116.23495'],
  'lat': '51.3336',
  'lon': '-116.235',
  'display_name': 'Eiffel Tower, Alberta, Canada',
  'class': 'natural',
  'type': 'peak',
  'importance': 0.5,
  'icon': 'https:/

In [5]:
# A place that does not exist: the displayed result shows what the API
# returns when nothing matches the query.
params = dict(format='json', q='Cair Paravel, Narnia', limit=1)

requests.get(base_url, params=params).json()

[]

# Function

In [6]:
import requests
from time import sleep
 
base_url = 'https://nominatim.openstreetmap.org/search?'
 
def nominatim_geocode(address, format='json', limit=1, **kwargs):
    '''Thin wrapper around the Nominatim search API.

    Documentation: https://wiki.openstreetmap.org/wiki/Nominatim#Parameters

    Parameters
    ----------
    address : str or None
        Free-form query, sent as the ``q`` parameter. Pass ``None`` when the
        query is given through ``kwargs`` instead; requests leaves
        parameters whose value is ``None`` out of the query string.
    format : str
        Value of the ``format`` query parameter (default ``'json'``).
    limit : int
        Value of the ``limit`` query parameter (default ``1``).
    **kwargs
        Any additional query parameters, forwarded to the API unchanged.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the response status is unsuccessful.
    requests.Timeout
        If the server does not answer within the request timeout.
    '''
    request_timeout = 10  # seconds; without it a stalled connection hangs forever
    pause = 1             # seconds to wait after every request (rate limiting)

    params = {'q': address, 'format': format, 'limit': limit, **kwargs}

    try:
        response = requests.get(base_url, params=params, timeout=request_timeout)
        response.raise_for_status()  # will raise exception if status is unsuccessful
        return response.json()
    finally:
        # Pause even when the request failed, so a caller looping over many
        # addresses never fires requests back-to-back after an error.
        sleep(pause)

In [7]:
nominatim_geocode('Eiffel tower')

[{'place_id': '69121935',
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
  'osm_type': 'way',
  'osm_id': '5013364',
  'boundingbox': ['48.8574753', '48.8590465', '2.2933084', '2.2956897'],
  'lat': '48.8582602',
  'lon': '2.29449905431968',
  'display_name': 'Tour Eiffel, 5, Avenue Anatole France, Gros-Caillou, 7e, Paris, Île-de-France, France métropolitaine, 75007, France',
  'class': 'tourism',
  'type': 'attraction',
  'importance': 0.653772102971417,
  'icon': 'https://nominatim.openstreetmap.org/images/mapicons/poi_point_of_interest.p.20.png'}]

In [8]:
# No free-form address here: street, city and country are passed as extra
# keyword arguments, which the wrapper forwards as query parameters.
nominatim_geocode(address=None,
                  street='221B Baker Street', 
                  city='London', country='Great Britain')

[{'place_id': '50843439',
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
  'osm_type': 'node',
  'osm_id': '3916613190',
  'boundingbox': ['51.5237104', '51.5238104', '-0.1585445', '-0.1584445'],
  'lat': '51.5237604',
  'lon': '-0.1584945',
  'display_name': 'The Sherlock Holmes Museum, 221B, Baker Street, Marylebone, City of Westminster, London, Greater London, England, NW1 6XE, UK',
  'class': 'tourism',
  'type': 'museum',
  'importance': 0.5209999999999999,
  'icon': 'https://nominatim.openstreetmap.org/images/mapicons/tourist_museum.p.20.png'}]

# Reading and Writing to CSV files

In [9]:
from csv import DictReader, DictWriter
path = './cities.csv'

In [10]:
def read_csv(path):
    '''Read a CSV file and return it as a list of dictionaries, one per row.

    The first line of the file is used as the header (DictReader default),
    so every returned dictionary is keyed by column name.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    list
        One mapping per data row, in file order.
    '''
    # newline='' is required by the csv module: it lets the reader handle
    # line endings itself, so newlines embedded in quoted fields survive.
    with open(path, 'r', newline='') as f:
        return list(DictReader(f))


def write_csv(data, path, mode='w'):
    '''Write rows to a CSV file, or append them to an existing one.

    Parameters
    ----------
    data : iterable of dict
        Rows to write. Rows may have different keys: the columns are the
        union of all keys in first-seen order, and missing values are left
        empty.
    path : str or path-like
        Destination file.
    mode : {'w', 'a'}
        'w' overwrites the file and writes a header line first.
        'a' appends rows without a header; the column order is still derived
        from ``data``, so it is the caller's job to make it match the file.

    Raises
    ------
    ValueError
        If ``mode`` is anything other than 'w' or 'a'.

    Notes
    -----
    With empty ``data`` no header can be derived: 'w' leaves an empty file,
    'a' leaves the file as it was.
    '''
    # Tuple membership, not substring membership: "mode in 'wa'" would also
    # accept '' and 'wa'.
    if mode not in ('w', 'a'):
        raise ValueError("mode should be either 'w' or 'a'")

    rows = list(data)

    # Collect every key that occurs in any row, so a later row carrying more
    # fields than the first one does not make DictWriter raise.
    fieldnames = []
    seen = set()
    for row in rows:
        for key in row:
            if key not in seen:
                seen.add(key)
                fieldnames.append(key)

    # newline='' is required by the csv module to avoid doubled line endings
    # on platforms that translate '\n' on write.
    with open(path, mode, newline='') as f:
        if not rows:
            return

        writer = DictWriter(f, fieldnames=fieldnames)
        if mode == 'w':
            writer.writeheader()

        writer.writerows(rows)

In [11]:
cities = read_csv('./cities.csv')
cities[0]

OrderedDict([('name', 'Tokio'), ('population', '38.05'), ('country', 'Japan')])

In [12]:
write_csv(cities, './my_cities.csv')

# Batch geocode

In [13]:
from tqdm import tqdm

In [27]:
def geocode_bulk(data, column='address', verbose=False):
    '''Geocode every row of ``data``, treating ``row[column]`` as the address.

    Parameters
    ----------
    data : iterable of dict
        Rows to geocode. Any iterable works, including generators; the input
        rows are never modified.
    column : str
        Key holding the address text in each row.
    verbose : bool
        If True, print a message for addresses with no match, for every
        error, and a final failure count.

    Returns
    -------
    (result, errors) : tuple of two lists
        ``result`` holds one entry per row that was processed without an
        exception: the first geocoding match merged with the row (row values
        win on key clashes), or the row itself when nothing was found.
        ``errors`` holds a copy of each row that raised, with the exception
        stored under the 'error' key.
    '''
    result, errors = [], []
    total = 0  # counted by hand so that inputs without len() are supported

    for row in tqdm(data):
        total += 1
        try:
            search = nominatim_geocode(row[column], limit=1)
            if len(search) == 0:  # no location found
                result.append(row)
                if verbose:
                    print(f"Can't find anything for {row[column]}")

            else:
                info = search[0]  # most "important" result
                info.update(row)  # merge two dicts; row values take precedence
                result.append(info)
        except Exception as e:
            if verbose:
                print(e)
            # Record the failure on a copy: writing 'error' into the caller's
            # row would leak into the input data and into later runs.
            errors.append({**row, 'error': e})

    if len(errors) > 0 and verbose:
        print(f'{len(errors)}/{total} rows failed')

    return result, errors

In [28]:
len(cities)

10

In [29]:
result, errors = geocode_bulk(cities, column='name', verbose=True)

100%|██████████| 10/10 [00:13<00:00,  1.39s/it]


In [30]:
len(result)

10

In [21]:
errors

[]

# Script

In [None]:
# functions above + this:
# path_in = './capitals.csv'
# path_out = './geocoded_capitals.csv'

# data = read_csv(path_in)
# result, errors = geocode_bulk(data, column='name', verbose=True)
# write_csv(result, path_out)

Check out the final version of the script, `geocode.py`, in the same folder.