In [55]:

# interacting with Nominatim's API
import requests
from time import sleep

# Search endpoint of the public Nominatim service; query parameters are
# appended to this URL by the request function below.
base_url = 'https://nominatim.openstreetmap.org/search?'


## Function Block

In [56]:
# creating the request function that will interact
# with the api
from functools import lru_cache

# wrapping the function in a cache so that repeated
# queries are answered locally instead of hitting the API again
@lru_cache(maxsize=2000)
def nominatim_geocode(address=None, format='json', limit=1, **kwargs):
    '''Thin, memoized wrapper around the Nominatim search API.

    Documentation:
    https://wiki.openstreetmap.org/wiki/Nominatim#Parameters

    Args:
        address (str, optional): free-form query, sent as the ``q`` parameter.
        format (str, optional): value of the ``format`` parameter.
            Defaults to 'json'.
        limit (int, optional): value of the ``limit`` parameter.
            Defaults to 1.
        **kwargs: extra query parameters forwarded unchanged. Because the
            call is memoized with ``lru_cache``, every argument must be
            hashable.

    Returns:
        The decoded JSON body of the response (``response.json()``).
        On a cache hit the very same object is returned again, so callers
        should copy it before modifying it.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    '''

    # setting the parameters for the request
    params = {
        "q": address,
        "format": format,
        "limit": limit,
        **kwargs,
    }

    try:
        # send the get request
        response = requests.get(base_url, params=params)
    finally:
        # sleeping so we don't get in trouble: pause after every real
        # request, including ones that failed (cache hits never get here)
        sleep(1)

    print(response)

    # check for a valid response -- the method has to be *called*;
    # a bare attribute access does nothing
    response.raise_for_status()

    # returning the response in a parsed json
    return response.json()

### Sample request

In [57]:
# Single free-form query as a smoke test; the function prints the HTTP
# response object and the cell displays the parsed JSON it returns.
nominatim_geocode('Eiffel Tower')

<Response [200]>


[{'place_id': 99784192,
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
  'osm_type': 'way',
  'osm_id': 5013364,
  'boundingbox': ['48.8574753', '48.8590453', '2.2933084', '2.2956897'],
  'lat': '48.858260200000004',
  'lon': '2.2944990543196795',
  'display_name': 'Tour Eiffel, 5, Avenue Anatole France, Quartier du Gros-Caillou, Paris 7e Arrondissement, Paris, Île-de-France, France métropolitaine, 75007, France',
  'class': 'tourism',
  'type': 'attraction',
  'importance': 0.6868325701744196,
  'icon': 'https://nominatim.openstreetmap.org/ui/mapicons/poi_point_of_interest.p.20.png'}]

### Reading and writing the data

In [58]:
# using the csv library
from csv import DictReader, DictWriter

def read_csv(path:str) -> list:
    """Read a csv file and return its rows as a list of dictionaries.

    Args:
        path (str): this is the file path to the csv

    Returns:
        list: one dict per data row, keyed by the names in the header row
    """

    # open the csv for reading; newline='' leaves line-ending handling to
    # the csv module, so line breaks embedded in quoted fields survive intact
    with open(path, 'r', newline='') as f:
        return list(DictReader(f))

def write_csv(data, path, mode='w'):
    """Write a list of dictionaries to a csv file.

    The column names are taken from the keys of the first row.

    Args:
        data (list): rows to write, one dict per row
        path (str): this is the path to the new csv
        mode (str, optional): 'w' overwrites the file and writes a header
            row first; 'a' appends the rows without a header.
            Defaults to 'w'.

    Raises:
        ValueError: The value for mode is not correct
    """

    # Raise error if there isn't a valid write mode. Compare against a
    # tuple: `mode not in 'wa'` is a substring test and would let ''
    # and 'wa' through.
    if mode not in ('w', 'a'):
        raise ValueError("mode should be either 'w' or 'a'")

    rows = list(data)

    # opens / creates the file and writes to the csv; newline='' stops the
    # text layer from translating the line endings the csv writer emits
    with open(path, mode, newline='') as f:
        if not rows:
            # nothing to write: no first row to take the column names from
            return

        writer = DictWriter(f, fieldnames=list(rows[0].keys()))
        if mode == 'w':
            writer.writeheader()

        writer.writerows(rows)
        

### Testing the code :)

In [59]:
# Load ./cities.csv as a list of dicts, one per row.
cities = read_csv('./cities.csv')

### Adding a progress bar

Shows an example of the progress bar

In [76]:
from tqdm import tqdm

# Small throwaway list, used only to demonstrate the progress bar.
collection = ['Apple', 'Banana', 'Orange']

# Wrapping the iterable in tqdm() is all that is needed; the loop body is
# intentionally empty.
for fruit in tqdm(collection):
    pass

100%|██████████| 3/3 [00:00<00:00, 66576.25it/s]


### Writing a bulk function to handle many requests
This adds error handling around each request

In [73]:
def geocode_bulk(data, column='address', verbose=False):
    """Geocode every row of ``data``, treating ``row[column]`` as an address.

    Args:
        data (list): rows to geocode, one dict per row. It is iterated
            once and ``len(data)`` is used in the verbose failure summary.
        column (str, optional): key holding the text to search for.
            Defaults to 'address'.
        verbose (bool, optional): print misses, errors and a failure
            count. Defaults to False.

    Returns:
        tuple: ``(result, errors)``.
            ``result`` holds, for each row that did not raise, either the
            first search hit merged with the row (row values win on key
            clashes) or the row itself when nothing was found.
            ``errors`` holds a copy of each row that raised, with the
            exception stored under the 'error' key.
    """
    result, errors = [], []
    for row in tqdm(data):
        try:
            search = nominatim_geocode(row[column], limit=1)
            if len(search) == 0:  # no location found
                result.append(row)
                if verbose:
                    print(f"Can't find anything for {row[column]}")
            else:
                # most "important" result. Copy it before merging: the
                # object is owned by the lru_cache on nominatim_geocode,
                # and updating it in place would corrupt later cache hits.
                info = dict(search[0])
                info.update(row)  # merge two dicts
                result.append(info)
        except Exception as e:
            # deliberate best-effort: one bad row must not stop the batch
            if verbose:
                print(e)
            # record the failure on a copy so the caller's rows stay clean
            failed = dict(row)
            failed['error'] = e
            errors.append(failed)

    if len(errors) > 0 and verbose:
        print(f'{len(errors)}/{len(data)} rows failed')

    return result, errors

### Trying out the bulk function


In [75]:
# Geocode every row of `cities`, searching on its 'name' column;
# verbose=True prints misses and errors as they occur.
result, errors = geocode_bulk(cities, column='name',verbose=True)

100%|██████████| 10/10 [00:00<00:00, 200684.40it/s]

Can't find anything for Mumbau



