In [1]:
"""
General Data Science Packages
"""
import numpy as np
import pandas as pd
import geopandas as gpd
# import fiona
# import shapely
from shapely.geometry import shape

"""
Data Managment Packages
"""
# import time
# import os
import ast

"""
Geocoding Packages
"""
# import geopy as gp
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from functools import partial

"""
Distance Calculations
"""
# from geopy.distance import geodesic
# from geopy.distance import great_circleQ

"""
Check Python Version
"""
!python --version

Python 3.8.3


# 3. Geocoding

## 3.0. Prepare the editted gazetteer

In [2]:
G_ = pd.read_csv('data_raw/editted_gazetteer/210829_editted_gazetteer.csv')

G_

Unnamed: 0,L1_orig,L2_orig,L3_orig,tuple_orig,L1_edit,L1_chg,L2_edit,L2_chg,L3_edit,L3_chg
0,-3,-1,-1,"('-3', '-1', '-1')",-3,False,-1,False,-1,False
1,-1,-1,-1,"('-1', '-1', '-1')",-1,False,-1,False,-1,False
2,46,-1,-1,"('46', '-1', '-1')",-16,True,-1,False,-1,False
3,99,-1,-1,"('99', '-1', '-1')",-16,True,-1,False,-1,False
4,37 Military Hospital,-1,-1,"('37 Military Hospital', '-1', '-1')",-18,True,-1,False,-1,False
...,...,...,...,...,...,...,...,...,...,...
8930,Westminster,London,Uk,"('Westminster', 'London', 'Uk')",Westminster,False,London,False,United Kingdom,True
8931,Wilcox,Georgia,Usa,"('Wilcox', 'Georgia', 'Usa')",Wilcox County,True,Georgia,False,United States Of America,True
8932,Would Wish To Move But Destination Still Unknown,,,('Would Wish To Move But Destination Still Unk...,-15,True,-11,True,-11,True
8933,York,York,Canada,"('York', 'York', 'Canada')",York,False,Toronto,True,Canada,False


In [191]:
# run to reset the working dataframe to original dataframe
G = G_.copy()

### 3.0.a. Read and check the original tuples in the editted gazetteer

In [4]:
# print columns and their position in the data
for i, col in enumerate(list(G.columns)):
    print(str(i) + '\t' + str(col))

0	L1_orig
1	L2_orig
2	L3_orig
3	tuple_orig
4	L1_edit
5	L1_chg
6	L2_edit
7	L2_chg
8	L3_edit
9	L3_chg


##### Check for any problematic duplicate tuples in the rows

Since later the geodata queried will be merged based on the tuples, any duplicate tuples could create problems by reshaping the data. The relationship between original tuples and the cleaned tuples must be one-to-one.

In [184]:
# check for any potential row-wise errors
print('\nDuplicate rows?:')
print(G.duplicated().value_counts())

# check for duplicate tuples
print('\nUnique tuples?:')
print(G['tuple_orig'].is_unique)

if G['tuple_orig'].is_unique is False:
    
    print('\n======\n\nDUPLICATES DIAGNOSTIC:')
    
    G_dups = G.duplicated(subset='tuple_orig', keep=False)
    
    print('\nDuplicate original tuples counts: ')
    print(G_dups.value_counts())
    
    G_duprows = G.loc[G_dups == True, :]
    
    print('------')
    print('Same number of duplicate editted tuples? \n(If so, rows may be exactly duplicated and mostly likely can be dropped.)')
    print(G_duprows.duplicated(subset=['L1_Edit_Final', 'L2_Edit_Final', 'L3_Edit_Final'], keep=False).value_counts())
    
    print('\nIndices of the duplicated rows: ')
    print(G_duprows.index)
    
    G_dupcount = pd.DataFrame(G_duprows['tuple_orig'].value_counts()).reset_index()

    print('\nUnique tuples within the duplicated rows: ')
    print(len(G_dupcount.index))
    
    print('\nValue counts of tuples in duplicated rows: ')
    display(G_dupcount)


Duplicate rows?:
False    8935
dtype: int64

Unique tuples?:
True


##### Function to evaluate the strings of the CSV as tuple literals

In [6]:
# function to map over the string elements of the dataframe
def str_eval(e):
    """
    A function to evaluate string element in a dataframe literally, such as the multilocation tuples.
    
    RETURNS: The Python literal. 
    """
    # 01 | Ensure the element is read by ast.literal_eval() as a string to avoid errors.
    e = str(e)
    
    # 02 | Filter out any null values
    if (e != 'nan'):
        
        # 03 | Evaluate the string literally
        try:
            ml = ast.literal_eval(e)
        
        # If unable to read the string, print it so it can be traced back to address issues
        except:
            print(e)
            ml = None
    
    # Return null values as null values
    else:
        ml = None
        
    return ml

### 3.0.b Clean the editted columns to create the query columns

##### Filter out null indicators

In [195]:
# list of bad values to avoid querying
badvals = ['NaN', '0', '-1', '-3', '1', '5', '-10', '-11', '-13', '-15', '-16', '-18']

G['L1_query'] = G['L1_edit'].replace(badvals, np.nan)
G['L2_query'] = G['L2_edit'].replace(badvals, np.nan)
G['L3_query'] = G['L3_edit'].replace(badvals, np.nan)

G

Unnamed: 0,L1_orig,L2_orig,L3_orig,tuple_orig,L1_edit,L1_chg,L2_edit,L2_chg,L3_edit,L3_chg,L1_query,L2_query,L3_query
0,-3,-1,-1,"('-3', '-1', '-1')",-3,False,-1,False,-1,False,,,
1,-1,-1,-1,"('-1', '-1', '-1')",-1,False,-1,False,-1,False,,,
2,46,-1,-1,"('46', '-1', '-1')",-16,True,-1,False,-1,False,,,
3,99,-1,-1,"('99', '-1', '-1')",-16,True,-1,False,-1,False,,,
4,37 Military Hospital,-1,-1,"('37 Military Hospital', '-1', '-1')",-18,True,-1,False,-1,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8930,Westminster,London,Uk,"('Westminster', 'London', 'Uk')",Westminster,False,London,False,United Kingdom,True,Westminster,London,United Kingdom
8931,Wilcox,Georgia,Usa,"('Wilcox', 'Georgia', 'Usa')",Wilcox County,True,Georgia,False,United States Of America,True,Wilcox County,Georgia,United States Of America
8932,Would Wish To Move But Destination Still Unknown,,,('Would Wish To Move But Destination Still Unk...,-15,True,-11,True,-11,True,,,
8933,York,York,Canada,"('York', 'York', 'Canada')",York,False,Toronto,True,Canada,False,York,Toronto,Canada


##### Replace incomplete edits with original

In [196]:
for L in ['L1', 'L2']:
    
    # 01 | locate rows where the edit column is blank but the original column is not blank
    row_blank = G[G[(L+'_edit')].isna() & ~G[(L+'_orig')].isna()]

    print('======\n{} rows for {}'.format(len(row_blank.index), L))
    
    print('Before: ')
    display(row_blank[[L+'_orig', L+'_edit', L+'_query']].head(6))

    # 02 | put the original names back into the query column
    G.at[row_blank.index, L+'_query'] = G.loc[row_blank.index, L+'_orig']

    print('After: ')
    display(pd.DataFrame(G.loc[row_blank.index, L+'_query']).head(6))

1560 rows for L1
Before: 


Unnamed: 0,L1_orig,L1_edit,L1_query
3239,Democratic Republic Of Congo,,
5075,Juja,,
5081,Mountain View,,
5084,Wilson,,
5086,Windhoek,,
5116,All Parts Of Niger,,


After: 


Unnamed: 0,L1_query
3239,Democratic Republic Of Congo
5075,Juja
5081,Mountain View
5084,Wilson
5086,Windhoek
5116,All Parts Of Niger


873 rows for L2
Before: 


Unnamed: 0,L2_orig,L2_edit,L2_query
5084,Wilson,,
5086,Windhoek,,
5091,Bilma,,
5092,Birmingham Koni,,
5098,Harobanda In Niamey,,
5104,Moradey,,


After: 


Unnamed: 0,L2_query
5084,Wilson
5086,Windhoek
5091,Bilma
5092,Birmingham Koni
5098,Harobanda In Niamey
5104,Moradey


In [197]:
for L in ['L1', 'L2']:

    # 01 | locate rows where a -19 was added for unsearchable
    row_19 = G[G[L+'_edit'] == '-19']

    print('======\n{} rows for {}'.format(len(row_blank.index), L))
    
    print('Before: ')
    display(row_19[[L+'_orig', L+'_edit', L+'_query']].head(6))

    # 02 | put the original names back into the query column
    G.at[row_19.index, L+'_query'] = G.loc[row_19.index, L+'_orig'].replace('-19', np.nan)

    print('After: ')
    display(pd.DataFrame(G.loc[row_19.index, L+'_query']).head(6))

873 rows for L1
Before: 


Unnamed: 0,L1_orig,L1_edit,L1_query
42,-19,-19,-19
230,Ashabiena,-19,-19
269,Benin Dagbo,-19,-19
276,Dzefa,-19,-19
277,Ekpe,-19,-19
278,Fidzrose,-19,-19


After: 


Unnamed: 0,L1_query
42,
230,Ashabiena
269,Benin Dagbo
276,Dzefa
277,Ekpe
278,Fidzrose


873 rows for L2
Before: 


Unnamed: 0,L2_orig,L2_edit,L2_query
250,Dakunbrua Banglacesh,-19,-19
262,Weme,-19,-19
308,Kgamodishe,-19,-19
325,Nyamurenge,-19,-19
331,Ruvira,-19,-19
392,Tutsi,-19,-19


After: 


Unnamed: 0,L2_query
250,Dakunbrua Banglacesh
262,Weme
308,Kgamodishe
325,Nyamurenge
331,Ruvira
392,Tutsi


### 3.0.c. Bind query columns into multilocation tuples

In [198]:
# function to perform the binds
def bind(l1, l2, l3):
    """
    A function that binds column values into a multilocation array
    based on the column names passed in.
    
    RETURNS: An tuple of multilocation array(s).
    """
    # 01 | Create an array of strings from the values passed in 
    if all(l is np.nan for l in [l1, l2, l3]):
        return np.nan
    else:
        a = np.array([l1, l2, l3])
        t = tuple(a)
        
    return t

In [199]:
G['tuple_query'] = G.apply(lambda row : bind( row['L1_query'],
                                              row['L2_query'],
                                              row['L3_query'] ), axis=1)

display(G[['L1_query', 'L2_query', 'L3_query', 'tuple_query']])

Unnamed: 0,L1_query,L2_query,L3_query,tuple_query
0,,,,
1,,,,
2,,,,
3,,,,
4,,,,
...,...,...,...,...
8930,Westminster,London,United Kingdom,"(Westminster, London, United Kingdom)"
8931,Wilcox County,Georgia,United States Of America,"(Wilcox County, Georgia, United States Of Amer..."
8932,,,,
8933,York,Toronto,Canada,"(York, Toronto, Canada)"


### 3.0.d. Create Query Strings to call through Nominatim

In [221]:
# Method A: L1 + L3
G['str_query_A'] = G['L1_query'] + ', ' + G['L3_query']

# Method B: L1 + L2 + L3 (backup)
G['str_query_B'] = G['L1_query'] + ', ' + G['L2_query'] + ', ' + G['L3_query']

G

Unnamed: 0,L1_orig,L2_orig,L3_orig,tuple_orig,L1_edit,L1_chg,L2_edit,L2_chg,L3_edit,L3_chg,L1_query,L2_query,L3_query,tuple_query,string_query,str_query_A,str_query_B
0,-3,-1,-1,"('-3', '-1', '-1')",-3,False,-1,False,-1,False,,,,,,,
1,-1,-1,-1,"('-1', '-1', '-1')",-1,False,-1,False,-1,False,,,,,,,
2,46,-1,-1,"('46', '-1', '-1')",-16,True,-1,False,-1,False,,,,,,,
3,99,-1,-1,"('99', '-1', '-1')",-16,True,-1,False,-1,False,,,,,,,
4,37 Military Hospital,-1,-1,"('37 Military Hospital', '-1', '-1')",-18,True,-1,False,-1,False,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8930,Westminster,London,Uk,"('Westminster', 'London', 'Uk')",Westminster,False,London,False,United Kingdom,True,Westminster,London,United Kingdom,"(Westminster, London, United Kingdom)","Westminster, United Kingdom","Westminster, United Kingdom","Westminster, London, United Kingdom"
8931,Wilcox,Georgia,Usa,"('Wilcox', 'Georgia', 'Usa')",Wilcox County,True,Georgia,False,United States Of America,True,Wilcox County,Georgia,United States Of America,"(Wilcox County, Georgia, United States Of Amer...","Wilcox County, United States Of America","Wilcox County, United States Of America","Wilcox County, Georgia, United States Of America"
8932,Would Wish To Move But Destination Still Unknown,,,('Would Wish To Move But Destination Still Unk...,-15,True,-11,True,-11,True,,,,,,,
8933,York,York,Canada,"('York', 'York', 'Canada')",York,False,Toronto,True,Canada,False,York,Toronto,Canada,"(York, Toronto, Canada)","York, Canada","York, Canada","York, Toronto, Canada"


## 3.1. Query from Nominatim and Add Data to Gazetteer

### 3.1.a. Querying for Method A: L1 + L3

##### Create a filtered table of only valid and unique queries for Method A

In [345]:
print('\nUnique Multilocations: \n{}'.format(len(G['str_query_A'])))

to_query = G['str_query_A'].dropna()
print('\nNon-NaN Queries (Method A): \n{}'.format(len(query)))

query = G['str_query_A'].dropna().drop_duplicates()
print('\nUnique Queries (Method A): \n{}'.format(len(query)))
query = pd.DataFrame(query, index=query.index, columns=['str_query_A'])
query['result'] = None

display(query)


Unique Multilocations: 
8935

Non-NaN Queries (Method A): 
3866

Unique Queries (Method A): 
3866


Unnamed: 0,str_query_A,result
5,"Accra, Ghana",
6,"Achimota, Ghana",
7,"Adenta, Ghana",
11,"Agbogbloshie, Ghana",
13,"Jamestown, Ghana",
...,...,...
8922,"Victoria, Seychelles",
8924,"Buginyanya, Uganda",
8929,"Westminster, United Kingdom",
8931,"Wilcox County, United States Of America",


##### Query from Nominatim API

In [229]:
# 01 - Create and instance of a Nominatim class
geocoder = Nominatim(user_agent = 'mt')

# 02 - Rate limit to avoid timeout errors or being blocked by the API
geocode = RateLimiter(geocoder.geocode, min_delay_seconds=2, max_retries=2)

# CALL LOCATIONS FROM NOMINATIM
query['result'] = query['str_query_A'].apply(partial(geocode, exactly_one=0, addressdetails=1, extratags=1, geometry='geojson'))

# Back up the original query to avoid having to rerun the query
query_A_backup = query.copy()

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Saint Quen, France',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    return s

RateLimiter swallowed an error after 2 retries. Called with (*('Saint Quen, France',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    return self

RateLimiter caught an error, retrying (1/2 tries). Called with (*('Britain, United Kingdom',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    ret

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Manste Agbona, Ghana',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    return

RateLimiter swallowed an error after 2 retries. Called with (*('Manste Agbona, Ghana',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    return se

RateLimiter caught an error, retrying (1/2 tries). Called with (*('Mile 2 Estate, Nigeria',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    retu

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Saint-Quen, France',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    return s

RateLimiter swallowed an error after 2 retries. Called with (*('Saint-Quen, France',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    return self

RateLimiter caught an error, retrying (1/2 tries). Called with (*('No Area, South Africa',), **{'exactly_one': 0, 'addressdetails': 1, 'extratags': 1, 'geometry': 'geojson'}).
Traceback (most recent call last):
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 426, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "D:\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    httplib_response = conn.getresponse()
  File "D:\anaconda3\lib\http\client.py", line 1332, in getresponse
    response.begin()
  File "D:\anaconda3\lib\http\client.py", line 303, in begin
    version, status, reason = self._read_status()
  File "D:\anaconda3\lib\http\client.py", line 264, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "D:\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
  File "D:\anaconda3\lib\ssl.py", line 1241, in recv_into
    retur

##### Extract the raw attribute of the nominatim result

In [349]:
# Get the raw attribute of the nominatim places result
def get_raw(x):
    if x is not None: r = [i.raw for i in x]
    else: r = None
    return r

# 01 | missing results
query['missing'] = query['result'].isna()

print('\nMissing results: ')
print(query['missing'].value_counts())

# 02 | raw extraction
query['result_raw'] = query['result'].apply(get_raw)
print('\nExtracted results: \n{}'.format(len(query['result_raw'].dropna())))

display(query)


Missing results: 
False    2488
True     1378
Name: missing, dtype: int64

Extracted results: 
2488


Unnamed: 0,str_query_A,result,missing,result_raw
0,"South Kivu, Democratic Republic of the Congo","[(Sud-Kivu, République démocratique du Congo, ...",False,"[{'place_id': 51026579, 'licence': 'Data © Ope..."
1,"Zimmerman, Kenya","[(Zimmerman, Nairobi, 00618, Kenya, (-1.210396...",False,"[{'place_id': 50567887, 'licence': 'Data © Ope..."
2,"Aidan, Ghana","[(Ayidan Landfil Site, Ga West Municipal Distr...",False,"[{'place_id': 254174023, 'licence': 'Data © Op..."
3,"Nyegiti, Kenya",,True,
4,"Buchinga, Kenya",,True,
...,...,...,...,...
3861,"Nkwakwa, Ghana",,True,
3862,"Wangige, Kenya","[(Wangige, Kiambu, Central Kenya, P.O. BOX 307...",False,"[{'place_id': 59189115, 'licence': 'Data © Ope..."
3863,"Amusing Naa, Ghana",,True,
3864,"Dansoman, Ghana","[(Dansoman, Accra, Ablekuma West Municipal Dis...",False,"[{'place_id': 317138, 'licence': 'Data © OpenS..."


##### Generate a table of all results (multiple returned for single query)

In [321]:
tables = []

# using the indices of the valid results...
for i in list(query['result_raw'].dropna().index):
    # 01 | create a dataframe from the raw results
    ls = query['result_raw'][i]
    df = pd.DataFrame(ls)
    try:
        # 02 | convert the geojson result to geometry with shapely
        df['geometry'] = df['geojson'].apply(shape)
        df = df.set_geometry('geometry')
        # 03 | add reference to the query
        df['str_query_A'] = query['str_query_A'][i]
        # 04 | add the successful table to the list
        tables.append(df)
    except:
        print('Unable to create result table at index {}'.format(i))

print('\nNumber of successful result tables: {}'.format(len(tables)))

# 05 | Combine successful tables into a single table
table = pd.concat(tables)

# Export
table.to_csv('data_gen/gazetteer_geocoded/nominatim_results_A.csv')

# Preview
display(table)

Unable to create result table at index 60
Unable to create result table at index 173
Unable to create result table at index 319
Unable to create result table at index 380
Unable to create result table at index 913
Unable to create result table at index 1086
Unable to create result table at index 1518
Unable to create result table at index 1585
Unable to create result table at index 2146
Unable to create result table at index 2234
Unable to create result table at index 2465
Unable to create result table at index 2884
Unable to create result table at index 3301
Unable to create result table at index 3343
Unable to create result table at index 3481
Unable to create result table at index 3568

Number of successful result tables: 2472


Unnamed: 0,place_id,licence,osm_type,osm_id,boundingbox,lat,lon,display_name,class,type,importance,address,geojson,extratags,icon,geometry,str_query_A
0,51026579,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,4346319745,"[-5.8653155, -0.6653155, 25.2705717, 30.4705717]",-3.2653155,27.8705717,"Sud-Kivu, République démocratique du Congo",place,state,0.750000,"{'place': 'Sud-Kivu', 'country': 'République d...","{'type': 'Point', 'coordinates': [27.8705717, ...",{'timezone': 'Africa/Lubumbashi'},,POINT (27.87057 -3.26532),"South Kivu, Democratic Republic of the Congo"
1,258736587,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,5642699,"[-5.0103229, -1.5834068, 26.8105741, 29.4359507]",-3.29689585,28.167400763366775,"Sud-Kivu, République démocratique du Congo",boundary,administrative,0.655702,"{'state': 'Sud-Kivu', 'country': 'République d...","{'type': 'Polygon', 'coordinates': [[[26.81057...","{'timezone': 'Africa/Lubumbashi', 'wikidata': ...",https://nominatim.openstreetmap.org/ui/mapicon...,"POLYGON ((26.81057 -2.33906, 26.81065 -2.33984...","South Kivu, Democratic Republic of the Congo"
0,50567887,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,4194272132,"[-1.2303969, -1.1903969, 36.8752212, 36.9152212]",-1.2103969,36.8952212,"Zimmerman, Nairobi, 00618, Kenya",place,suburb,0.495000,"{'suburb': 'Zimmerman', 'city': 'Nairobi', 'st...","{'type': 'Point', 'coordinates': [36.8952212, ...",{},https://nominatim.openstreetmap.org/ui/mapicon...,POINT (36.89522 -1.21040),"Zimmerman, Kenya"
0,254174023,"Data © OpenStreetMap contributors, ODbL 1.0. h...",way,854265282,"[5.7631208, 5.7685128, -0.4249518, -0.4188162]",5.7659029,-0.42259298126119743,"Ayidan Landfil Site, Ga West Municipal Distric...",landuse,landfill,0.310000,"{'landuse': 'Ayidan Landfil Site', 'county': '...","{'type': 'Polygon', 'coordinates': [[[-0.42495...",{},,"POLYGON ((-0.42495 5.76312, -0.42363 5.76324, ...","Aidan, Ghana"
0,726501,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,262710046,"[-28.626667, -28.546667, 31.358056, 31.438056]",-28.586667,31.398056,"Melmoth, Mthonjaneni Local Municipality, King ...",place,town,0.577719,"{'town': 'Melmoth', 'city': 'Mthonjaneni Local...","{'type': 'Point', 'coordinates': [31.398056, -...","{'is_in': 'KwaZulu-Natal, South Africa', 'sagn...",https://nominatim.openstreetmap.org/ui/mapicon...,POINT (31.39806 -28.58667),"Melmoth, South Africa"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,59699839,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,5287091325,"[5.5564562, 5.5565562, -0.2701064, -0.2700064]",5.5565062,-0.2700564,"Evangelical Presbyterian Church, Ghana, Martei...",amenity,place_of_worship,0.221000,"{'amenity': 'Evangelical Presbyterian Church, ...","{'type': 'Point', 'coordinates': [-0.2700564, ...",{'religion': 'christian'},https://nominatim.openstreetmap.org/ui/mapicon...,POINT (-0.27006 5.55651),"Dansoman, Ghana"
2,58314385,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,5087182231,"[5.5623736, 5.5624736, -0.2730803, -0.2729803]",5.5624236,-0.2730303,"Dansoman (SSNIT flats), Nsirewa Street, J7, Ot...",highway,bus_stop,0.211000,"{'highway': 'Dansoman (SSNIT flats)', 'road': ...","{'type': 'Point', 'coordinates': [-0.2730303, ...","{'bus': 'yes', 'public_transport': 'platform'}",https://nominatim.openstreetmap.org/ui/mapicon...,POINT (-0.27303 5.56242),"Dansoman, Ghana"
0,78458900,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,7248219885,"[7.8640237, 7.8641237, -2.6814372, -2.6813372]",7.8640737,-2.6813872,"Mayera DA Basic, Sampa - Berekum, Nwereme, Jam...",amenity,school,0.211000,"{'amenity': 'Mayera DA Basic', 'road': 'Sampa ...","{'type': 'Point', 'coordinates': [-2.6813872, ...",{},https://nominatim.openstreetmap.org/ui/mapicon...,POINT (-2.68139 7.86407),"Mayera, Ghana"
1,57816474,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,5024320817,"[5.7261695, 5.7262695, -0.2765537, -0.2764537]",5.7262195,-0.2765037,"Mayera Market, yehowa Da, Ga North Municipal D...",highway,bus_stop,0.211000,"{'highway': 'Mayera Market', 'road': 'yehowa D...","{'type': 'Point', 'coordinates': [-0.2765037, ...","{'bus': 'yes', 'public_transport': 'platform'}",https://nominatim.openstreetmap.org/ui/mapicon...,POINT (-0.27650 5.72622),"Mayera, Ghana"


##### Add the results back to the gazetteer

In [353]:
# 01 | sort table so highest importance results is first
to_merge = table.sort_values('importance', ascending=False)

# 02 | drop duplicate rows, keeping first
to_merge = table.drop_duplicates(subset=['str_query_A'], keep='first')

# Diagnostics
print('Unique key to merge on?: {}'.format(to_merge['str_query_A'].is_unique))
display(to_merge.head(3))

# 03 | perform merge
queried = pd.merge(query, to_merge, on='str_query_A', how='left', validate = '1:1')
G_a = pd.merge(G, queried, on='str_query_A', how='left', validate = 'm:1')

# Diagnostics
print('Non-geocoded gazetteer entries:')
print(G_a['missing'].value_counts())

# Export
G_a.to_csv('data_gen/gazetteer_geocoded/gazetteer_geocoded.csv')

# Preview
display(G_a[['tuple_query', 'missing', 'lat', 'lon', 'geometry']].dropna())

Unique key to merge on?: True


Unnamed: 0,place_id,licence,osm_type,osm_id,boundingbox,lat,lon,display_name,class,type,importance,address,geojson,extratags,icon,geometry,str_query_A
0,51026579,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,4346319745,"[-5.8653155, -0.6653155, 25.2705717, 30.4705717]",-3.2653155,27.8705717,"Sud-Kivu, République démocratique du Congo",place,state,0.75,"{'place': 'Sud-Kivu', 'country': 'République d...","{'type': 'Point', 'coordinates': [27.8705717, ...",{'timezone': 'Africa/Lubumbashi'},,POINT (27.87057 -3.26532),"South Kivu, Democratic Republic of the Congo"
0,50567887,"Data © OpenStreetMap contributors, ODbL 1.0. h...",node,4194272132,"[-1.2303969, -1.1903969, 36.8752212, 36.9152212]",-1.2103969,36.8952212,"Zimmerman, Nairobi, 00618, Kenya",place,suburb,0.495,"{'suburb': 'Zimmerman', 'city': 'Nairobi', 'st...","{'type': 'Point', 'coordinates': [36.8952212, ...",{},https://nominatim.openstreetmap.org/ui/mapicon...,POINT (36.89522 -1.21040),"Zimmerman, Kenya"
0,254174023,"Data © OpenStreetMap contributors, ODbL 1.0. h...",way,854265282,"[5.7631208, 5.7685128, -0.4249518, -0.4188162]",5.7659029,-0.4225929812611974,"Ayidan Landfil Site, Ga West Municipal Distric...",landuse,landfill,0.31,"{'landuse': 'Ayidan Landfil Site', 'county': '...","{'type': 'Polygon', 'coordinates': [[[-0.42495...",{},,"POLYGON ((-0.42495 5.76312, -0.42363 5.76324, ...","Aidan, Ghana"


Non-geocoded gazetteer entries:
False    5241
True     1738
Name: missing, dtype: int64


Unnamed: 0,tuple_query,missing,lat,lon,geometry
5,"(Accra, Accra, Ghana)",False,5.5571096,-0.2012376,"POLYGON ((-0.28413 5.57195, -0.28386 5.57090, ..."
6,"(Achimota, Accra, Ghana)",False,5.6242019,-0.2276807,POINT (-0.22768 5.62420)
7,"(Adenta, Accra, Ghana)",False,5.7041391,-0.1687965,POINT (-0.16880 5.70414)
8,"(Adenta, Accra, Ghana)",False,5.7041391,-0.1687965,POINT (-0.16880 5.70414)
9,"(Adenta, Accra, Ghana)",False,5.7041391,-0.1687965,POINT (-0.16880 5.70414)
...,...,...,...,...,...
8926,"(Washington, Washington, United States Of Amer...",False,38.8949924,-77.0365581,"POLYGON ((-77.11979 38.93435, -77.11977 38.934..."
8929,"(Westminster, London, United Kingdom)",False,51.5004439,-0.1265398,POINT (-0.12654 51.50044)
8930,"(Westminster, London, United Kingdom)",False,51.5004439,-0.1265398,POINT (-0.12654 51.50044)
8931,"(Wilcox County, Georgia, United States Of Amer...",False,31.960784,-83.4379708,"POLYGON ((-83.61217 31.85394, -83.60108 31.853..."


### 3.1.a. Querying for Method B: L1 + L2 + L3 (Back-ups)

In [355]:
missing = G_b[G_b.missing == True]
print('\nMissing Multilocations: \n{}'.format(len(missing['str_query_B'])))

to_query = missing['str_query_B'].dropna()
print('\nNon-NaN Queries (Method B): \n{}'.format(len(query)))

query = missing['str_query_B'].dropna().drop_duplicates()
print('\nUnique Queries (Method B): \n{}'.format(len(query)))

query = pd.DataFrame(query, columns=['str_query_B'])
query['result'] = None

display(query)


Missing Multilocations: 
1738

Non-NaN Queries (Method B): 
3866

Unique Queries (Method B): 
1669


Unnamed: 0,str_query_B,result
18,"Ashabiena, Accra, Ghana",
50,"Ga Mantse Agbonaa, Jamestown, Ghana",
269,"Benin Dagbo, Allada, Benin",
276,"Dzefa, Cotonou, Benin",
278,"Fidzrose, Cotonou, Benin",
...,...,...
8818,"Plumtree, Bulawayo, Zimbambwe",
8845,"Saint Quen, Paris, France",
8846,"Saint-Quen, Paris, France",
8873,"Stugart, Stugart, Germany",


In [356]:
# 01 - Create and instance of a Nominatim class
geocoder = Nominatim(user_agent = 'mtB')

# 02 - Rate limit to avoid timeout errors or being blocked by the API
geocode = RateLimiter(geocoder.geocode, min_delay_seconds=2, max_retries=2)

# CALL LOCATIONS FROM NOMINATIM
query['result'] = query['str_query_B'].apply(partial(geocode, exactly_one=0, addressdetails=1, extratags=1, geometry='geojson'))

# Back up the original query to avoid having to rerun the query
query_B_backup = query.copy()

In [357]:
# Get the raw attribute of the nominatim places result
def get_raw(x):
    if x is not None: r = [i.raw for i in x]
    else: r = None
    return r

# 01 | missing results
query['missing'] = query['result'].isna()

print('\nMissing results: ')
print(query['missing'].value_counts())

# 02 | raw extraction
query['result_raw'] = query['result'].apply(get_raw)
print('\nExtracted results: \n{}'.format(len(query['result_raw'].dropna())))

display(query)


Missing results: 
True     1667
False       2
Name: missing, dtype: int64

Extracted results: 
2


Unnamed: 0,str_query_B,result,missing,result_raw
18,"Ashabiena, Accra, Ghana",,True,
50,"Ga Mantse Agbonaa, Jamestown, Ghana",,True,
269,"Benin Dagbo, Allada, Benin",,True,
276,"Dzefa, Cotonou, Benin",,True,
278,"Fidzrose, Cotonou, Benin",,True,
...,...,...,...,...
8818,"Plumtree, Bulawayo, Zimbambwe",,True,
8845,"Saint Quen, Paris, France",,True,
8846,"Saint-Quen, Paris, France",,True,
8873,"Stugart, Stugart, Germany",,True,


In [None]:
tables = []

# using the indices of the valid results...
for i in list(query['result_raw'].dropna().index):
    # 01 | create a dataframe from the raw results
    ls = query['result_raw'][i]
    df = pd.DataFrame(ls)
    try:
        # 02 | convert the geojson result to geometry with shapely
        df['geometry'] = df['geojson'].apply(shape)
        df = df.set_geometry('geometry')
        # 03 | add reference to the query
        df['str_query_B'] = query['str_query_B'][i]
        # 04 | add the successful table to the list
        tables.append(df)
    except:
        print('Unable to create result table at index {}'.format(i))

print('\nNumber of successful result tables: {}'.format(len(tables)))

# 05 | Combine successful tables into a single table
table = pd.concat(tables)

# Export
table.to_csv('data_gen/gazetteer_geocoded/nominatim_results_B.csv')

# Preview
display(table)

In [None]:
# 01 | sort table so highest importance results is first
to_merge = table.sort_values('importance', ascending=False)

# 02 | drop duplicate rows, keeping first
to_merge = table.drop_duplicates(subset=['str_query_B'], keep='first')

# Diagnostics
print('Unique key to merge on?: {}'.format(to_merge['str_query_B'].is_unique))
display(to_merge.head(3))

# 03 | perform merge
to_replace = pd.merge(to_query, to_merge, on='str_query_A', how='left')

display(to_replace)

In [None]:
nominatim_cols = \
['result', 'missing', 'result_raw', 'place_id', 'licence',
 'osm_type', 'osm_id', 'boundingbox', 'lat', 'lon', 'display_name',
 'class', 'type', 'importance', 'address', 'geojson', 'extratags',
 'icon', 'geometry']

## 3.2. Add Data to the Survey Multilocations

### 3.2.0. Load in datasets to merge together

##### Function to evaluate as literals

In [51]:
# function to map over the string elements of the dataframe
def str_eval(e):
    """
    A function to evaluate string element in a dataframe literally, such as the multilocation tuples.
    
    RETURNS: The Python literal. 
    """
    # 01 | Ensure the element is read by ast.literal_eval() as a string to avoid errors.
    e = str(e)
    
    # 02 | Filter out any null values
    if ((e != 'nan') | (e != 'None')):
        
        # 03 | Evaluate the string literally
        try:
            ml = ast.literal_eval(e)
        
        # If unable to read the string, print it so it can be traced back to address issues
        except:
            ml = None
    
    # Return null values as null values
    else:
        ml = None
        
    return ml

##### Geocoded Gazetteer

In [52]:
G_ = pd.read_csv('data_gen/gazetteer_geocoded/gazetteer_geocoded.csv')
G  = G_.copy()

print('Columns: ')
print(list(G.columns))

print('\nUnique original tuples: ')
print(G['tuple_orig'].is_unique)

G['tuple_orig'] = G['tuple_orig'].apply(str_eval)
G['tuple_query'] = G['tuple_query'].apply(str_eval)

G[['tuple_orig', 'tuple_query', 'lat', 'lon']]

Columns: 
['Unnamed: 0', 'L1_orig', 'L2_orig', 'L3_orig', 'tuple_orig', 'L1_edit', 'L1_chg', 'L2_edit', 'L2_chg', 'L3_edit', 'L3_chg', 'L1_query', 'L2_query', 'L3_query', 'tuple_query', 'string_query', 'str_query_A', 'str_query_B', 'result', 'missing', 'result_raw', 'place_id', 'licence', 'osm_type', 'osm_id', 'boundingbox', 'lat', 'lon', 'display_name', 'class', 'type', 'importance', 'address', 'geojson', 'extratags', 'icon', 'geometry']

Unique original tuples: 
True


Unnamed: 0,tuple_orig,tuple_query,lat,lon
0,"(-3, -1, -1)",,,
1,"(-1, -1, -1)",,,
2,"(46, -1, -1)",,,
3,"(99, -1, -1)",,,
4,"(37 Military Hospital, -1, -1)",,,
...,...,...,...,...
8930,"(Westminster, London, Uk)","(Westminster, London, United Kingdom)",51.500444,-0.126540
8931,"(Wilcox, Georgia, Usa)","(Wilcox County, Georgia, United States Of Amer...",31.960784,-83.437971
8932,(Would Wish To Move But Destination Still Unkn...,,,
8933,"(York, York, Canada)","(York, Toronto, Canada)",43.689619,-79.479188


##### Survey Multilocations

In [5]:
# csv of multilocations as tuples
mls_ = pd.read_csv('data_gen/survey_multilocations/survey_multilocations_tuples.csv')

# csv of multilocations as delimited string (alternative)
# mls_ = pd.read_csv('data_gen/survey_multilocations/survey_multilocations_string.csv')

mls = mls_.copy()
mls = mls.set_index('ID')
mls = mls.applymap(str_eval)

print('\nRows, Columns: {}'.format(mls.shape))
mls.head(3)


Rows, Columns: (1644, 44)


Unnamed: 0_level_0,109,127,134,142,152,201,202,206,211,214b,...,100b,101,302,308,229w,229x,229y,229z,231,238
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
276785,,,"((Kawango, Kisumu, Kenya), (Kawango, Kisumu, K...",,,"((Kawango, Kisumu, Kenya),)","((Manyatta, Kisumu, Kenya), (Kawangware, Nairo...",,,,...,"((-1, Kawango, Kenya),)","((Kisumu, Kisumu, Kenya),)","((-3, Nairobi, Kenya),)","((-3, Nairobi, Kenya),)","((-1, Nairobi, Kenya),)","((Kawangware, Nairobi, Kenya),)","((-1, Nairobi, Kenya),)","((-1, Nairobi, Kenya),)","((Kawangware, Nairobi, Kenya),)","((-1, Nairobi, Kenya),)"
276788,,"((N/A, N/A, N/A),)","((N/A, N/A, N/A),)",,,,,,,,...,"((-1, Kokal, Kenya),)","((Oyugis, Oyugis, Kenya),)","((-1, -1, -1),)","((-1, -1, -1),)","((-1, -1, -1),)","((-1, -1, -1),)","((-1, -1, -1),)","((-1, -1, -1),)","((-1, -1, -1),)","((-1, -1, -1),)"
276802,,,"((Awendo, Migori, Kenya),)",,,"((Machakos, Machakos, Kenya), (Utawala, Nairob...",,,,,...,"((-1, Oyugjs, Kenya),)","((Oyugis, Oyugis, Kenya),)","((Utawala, Nairobi, Kenya),)","((Nairobi Cbd, Nairobi, Kenya),)","((-1, Nairobi, Kenya),)","((Roysambu, Nairobi, Kenya),)","((-1, Nairobi, Kenya),)","((-1, Nairobi, Kenya),)","((Roysambu, Nairobi, Kenya),)","((-1, Nairobi, Kenya),)"


### 3.2.1. Merge to create Geocoded Survey

##### Functions to grab data from gazetteer and place into tuple multilocations

In [59]:
def grab_x(e, x):
    grabs = []
    for ml in e:
        grab = G.loc[G['tuple_orig'] == ml, x].item()
        grabs.append(grab)
    return grabs

def grab_clean(e):
    try:
        if e is not None:
            clean = grab_x(e, 'tuple_query')
            if any(clean):
                return tuple(list(clean))
            else:
                return None
        else:
            return None
    except:
        return None

def grab_ll(e):
    try:
        if e is not None:
            
            # grab the late and lon based on the old dataframe element
            lat = grab_x(e, 'lat')
            lon = grab_x(e, 'lon')
            
            # create tuples
            latlons = []
            for i in range(len(lat)):
                ls = [lat[i], lon[i]]
                t = tuple(ls)
                latlons.append(t)
            
            return tuple(latlons)
        else:
            return None
    except:
        return None

##### Loop to place multilocations tuples into survey

In [60]:
mls_ll = mls.copy()

mls_cols = []

for col in list(mls_ll.columns):
    
    col_q = col + '_q'
    col_ll = col + '_ll'
    
    mls_ll[col_q] = mls_ll[col].apply(grab_clean)
    mls_ll[col_ll] = mls_ll[col].apply(grab_ll)
    
    mls_cols.append(col)
    mls_cols.append(col_q) 
    mls_cols.append(col_ll)
    
mls_ll = mls_ll[mls_cols]

mls_ll.to_csv('data_gen/survey_geocoded/survey_multilocations_geocoded.csv')

mls_ll

Unnamed: 0_level_0,109,109_q,109_ll,127,127_q,127_ll,134,134_q,134_ll,142,...,229y_ll,229z,229z_q,229z_ll,231,231_q,231_ll,238,238_q,238_ll
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
276785,,,,,,,"((Kawango, Kisumu, Kenya), (Kawango, Kisumu, K...","((Kawango, Kisumu, Kenya), (Kawango, Kisumu, K...","((0.0, 31.983333000000002), (0.0, 31.983333000...",,...,"((nan, nan),)","((-1, Nairobi, Kenya),)","((nan, Nairobi, Kenya),)","((nan, nan),)","((Kawangware, Nairobi, Kenya),)","((Kawangware, Nairobi, Kenya),)","((-1.2784631000000002, 36.751643),)","((-1, Nairobi, Kenya),)","((nan, Nairobi, Kenya),)","((nan, nan),)"
276788,,,,"((N/A, N/A, N/A),)",,"((nan, nan),)","((N/A, N/A, N/A),)",,"((nan, nan),)",,...,"((nan, nan),)","((-1, -1, -1),)",,"((nan, nan),)","((-1, -1, -1),)",,"((nan, nan),)","((-1, -1, -1),)",,"((nan, nan),)"
276802,,,,,,,"((Awendo, Migori, Kenya),)","((Awendo, Migori, Kenya),)","((-0.9079812, 34.53172),)",,...,"((nan, nan),)","((-1, Nairobi, Kenya),)","((nan, Nairobi, Kenya),)","((nan, nan),)","((Roysambu, Nairobi, Kenya),)","((Roysambu, Nairobi, Kenya),)","((-1.2188396000000001, 36.88673920000001),)","((-1, Nairobi, Kenya),)","((nan, Nairobi, Kenya),)","((nan, nan),)"
276814,,,,"((Nairobi, Nairobi, Kenya), (London, Nakuru, K...","((Nairobi, Nairobi, Kenya), (London, Nakuru, K...","((-1.30316895, 36.826061224105075), (51.520665...","((Bondo, Siaya, Kenya),)","((Bondo, Siaya, Kenya),)","((-0.0993654, 34.2739935),)","((Kisumu, Kisumu, Kenya),)",...,"((nan, nan),)","((-1, Nairobi, Kenya),)","((nan, Nairobi, Kenya),)","((nan, nan),)","((Umoja, Nairobi, Kenya), (Tena, Nairobi, Kenya))","((Umoja, Nairobi, Kenya), (Tena, Nairobi, Kenya))","((-1.2818728000000001, 36.899339700000006), (-...","((Nakuru, Nairobi, Kenya), (Kisumu, Nairobi, K...","((Nakuru, Nairobi, Kenya), (Kisumu, Nairobi, K...","((-0.28027240000000003, 36.0712048), (-0.10291..."
276822,,,,,,,"((None, None, None),)","((None, None, nan),)","((nan, nan),)",,...,"((nan, nan),)","((-1, Nairobi, Kenya),)","((nan, Nairobi, Kenya),)","((nan, nan),)","((Buruburu, Nairobi, Kenya),)","((Buruburu, Nairobi, Kenya),)","((-1.2841949, 36.868837),)","((-1, Nairobi, Kenya),)","((nan, Nairobi, Kenya),)","((nan, nan),)"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297661,,,,,,,"((Khongoloti, Khongoloti, Zimbabwe), (Khongolo...","((Khongoloti, Khongoloti, Zimbabwe), (Khongolo...","((nan, nan), (nan, nan), (-26.1822222, 28.0533...","((Khongoloti, Khongoloti, Khongoloti), (Khongo...",...,"((nan, nan),)","((-1, -1, -1),)",,"((nan, nan),)","((-1, -1, -1),)",,"((nan, nan),)","((-1, -1, -1),)",,"((nan, nan),)"
297663,,,,"((Sesambo, Lira City, Uganda), (Sesambo, Lirac...","((Sesambo, Lira City, Uganda), (Sesambo, Lirac...","((nan, nan), (nan, nan), (nan, nan))","((Khongoloti, Khongoloti, Zimbabwe), (Sesambo,...","((Khongoloti, Khongoloti, Zimbabwe), (Sesambo,...","((nan, nan), (nan, nan), (nan, nan))",,...,"((nan, nan),)","((Berea, Johannesburg, South Africa),)","((Berea, Johannesburg, South Africa),)","((-26.1822222, 28.0533333),)","((Uganda, Johannesburg, South Africa), (Berea,...","((Uganda, Johannesburg, South Africa), (Berea,...","((-26.041980499999998, 28.1591284), (-26.18222...","((-1, Johannesburg, South Africa),)","((nan, Johannesburg, South Africa),)","((nan, nan),)"
297666,,,,"((0, 0, 0),)","((0, 0, nan),)","((nan, nan),)","((Lububamshi, Katsanga, Democratic Republic Of...","((Lububamshi, Katsanga, Democratic Republic of...","((nan, nan),)","((Katsanga, Katsanga, Democratic Republic Of T...",...,"((nan, nan),)","((-1, Johannesburg, South Africa),)","((nan, Johannesburg, South Africa),)","((nan, nan),)","((Yeoville, Johannesburg, South Africa), (Bere...","((Yeoville, Johannesburg, South Africa), (Bere...","((-26.1838889, 28.0641667), (-26.1822222, 28.0...","((Yeoville, Johannesburg, South Africa), (Bloe...","((Yeoville, Johannesburg, South Africa), (Bloe...","((-26.1838889, 28.0641667), (-29.116395, 26.21..."
297667,,,,,,,,,,"((Lusaka, Lusaka, Zambia),)",...,"((nan, nan),)","((Sandton, Johannesburg, South Africa),)","((Sandton, Johannesburg, South Africa),)","((-26.068240600000003, 28.047984399999997),)","((Sandton, Johannesburg, South Africa), (Berea...","((Sandton, Johannesburg, South Africa), (Berea...","((-26.068240600000003, 28.047984399999997), (-...","((Sandton, Johannesburg, South Africa),)","((Sandton, Johannesburg, South Africa),)","((-26.068240600000003, 28.047984399999997),)"
