## 07. Geocoding - Reverse Geocoding

In [35]:
import folium
import geopandas as gpd
import geopy
import matplotlib.pyplot as plt 
import os
import pandas as pd 
import pathlib
import tqdm

from folium.plugins import FastMarkerCluster
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from tqdm import tqdm

In [2]:
os.chdir('..')
path = pathlib.Path().resolve()

#### 1. Data transformation to get `lat` and `lon` columns 

In [3]:
# single adress example 

locator = Nominatim(user_agent='myGeocoder')
location = locator.geocode("Sukiennice, Kraków, Poland")

In [4]:
print(location.address)

Sukiennice, 3, Rynek Główny, Stare Miasto, Kraków, województwo małopolskie, 31-042, Polska


In [5]:
print("Latitude = {}, Longitude = {}".format(location.latitude, location.longitude))

Latitude = 50.061692199999996, Longitude = 19.937348815057142


In [6]:
# whole dataframe 

In [7]:
df = pd.read_csv(f'{path}/data/addresses.csv')

In [8]:
df.head()

Unnamed: 0,Typ,Nr,Namn,Address1,Address3,Address4,Address5,Telefon
0,Butik,102,Fältöversten,Karlaplan 13,115 20,STOCKHOLM,Stockholms län,08/662 22 89
1,Butik,104,,Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,08/662 50 16
2,Butik,106,Garnisonen,Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län,08/662 64 85
3,Butik,110,,Hötorgshallen,111 57,STOCKHOLM,Stockholms län,08/56849241
4,Butik,113,Sergel,Drottninggatan 45,111 21,STOCKHOLM,Stockholms län,08/21 47 44


In [9]:
address_cols = ['Address1', 'Address3', 'Address4', 'Address5']
df[address_cols] = df[address_cols].astype(str)

In [10]:
df['address'] = df[address_cols].apply(lambda x: ','.join(x.dropna().astype(str)),axis=1) 

In [11]:
df.head()

Unnamed: 0,Typ,Nr,Namn,Address1,Address3,Address4,Address5,Telefon,address
0,Butik,102,Fältöversten,Karlaplan 13,115 20,STOCKHOLM,Stockholms län,08/662 22 89,"Karlaplan 13,115 20,STOCKHOLM,Stockholms län"
1,Butik,104,,Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,08/662 50 16,"Nybrogatan 47,114 39,STOCKHOLM,Stockholms län"
2,Butik,106,Garnisonen,Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län,08/662 64 85,"Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län"
3,Butik,110,,Hötorgshallen,111 57,STOCKHOLM,Stockholms län,08/56849241,"Hötorgshallen,111 57,STOCKHOLM,Stockholms län"
4,Butik,113,Sergel,Drottninggatan 45,111 21,STOCKHOLM,Stockholms län,08/21 47 44,"Drottninggatan 45,111 21,STOCKHOLM,Stockholms län"


In [12]:
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
df['location'] = df['address'].apply(geocode)
df['point'] = df['location'].apply(lambda loc: tuple(loc.point) if loc else None)

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Liljeholmstorget 3, plan 1,117 63,STOCKHOLM,Stockholms län',), **{}).
Traceback (most recent call last):
  File "/opt/anaconda3/envs/spatial-ds-ws/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/anaconda3/envs/spatial-ds-ws/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/anaconda3/envs/spatial-ds-ws/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/opt/anaconda3/envs/spatial-ds-ws/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/opt/anaconda3/envs/spatial-ds-ws/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/opt/anaconda3/envs/spatial-ds-ws/lib

In [17]:
df.head()

Unnamed: 0,Typ,Nr,Namn,Address1,Address3,Address4,Address5,Telefon,address,location,point,lat,lon,alt
0,Butik,102,Fältöversten,Karlaplan 13,115 20,STOCKHOLM,Stockholms län,08/662 22 89,"Karlaplan 13,115 20,STOCKHOLM,Stockholms län","(Karlaplan, Östermalm, Östermalms stadsdelsomr...","(59.3373056, 18.0912159, 0.0)",59.337306,18.091216,0.0
1,Butik,104,,Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,08/662 50 16,"Nybrogatan 47,114 39,STOCKHOLM,Stockholms län","(47, Nybrogatan, Villastaden, Östermalm, Öster...","(59.3372072, 18.0790982, 0.0)",59.337207,18.079098,0.0
2,Butik,106,Garnisonen,Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län,08/662 64 85,"Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län","(Karlavägen, Östermalm, Östermalms stadsdelsom...","(59.3361757, 18.0973562, 0.0)",59.336176,18.097356,0.0
3,Butik,110,,Hötorgshallen,111 57,STOCKHOLM,Stockholms län,08/56849241,"Hötorgshallen,111 57,STOCKHOLM,Stockholms län","(Hötorgshallen, Hötorget, Klara, Norrmalm, Nor...","(59.3343273, 18.0626039, 0.0)",59.334327,18.062604,0.0
4,Butik,113,Sergel,Drottninggatan 45,111 21,STOCKHOLM,Stockholms län,08/21 47 44,"Drottninggatan 45,111 21,STOCKHOLM,Stockholms län","(Drottninggatan, Klara, Norrmalm, Norrmalms st...","(59.3357611, 18.0595426, 0.0)",59.335761,18.059543,0.0


In [18]:
# splitting point into three columns: 

In [19]:
df[['lat', 'lon', 'alt']] = pd.DataFrame(df.point.tolist(), index = df.index)
df.head()

Unnamed: 0,Typ,Nr,Namn,Address1,Address3,Address4,Address5,Telefon,address,location,point,lat,lon,alt
0,Butik,102,Fältöversten,Karlaplan 13,115 20,STOCKHOLM,Stockholms län,08/662 22 89,"Karlaplan 13,115 20,STOCKHOLM,Stockholms län","(Karlaplan, Östermalm, Östermalms stadsdelsomr...","(59.3373056, 18.0912159, 0.0)",59.337306,18.091216,0.0
1,Butik,104,,Nybrogatan 47,114 39,STOCKHOLM,Stockholms län,08/662 50 16,"Nybrogatan 47,114 39,STOCKHOLM,Stockholms län","(47, Nybrogatan, Villastaden, Östermalm, Öster...","(59.3372072, 18.0790982, 0.0)",59.337207,18.079098,0.0
2,Butik,106,Garnisonen,Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län,08/662 64 85,"Karlavägen 100 A,115 26,STOCKHOLM,Stockholms län","(Karlavägen, Östermalm, Östermalms stadsdelsom...","(59.3361757, 18.0973562, 0.0)",59.336176,18.097356,0.0
3,Butik,110,,Hötorgshallen,111 57,STOCKHOLM,Stockholms län,08/56849241,"Hötorgshallen,111 57,STOCKHOLM,Stockholms län","(Hötorgshallen, Hötorget, Klara, Norrmalm, Nor...","(59.3343273, 18.0626039, 0.0)",59.334327,18.062604,0.0
4,Butik,113,Sergel,Drottninggatan 45,111 21,STOCKHOLM,Stockholms län,08/21 47 44,"Drottninggatan 45,111 21,STOCKHOLM,Stockholms län","(Drottninggatan, Klara, Norrmalm, Norrmalms st...","(59.3357611, 18.0595426, 0.0)",59.335761,18.059543,0.0


In [20]:
# remove data without geodata 

df = df[pd.notnull(df['lat'])]

Such a dataframe can now be plotted as in other notebooks

#### 2. Reverse geocoding

In [21]:
# single location 

coordinates = "50.09554, 20.02214"
location = locator.reverse(coordinates)

In [22]:
location.raw

{'place_id': 189553249,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 358705271,
 'lat': '50.09601335',
 'lon': '20.02210391346795',
 'display_name': '38, Księdza kardynała Adama Stefana Sapiehy, Mistrzejowice, Kraków, województwo małopolskie, 31-640, Polska',
 'address': {'house_number': '38',
  'road': 'Księdza kardynała Adama Stefana Sapiehy',
  'quarter': 'Mistrzejowice',
  'suburb': 'Mistrzejowice',
  'city_district': 'Mistrzejowice',
  'city': 'Kraków',
  'state': 'województwo małopolskie',
  'ISO3166-2-lvl4': 'PL-12',
  'postcode': '31-640',
  'country': 'Polska',
  'country_code': 'pl'},
 'boundingbox': ['50.0958657', '50.0961511', '20.0219666', '20.0223506']}

In [23]:
print(location.address)

38, Księdza kardynała Adama Stefana Sapiehy, Mistrzejowice, Kraków, województwo małopolskie, 31-640, Polska


In [25]:
!pip install requests



In [26]:
import requests

In [28]:
requests.get(url, verify=False)



<Response [503]>

In [31]:
# whole dataframe 

filepath = f'{path}/data/Street_Poles.csv'
cols = ['X', 'Y', 'POLE_NUM', 'TYPE', 'HEIGHT', 'POLE_DATE', 'OWNER']
df = pd.read_csv(filepath, usecols = cols)
df.head()

Unnamed: 0,X,Y,POLE_NUM,TYPE,HEIGHT,POLE_DATE,OWNER
0,-75.066411,40.044774,1,WP,,,Streets
1,-75.061178,40.041861,3,TCB,,,Streets
2,-75.144451,40.008283,4,WP,,,Streets
3,-75.144289,40.00823,5,TCB,,,Streets
4,-75.260807,39.876608,100001,SLA,45.0,1997/04/30 00:00:00+00,Streets


In [32]:
# creating new column with both coordinates 

df['geom'] = df['Y'].map(str) + ',' + df['X'].map(str) 


In [33]:
locator = Nominatim(user_agent='myGeocoder', timeout=10)
rgeocode = RateLimiter(locator.reverse, min_delay_seconds=0.001)
rgeocode


<geopy.extra.rate_limiter.RateLimiter at 0x7f9778157cd0>

In [36]:
tqdm.pandas()

In [38]:
df = df.sample(100)

In [39]:
df['address'] = df['geom'].progress_apply(rgeocode)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:51<00:00,  1.96it/s]


In [40]:
df.head()

Unnamed: 0,X,Y,POLE_NUM,TYPE,HEIGHT,POLE_DATE,OWNER,geom,address
7443,-75.229767,39.949355,108470,WP,25.0,1997/05/08 00:00:00+00,Streets,"39.9493554418371,-75.2297673782449","(Wharton Wesley United Methodist Church, Catha..."
60370,-75.154573,39.962609,305224,WP,25.0,1997/07/15 00:00:00+00,Streets,"39.9626088334116,-75.1545728871205","(1008, Green Street, Poplar, Philadelphia, Phi..."
17808,-75.250873,39.977707,119878,WP,25.0,1997/06/30 00:00:00+00,Streets,"39.9777066982067,-75.2508731614201","(6522, Lebanon Avenue, Philadelphia, Philadelp..."
82846,-75.138805,39.978366,328907,SLF,30.0,,Streets,"39.9783660345629,-75.1388053949776","(American St & Berks St, American Street, West..."
89912,-75.187872,40.030772,407231,MAP,,1997/10/13 00:00:00+00,Streets,"40.0307719216354,-75.1878718609558","(6001, Wissahickon Avenue, Philadelphia, Phila..."
