### Data from Wikipedia

In [0]:
import folium
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

In [30]:
# Download the Wikipedia page that lists Toronto's neighbourhoods.
url = 'https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto'
# A timeout keeps the cell from hanging indefinitely if the server never answers.
result = requests.get(url, timeout=30)
# Stop here with a clear HTTP error instead of parsing an error page further down.
result.raise_for_status()
print(url)
print(result.status_code)
print(result.headers)

https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Toronto
200
{'Date': 'Wed, 09 Oct 2019 07:16:52 GMT', 'Content-Type': 'text/html; charset=UTF-8', 'Content-Length': '31398', 'Connection': 'keep-alive', 'Server': 'mw1271.eqiad.wmnet', 'Vary': 'Accept-Encoding,Cookie,Authorization', 'X-Powered-By': 'PHP/7.2.22-1+0~20190902.26+debian9~1.gbpd64eb7+wmf1', 'X-Content-Type-Options': 'nosniff', 'P3P': 'CP="See https://en.wikipedia.org/wiki/Special:CentralAutoLogin/P3P for more info."', 'Content-language': 'en', 'Last-Modified': 'Mon, 07 Oct 2019 21:02:08 GMT', 'Backend-Timing': 'D=107705 t=1570482443706777', 'Content-Encoding': 'gzip', 'X-Varnish': '583822252 534412826, 63768552 733551645', 'Age': '37861', 'X-Cache': 'cp1079 hit/3, cp1085 hit/8', 'X-Cache-Status': 'hit-front', 'Server-Timing': 'cache;desc="hit-front"', 'Strict-Transport-Security': 'max-age=106384710; includeSubDomains; preload', 'Set-Cookie': 'WMF-Last-Access=09-Oct-2019;Path=/;HttpOnly;secure;Expires=Sun, 10 Nov 2019 0

In [33]:
# Start from an empty frame with one column for the neighbourhood name and
# two for its coordinates; it has no rows yet.
column_names = ['Hood', 'Latitude', 'Longitude']
df = pd.DataFrame(columns=column_names)
df.head()

Unnamed: 0,Hood,Latitude,Longitude


In [0]:
# Parse the downloaded page and collect one cleaned name per linked list item.
soup = BeautifulSoup(result.content, 'html.parser')
# Only the first <table> on the page is searched.
# NOTE(review): assumes that table holds the neighbourhood list — confirm if the page layout changes.
table = soup.find('table')
if table is None:
    # find() returns None when nothing matches; fail with an explicit message
    # rather than an AttributeError on the next line.
    raise ValueError('No <table> element found in the downloaded page')
lis = table.find_all('li')

list_of_n = []
for li in lis:
    a = li.find('a')
    # A list item may have no link, or a link without a title attribute;
    # such entries carry no usable name, so they are skipped.
    title = a.get('title') if a is not None else None
    if not title:
        continue
    # Keep the text before the first ", " and strip the disambiguation
    # suffixes " (neighbourhood)" and " (Toronto)".
    name = title.split(", ")[0].split(" (neighbourhood)")[0].split(" (Toronto)")[0]
    list_of_n.append(name)

In [35]:
# Store the scraped neighbourhood names in the 'Hood' column.
hood_names = pd.Series(list_of_n)
df['Hood'] = hood_names
# Report the frame's dimensions, then preview the first rows.
print(df.shape)
df.head()

(32, 3)


Unnamed: 0,Hood,Latitude,Longitude
0,Alexandra Park,,
1,The Annex,,
2,Baldwin Village,,
3,Cabbagetown,,
4,CityPlace,,


In [36]:
# Remove rows that are exact duplicates across all columns, modifying df in place.
# No index reset is requested, so the remaining rows keep their original labels.
df.drop_duplicates(inplace=True)
# Report the shape after de-duplication, then preview the first rows.
print(df.shape)
df.head()

(32, 3)


Unnamed: 0,Hood,Latitude,Longitude
0,Alexandra Park,,
1,The Annex,,
2,Baldwin Village,,
3,Cabbagetown,,
4,CityPlace,,


In [37]:
# Geocode every neighbourhood and write its coordinates into df.
# Index labels of rows that could not be geocoded are collected in
# to_drop_unknown so they can be removed afterwards.
to_drop_unknown = []
geolocator = Nominatim(user_agent="coursera")
# Nominatim's public service expects at most one request per second.
# RateLimiter spaces the calls out and retries transient service errors;
# once retries are exhausted it returns None instead of raising.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
for index, row in df.iterrows():
    address = row['Hood'] + ', Toronto'
    location = geocode(address)
    # None means "no match" (or a request that kept failing). Checking it
    # explicitly avoids catching AttributeError, which would also hide
    # unrelated attribute errors.
    if location is None:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        to_drop_unknown.append(index)
        continue
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
    df.loc[index, 'Latitude'] = latitude
    df.loc[index, 'Longitude'] = longitude

df.head()

The geograpical coordinate of Alexandra Park, Toronto are 43.65075755, -79.4042978683821.
The geograpical coordinate of The Annex, Toronto are 43.6703377, -79.407117.
The geograpical coordinate of Baldwin Village, Toronto are 43.66981815, -79.4971720154081.
The geograpical coordinate of Cabbagetown, Toronto are 43.6644734, -79.3669861.
The geograpical coordinate of CityPlace, Toronto are 43.6392482, -79.3963865.
The geograpical coordinate of Chinatown, Toronto are 43.6529237, -79.3980316.
The geograpical coordinate of Church and Wellesley, Toronto are 43.6655242, -79.3838011.
The geograpical coordinate of Corktown, Toronto are 43.6573709, -79.3565189.
The geograpical coordinate of Discovery District, Toronto are 43.6575555, -79.3894803.
The geograpical coordinate of Distillery District, Toronto are 43.6502947, -79.3595401.
The geograpical coordinate of Toronto Entertainment District, Toronto are 43.64383755, -79.3866924741406.
Cannot do: East Bayfront, Toronto, will drop index: 11
The 

Unnamed: 0,Hood,Latitude,Longitude
0,Alexandra Park,43.6508,-79.4043
1,The Annex,43.6703,-79.4071
2,Baldwin Village,43.6698,-79.4972
3,Cabbagetown,43.6645,-79.367
4,CityPlace,43.6392,-79.3964


In [38]:
# Build a new frame without the rows whose index labels were collected in
# to_drop_unknown; drop() returns a copy, so df itself is left as it is.
clean_df = df.drop(index=to_drop_unknown)
clean_df.shape

(30, 3)

In [39]:
# Centre a folium map on Toronto and add one circle marker per neighbourhood
# in clean_df.
address = 'Toronto'
location = geolocator.geocode(address)
if location is not None:
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
    # The city centre is only used to position the map. It is deliberately
    # NOT written into df: doing so at the leftover loop variable `index`
    # would overwrite the last neighbourhood's coordinates.
else:
    # Without a geocoding result, centre on the mean of the neighbourhood
    # coordinates rather than on stale values left over from an earlier cell.
    latitude = clean_df['Latitude'].astype(float).mean()
    longitude = clean_df['Longitude'].astype(float).mean()
    print('Cannot do: {}, centring the map on the mean neighbourhood coordinate instead'.format(address))

my_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(clean_df['Latitude'], clean_df['Longitude'], clean_df['Hood']):
    # Labels come from a scraped web page; parse_html=True makes folium escape
    # them so they render as plain text instead of raw HTML.
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(my_map)

my_map

The geograpical coordinate of Toronto are 43.653963, -79.387207.
