In [1]:
import pandas as pd
import numpy as np

In [2]:
# Fetch the postal-code table from Wikipedia.
# `match` keeps only tables whose text contains 'Neighborhood'; the one-element
# tuple unpacking fails loudly unless exactly one table matched.
wiki_url = "http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_tables = pd.read_html(wiki_url, header=0, match='Neighborhood')
(toronto_neigh,) = wiki_tables
toronto_neigh.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [3]:
# Rename the 'Postal code' column to the space-free 'PostalCode'
# (the frame is modified in place).
column_renames = {'Postal code': 'PostalCode'}
toronto_neigh.rename(columns=column_renames, inplace=True)
toronto_neigh.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [4]:
# Remove every row whose Borough is the placeholder 'Not assigned'.
# The matching index labels are collected first, then dropped in place.
borough_unassigned = toronto_neigh['Borough'] == 'Not assigned'
unassigned_labels = toronto_neigh[borough_unassigned].index
toronto_neigh.drop(index=unassigned_labels, inplace=True)
toronto_neigh.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [5]:
# Report whether any postal code occurs on more than one row; if so the rows
# would have to be grouped by postal code before continuing.
postal_codes_unique = toronto_neigh['PostalCode'].is_unique
print('No action needed.' if postal_codes_unique else 'Need to group by the postal code.')

No action needed.


In [6]:
# Replace NaN in Neighborhood with the Borough of the same row.
# fillna() with a Series aligns on the index, so each missing neighborhood takes
# the borough from its own row. The result is assigned back to the column:
# calling an inplace method on a selected column (df[col].method(..., inplace=True))
# acts on a temporary under pandas Copy-on-Write and leaves the frame unchanged.
toronto_neigh['Neighborhood'] = toronto_neigh['Neighborhood'].fillna(toronto_neigh['Borough'])
toronto_neigh.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [7]:
# Swap every '/' in Neighborhood for ','.
# Only the slash character itself is replaced; the spaces around it are kept.
neighborhood_names = toronto_neigh['Neighborhood']
toronto_neigh['Neighborhood'] = neighborhood_names.str.replace('/', ',')
toronto_neigh.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park , Harbourfront"
5,M6A,North York,"Lawrence Manor , Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [8]:
# Dimensions of the cleaned dataframe, printed as (rows, columns).
row_count, column_count = toronto_neigh.shape
print((row_count, column_count))

(103, 3)


In [9]:
# Download the geospatial coordinates CSV; its first row is the header.
geo_csv_url = 'http://cocl.us/Geospatial_data'
lat_long = pd.read_csv(geo_csv_url, header=0)
lat_long.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Order the Toronto neighborhoods by postal code (ascending), then renumber the
# rows 0..n-1 so the index reflects the sorted order. Both steps act in place.
toronto_neigh.sort_values('PostalCode', inplace=True)
toronto_neigh.reset_index(inplace=True, drop=True)

In [11]:
# Attach the coordinates to the neighborhoods by position.
# This is only valid when both frames list the same postal codes in the same
# order, so that is verified first. np.array_equal compares the raw values and
# simply returns False when the lengths differ, instead of raising an unrelated
# comparison error.
codes_match = np.array_equal(
    toronto_neigh['PostalCode'].to_numpy(),
    lat_long['Postal Code'].to_numpy(),
)
if not codes_match:
    # Stop here: continuing would either hit an undefined toronto_neigh_geo or
    # silently reuse one left over from a previous run of this cell.
    raise ValueError('Something wrong with the orders.')

print('The Orders of the postal codes are the same. Concatenate the coordinates dataframe.')
# Concatenate side by side, then discard the duplicated 'Postal Code' column
# contributed by lat_long.
toronto_neigh_geo = pd.concat([toronto_neigh, lat_long], axis=1).drop(columns='Postal Code')

The Orders of the postal codes are the same. Concatenate the coordinates dataframe.


In [12]:
# Preview the first five rows of the combined neighborhood/coordinate frame.
toronto_neigh_geo.head(n=5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [13]:
# Dimensions of the combined frame, printed as (rows, columns).
geo_row_count, geo_column_count = toronto_neigh_geo.shape
print((geo_row_count, geo_column_count))


(103, 5)


In [14]:
# Keep only the boroughs whose name contains 'Toronto' (case-insensitive),
# renumbering the remaining rows from 0.
is_toronto_borough = toronto_neigh_geo['Borough'].str.contains('Toronto', case=False)
borough_toronto = toronto_neigh_geo.loc[is_toronto_borough].reset_index(drop=True)
borough_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West , Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar , The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [15]:
!pip install folium
import folium
from geopy.geocoders import Nominatim



In [16]:
# see Toronto neighborhoods on a map
address = 'Toronto, ON'
geolocator = Nominatim(user_agent = "on_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; fail with a clear
    # message instead of an AttributeError on the attribute access below.
    raise ValueError('Could not geocode address: ' + address)
latitude_toronto = location.latitude
longitude_toronto = location.longitude

# create map object centred on the geocoded city location
map_toronto = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# add one circle marker per neighborhood row; the popup shows the neighborhood name
for lat, lng, label in zip(borough_toronto['Latitude'], borough_toronto['Longitude'], borough_toronto['Neighborhood']):
    # parse_html=True belongs to the Popup; it is not a CircleMarker option, so
    # it is not passed to the marker.
    marker_popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = marker_popup,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7).add_to(map_toronto)

# last expression of the cell: renders the map
map_toronto

In [17]:
import requests
