Import and prepare

In [13]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import lxml.html as lh
import urllib.request
import bs4 as bs

In [14]:
# Address of the Wikipedia page whose postal-code table is scraped below
url = (
    'https://en.wikipedia.org/wiki/'
    'List_of_postal_codes_of_Canada:_M'
)

Crawl table and filter

In [15]:
def scrape_table_bs4(cname, cols, page_url=None):
    """Scrape the first HTML table with CSS class ``cname`` into a DataFrame.

    Parameters
    ----------
    cname : str
        CSS class of the ``<table>`` element to extract.
    cols : int
        Number of ``<td>`` cells a row must have to be kept. Rows with any
        other cell count (for instance header rows, which hold only ``<th>``
        cells) are discarded.
    page_url : str, optional
        Page to download. When omitted, the notebook-level ``url`` variable
        is used, which is what the original two-argument call relied on.

    Returns
    -------
    pandas.DataFrame
        One row per kept ``<tr>``. Column names come from the table's ``<th>``
        cells, so the number of ``<th>`` cells has to equal ``cols`` for the
        frame to be constructed. Every value is the first text node of its
        cell with surrounding whitespace removed ('' for an empty cell).

    Raises
    ------
    ValueError
        If the page contains no table with class ``cname``.
    """
    source = url if page_url is None else page_url

    # Use a context manager so the HTTP response is closed even on error.
    with urllib.request.urlopen(source) as response:
        page = response.read()

    soup = bs.BeautifulSoup(page, 'lxml')
    table = soup.find("table", class_=cname)
    if table is None:
        raise ValueError(
            'No <table class="{}"> found at {}'.format(cname, source))

    def first_text(cell):
        # First text node inside the cell; an empty cell yields '' instead of
        # raising IndexError as indexing [0] on an empty result list would.
        node = cell.find(string=True)
        return '' if node is None else node.strip()

    header = [first_text(th) for th in table.find_all("th")]
    rows = [[first_text(td) for td in tr.find_all("td")]
            for tr in table.find_all("tr")]
    # Keep only complete data rows.
    rows = [row for row in rows if len(row) == cols]
    return pd.DataFrame(rows, columns=header)
# Download the "wikitable" and keep only its three-cell data rows
rawT = scrape_table_bs4(cname="wikitable", cols=3)

In [16]:
# Drop every row whose borough was never assigned
Toronto = rawT[rawT['Borough'] != 'Not assigned']

# Order by postcode, then borough, then neighbourhood, and renumber rows from 0
Toronto = Toronto.sort_values(
    by=['Postcode', 'Borough', 'Neighbourhood'],
    ascending=True,
)
Toronto = Toronto.reset_index(drop=True)

# Where the neighbourhood is unassigned, fall back to the borough name
Toronto['Neighbourhood'] = Toronto['Neighbourhood'].mask(
    Toronto['Neighbourhood'] == 'Not assigned',
    Toronto['Borough'],
)

# Integrate neighbourhoods: one row per (postcode, borough), names comma-joined
Toronto = (
    Toronto
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .apply(', '.join)
    .reset_index()
)

In [17]:
# Display the cleaned frame's dimensions as a (rows, columns) tuple
Toronto.shape

(103, 3)

In [18]:
# Coordinates come from the published CSV; the alternative would have been
# looking each postcode up through geocoder.
df_latlng = pd.read_csv('http://cocl.us/Geospatial_data')

# Rename the columns positionally so the join key matches Toronto's 'Postcode'
df_latlng.columns = ['Postcode', 'Latitude', 'Longitude']

# Inner join: only postcodes present in both tables survive
df_join = Toronto.merge(df_latlng, how='inner', on='Postcode')

Explore Toronto

In [19]:
# Boroughs whose name contains the word "Toronto"
Borough = ['Downtown Toronto','West Toronto', 'East Toronto', 'Central Toronto']

# Independent copy of just the columns needed for exploration and mapping
neighborhoods = df_join.loc[:, ['Borough', 'Neighbourhood', 'Latitude', 'Longitude']].copy()

# Keep only the rows that belong to one of the boroughs listed above
neighbourhoods_Toronto = neighborhoods.loc[neighborhoods['Borough'].isin(Borough)]
neighbourhoods_Toronto.head(9)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
37,East Toronto,The Beaches,43.676357,-79.293031
41,East Toronto,"Riverdale, The Danforth West",43.679557,-79.352188
42,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,East Toronto,Studio District,43.659526,-79.340923
44,Central Toronto,Lawrence Park,43.72802,-79.38879
45,Central Toronto,Davisville North,43.712751,-79.390197
46,Central Toronto,North Toronto West,43.715383,-79.405678
47,Central Toronto,Davisville,43.704324,-79.38879
48,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


In [20]:
# Report the number of distinct boroughs and the number of rows in the subset
summary = 'The dataframe has {} boroughs and {} neighborhoods.'
print(summary.format(len(neighbourhoods_Toronto['Borough'].unique()),
                     len(neighbourhoods_Toronto.index)))

The dataframe has 4 boroughs and 38 neighborhoods.


Render Map

In [24]:
#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library
from geopy.geocoders import Nominatim

address = 'Toronto, Canada'

# Geocode the city name to obtain the point the map is centred on.
geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when the service finds no match; fail with a
    # clear message instead of an AttributeError on the next line.
    raise RuntimeError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row of the filtered frame.
# The loop variable is deliberately NOT called `neighbourhoods_Toronto`:
# reusing that name rebinds the DataFrame to a plain string, which breaks
# re-running this cell and every later cell that reads the frame.
for lat, lng, borough, neighbourhood in zip(neighbourhoods_Toronto['Latitude'],
                                            neighbourhoods_Toronto['Longitude'],
                                            neighbourhoods_Toronto['Borough'],
                                            neighbourhoods_Toronto['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    # parse_html=True makes folium treat the label as text, not markup
    popup = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, so it is not repeated on the marker
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_Toronto)

map_Toronto

The geograpical coordinate of Toronto are 43.653963, -79.387207.
