In [30]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

#import wiki page
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_page= requests.get(wikipedia_link).text

#import to soup and check the elements
soup = BeautifulSoup(raw_wikipedia_page,'xml')
#list(soup.children)


In [31]:
#craw the table raw
table = soup.find('table')
Postcode      = []
Borough       = []
Neighbourhood = []

In [43]:
# clean up and discard the unnecessary rows
for tr_cell in table.find_all('tr'):
    
    counter = 1
    Postcode_content      = -1
    Borough_content       = -1
    Neighbourhood_content = -1
    
    #scan and get data
    for td_cell in tr_cell.find_all('td'):
        if counter == 1: 
            Postcode_content = td_cell.text
        if counter == 2: 
            Borough_content = td_cell.text
            tag_a_Borough = td_cell.find('a')
            
        if counter == 3: 
            Neighbourhood_content = str(td_cell.text).strip()
            tag_a_Neighbourhood = td_cell.find('a')
        counter +=1    
        
        #filter the Not assigned rows and check the neighbours
        if (Postcode_content == 'Not assigned' or Borough_content == 'Not assigned' or Neighbourhood_content == 'Not assigned'):      
            continue
             
    try:
        if ((tag_a_Borough is None) or (tag_a_Neighbourhood is None)):    
            continue
                 
    except:
        pass
    
    if(Postcode_content == -1 or Borough_content == -1 or Neighbourhood_content == -1):
        
        continue   
    Postcode.append(Postcode_content)
    Borough.append(Borough_content)
    Neighbourhood.append(Neighbourhood_content)

In [44]:
# make postcode as 'primary key'
unique_postcode = set(Postcode)
print('Unique Postal codes:', len(unique_postcode))
Postcode_unique      = []
Borough_unique       = []
Neighbourhood_unique = []

# integrate neighbours
for postcode_unique_element in unique_postcode:
    p_var = ''; b_var = ''; n_var = ''; 
    for postcode_idx, postcode_element in enumerate(Postcode):
        if postcode_unique_element == postcode_element:
            p_var = postcode_element;
            b_var = Borough[postcode_idx]
            if n_var == '': 
                n_var = Neighbourhood[postcode_idx]
            else:
                n_var = n_var + ', ' + Neighbourhood[postcode_idx]
    Postcode_unique.append(p_var)
    Borough_unique.append(b_var)
    Neighbourhood_unique.append(n_var)

Unique Postal codes: 77


In [45]:
#transform the data into a pandas dataframe
canada_dict = {'Postcode':Postcode_unique, 'Borough':Borough_unique, 'Neighbourhood':Neighbourhood_unique}
df_canada = pd.DataFrame.from_dict(canada_dict)
df_canada.to_csv('canada.csv')
df_canada.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M4N,Central Toronto,"Lawrence Park, Lawrence Park"
1,M4K,East Toronto,"Riverdale, Riverdale"
2,M4W,Downtown Toronto,"Rosedale, Rosedale"
3,M5L,Downtown Toronto,"Commerce Court, Commerce Court"
4,M5S,Downtown Toronto,"University of Toronto, University of Toronto"
5,M3H,North York,"Bathurst Manor, Wilson Heights, Bathurst Manor..."
6,M6K,West Toronto,"Exhibition Place, Parkdale Village, Exhibition..."
7,M6P,West Toronto,"High Park, High Park"
8,M6R,West Toronto,"Parkdale, Roncesvalles, Parkdale, Roncesvalles"
9,M8Y,Etobicoke,"Humber Bay, Mimico NE, Old Mill South, The Que..."


In [14]:
#print the number of rows 
df_canada.shape

(77, 3)

In [46]:
#import csv to create dataframe
df_latlng = pd.read_csv('http://cocl.us/Geospatial_data')
df_latlng.columns = ['Postcode', 'Latitude', 'Longitude']

df_join = pd.merge(df_canada, df_latlng, on=['Postcode'], how='inner')

In [39]:
df_join.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4K,East Toronto,Riverdale,43.679557,-79.352188
2,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
3,M5L,Downtown Toronto,Commerce Court,43.648198,-79.379817
4,M5S,Downtown Toronto,University of Toronto,43.662696,-79.400049
5,M3H,North York,"Bathurst Manor, Wilson Heights",43.754328,-79.442259
6,M6K,West Toronto,"Exhibition Place, Parkdale Village",43.636847,-79.428191
7,M6P,West Toronto,High Park,43.661608,-79.464763
8,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325
9,M8Y,Etobicoke,"Humber Bay, Mimico NE, Old Mill South, The Que...",43.636258,-79.498509


In [47]:
neighborhoods = df_join[['Borough', 'Neighbourhood', 'Latitude', 'Longitude']].copy()
neighborhoods.head(12)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Central Toronto,"Lawrence Park, Lawrence Park",43.72802,-79.38879
1,East Toronto,"Riverdale, Riverdale",43.679557,-79.352188
2,Downtown Toronto,"Rosedale, Rosedale",43.679563,-79.377529
3,Downtown Toronto,"Commerce Court, Commerce Court",43.648198,-79.379817
4,Downtown Toronto,"University of Toronto, University of Toronto",43.662696,-79.400049
5,North York,"Bathurst Manor, Wilson Heights, Bathurst Manor...",43.754328,-79.442259
6,West Toronto,"Exhibition Place, Parkdale Village, Exhibition...",43.636847,-79.428191
7,West Toronto,"High Park, High Park",43.661608,-79.464763
8,West Toronto,"Parkdale, Roncesvalles, Parkdale, Roncesvalles",43.64896,-79.456325
9,Etobicoke,"Humber Bay, Mimico NE, Old Mill South, The Que...",43.636258,-79.498509


In [56]:
# work with only boroughs that contain the word Toronto 
Borough = ['Downtown Toronto','West Toronto', 'East Toronto', 'Central Toronto']
neighbourhoods_Toronto = neighborhoods[neighborhoods.Borough.isin(Borough)]
neighbourhoods_Toronto.head(9)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Central Toronto,"Lawrence Park, Lawrence Park",43.72802,-79.38879
1,East Toronto,"Riverdale, Riverdale",43.679557,-79.352188
2,Downtown Toronto,"Rosedale, Rosedale",43.679563,-79.377529
3,Downtown Toronto,"Commerce Court, Commerce Court",43.648198,-79.379817
4,Downtown Toronto,"University of Toronto, University of Toronto",43.662696,-79.400049
6,West Toronto,"Exhibition Place, Parkdale Village, Exhibition...",43.636847,-79.428191
7,West Toronto,"High Park, High Park",43.661608,-79.464763
8,West Toronto,"Parkdale, Roncesvalles, Parkdale, Roncesvalles",43.64896,-79.456325
14,Central Toronto,"Deer Park, Rathnelly, South Hill, Deer Park, R...",43.686412,-79.400049


In [57]:
# explore Toronto
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighbourhoods_Toronto['Borough'].unique()),
        neighbourhoods_Toronto.shape[0]
    )
)

The dataframe has 4 boroughs and 27 neighborhoods.


In [66]:
#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhoods_Toronto in zip(neighbourhoods_Toronto['Latitude'], neighbourhoods_Toronto['Longitude'], neighbourhoods_Toronto['Borough'], neighbourhoods_Toronto['Neighbourhood']):
    label = '{}, {}'.format(neighbourhoods_Toronto, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

The geograpical coordinate of Toronto are 43.653963, -79.387207.
