Prepare data

In [None]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

# Download the Wikipedia page that lists the "M" (Toronto) postal codes.
wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of letting an error
# page be parsed and fail confusingly in a later cell.
wikipedia_response = requests.get(wikipedia_link, timeout=30)
wikipedia_response.raise_for_status()
raw_wikipedia_page = wikipedia_response.text

# Parse the markup. The page is HTML, so an HTML parser is used; the 'xml'
# feature selects an XML parser, which is intended for XML documents.
soup = BeautifulSoup(raw_wikipedia_page, 'html.parser')


In [None]:
# Grab the first <table> element of the parsed page and create one empty
# accumulator list per output column.
table = soup.find('table')
Postcode, Borough, Neighbourhood = [], [], []

In [None]:
# Scan every table row and keep only complete, assigned, linked entries.
#
# A row is appended to Postcode / Borough / Neighbourhood when it
#   * has at least three <td> cells (header rows made of <th> are skipped),
#   * has no 'Not assigned' value in any of its first three cells, and
#   * has an <a> link in both its borough cell and its neighbourhood cell.
NOT_ASSIGNED = 'Not assigned'

# Start from empty accumulators so that re-running this cell does not append
# every row a second time.
Postcode      = []
Borough       = []
Neighbourhood = []

for tr_cell in table.find_all('tr'):
    td_cells = tr_cell.find_all('td')

    # Rows with fewer than three data cells cannot supply all three fields.
    if len(td_cells) < 3:
        continue

    postcode_cell, borough_cell, neighbourhood_cell = td_cells[:3]

    # Strip all three values (only the neighbourhood used to be stripped) so
    # stray whitespace/newlines cannot defeat the 'Not assigned' comparison or
    # a later exact-match join on the postcode.
    Postcode_content      = postcode_cell.text.strip()
    Borough_content       = borough_cell.text.strip()
    Neighbourhood_content = neighbourhood_cell.text.strip()

    # Filter the 'Not assigned' rows. This test used to be the last statement
    # of the per-cell loop, where its `continue` could not skip the row.
    if NOT_ASSIGNED in (Postcode_content, Borough_content, Neighbourhood_content):
        continue

    # Look the links up for this row only, so no value can leak in from a
    # previous row and no bare `except` is needed for the header row.
    # NOTE(review): requiring a link also drops rows whose neighbourhood is
    # plain text without a wiki link -- confirm this is intended.
    tag_a_Borough       = borough_cell.find('a')
    tag_a_Neighbourhood = neighbourhood_cell.find('a')
    if tag_a_Borough is None or tag_a_Neighbourhood is None:
        continue

    Postcode.append(Postcode_content)
    Borough.append(Borough_content)
    Neighbourhood.append(Neighbourhood_content)

In [None]:
# make postcode as 'primary key'
unique_postcode = set(Postcode)
print('Unique Postal codes:', len(unique_postcode))

# Integrate neighbours: collapse the row-per-neighbourhood lists into one entry
# per postcode in a single pass. Dicts keep insertion order (Python 3.7+), so
# the result follows the order in which postcodes first appear in the table;
# iterating the set instead gave an order that could change between runs.
borough_by_postcode = {}
neighbourhoods_by_postcode = {}
for postcode_element, borough_element, neighbourhood_element in zip(Postcode, Borough, Neighbourhood):
    # The last row seen for a postcode decides its borough, as before.
    borough_by_postcode[postcode_element] = borough_element
    neighbourhoods_by_postcode.setdefault(postcode_element, []).append(neighbourhood_element)

Postcode_unique      = list(neighbourhoods_by_postcode)
Borough_unique       = [borough_by_postcode[p_var] for p_var in Postcode_unique]
# All neighbourhoods sharing a postcode are joined into one ', '-separated string.
Neighbourhood_unique = [', '.join(neighbourhoods_by_postcode[p_var]) for p_var in Postcode_unique]

In [None]:
# Assemble the three parallel lists into a DataFrame (one column each),
# write a CSV copy to disk and preview the first 12 rows.
canada_dict = {
    'Postcode': Postcode_unique,
    'Borough': Borough_unique,
    'Neighbourhood': Neighbourhood_unique,
}
df_canada = pd.DataFrame(canada_dict)
df_canada.to_csv('canada.csv')
df_canada.head(12)

In [None]:
# Display the (rows, columns) shape of df_canada; the first element is the number of rows.
df_canada.shape

In [None]:
# Read the postcode -> coordinates table from the remote CSV and give its
# columns the names used by the rest of the notebook.
df_latlng = pd.read_csv('http://cocl.us/Geospatial_data')
df_latlng.columns = ['Postcode', 'Latitude', 'Longitude']

# Inner join on the postcode: only postcodes present in both frames survive.
df_join = df_canada.merge(df_latlng, how='inner', on='Postcode')

In [None]:
# Preview the first 12 rows of the merged postcode/coordinates frame.
df_join.head(12)

In [None]:
# Keep only the columns needed for mapping, as an independent copy of df_join,
# and preview the first 12 rows.
map_columns = ['Borough', 'Neighbourhood', 'Latitude', 'Longitude']
neighborhoods = df_join[map_columns].copy()
neighborhoods.head(12)

In [None]:
# Restrict the data to the four boroughs whose name contains the word Toronto.
# NOTE: this rebinds `Borough`, which the scraping cells use as a list accumulator.
Borough = ['Downtown Toronto','West Toronto', 'East Toronto', 'Central Toronto']
is_toronto_borough = neighborhoods['Borough'].isin(Borough)
neighbourhoods_Toronto = neighborhoods.loc[is_toronto_borough]
neighbourhoods_Toronto.head(9)

Explore Toronto

In [None]:
# Report how many distinct boroughs and how many rows the Toronto subset has.
borough_count = len(neighbourhoods_Toronto['Borough'].unique())
neighbourhood_count = neighbourhoods_Toronto.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, neighbourhood_count))

Render Map

In [None]:
#!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library
# Nominatim is used below but was not imported by any cell, so on a fresh
# kernel this cell failed with NameError.
from geopy.geocoders import Nominatim # converts an address into latitude/longitude

address = 'Toronto, Canada'

# Geocode the city to get a centre point for the map.
geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; fail with a
# clear message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
# The loop variable is named `neighbourhood` so that it does not overwrite the
# `neighbourhoods_Toronto` DataFrame; previously the DataFrame was replaced by
# the last neighbourhood string, which broke re-running this cell.
for lat, lng, borough, neighbourhood in zip(neighbourhoods_Toronto['Latitude'], neighbourhoods_Toronto['Longitude'], neighbourhoods_Toronto['Borough'], neighbourhoods_Toronto['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)

# Last expression of the cell: renders the map inline.
map_Toronto