In [1]:
import numpy as np 
import pandas as pd 
import json 
from pandas.io.json import json_normalize 
from geopy.geocoders import Nominatim 
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium  

print('Libraries imported.')

Libraries imported.


Use this notebook to scrape the Wikipedia page https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, extract the table of postal codes it contains, and load that table into a pandas DataFrame. The cell below reads a pinned revision of that page (oldid=926287641).

In [2]:
# Parse every HTML table on a pinned revision of the Wikipedia page; the first
# row of each table is used as its header. The result is a list of DataFrames.
d = pd.read_html(
    io='https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&direction=prev&oldid=926287641',
    header=0,
)

In [3]:
# Rename the columns of the first scraped table, by position, to
# 'Postal Code', 'Borough' and 'Neighborhood', then display the table.
d[0].columns = [
    'Postal Code',
    'Borough',
    'Neighborhood',
]
d[0]

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
283,M8Z,Etobicoke,Mimico NW
284,M8Z,Etobicoke,The Queensway West
285,M8Z,Etobicoke,Royal York South West
286,M8Z,Etobicoke,South of Bloor


In [4]:
# Bind the first scraped table to the working name `df`.
# This is a plain assignment, so `df` refers to the same object as d[0] (no copy is made).
df = d[0]
# Print a summary of the frame: index range, columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 288 entries, 0 to 287
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Postal Code   288 non-null    object
 1   Borough       288 non-null    object
 2   Neighborhood  288 non-null    object
dtypes: object(3)
memory usage: 6.9+ KB


In [5]:
# Keep only rows that have an assigned borough: collect the index labels of the
# rows whose Borough is 'Not assigned', then drop those labels from the frame.
drop_index = df.index[df['Borough'].eq('Not assigned')]
df = df.drop(index=drop_index)
df

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
...,...,...,...
282,M8Z,Etobicoke,Kingsway Park South West
283,M8Z,Etobicoke,Mimico NW
284,M8Z,Etobicoke,The Queensway West
285,M8Z,Etobicoke,Royal York South West


In [6]:
# Collapse all rows that share a postal code (e.g. the several M5A rows) into
# one row indexed by 'Postal Code'. Within each group, every column keeps its
# distinct values once and joins them with commas, so a borough repeated on
# every row appears a single time and the neighborhoods become one
# comma-separated string.
#
# dict.fromkeys() de-duplicates while preserving first-seen order. The earlier
# set() has arbitrary iteration order for strings, so the joined text could
# change from one kernel session to the next.
df1 = df.groupby("Postal Code").agg(lambda x: ','.join(dict.fromkeys(x)))
df1

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern,Rouge"
M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
M1E,Scarborough,"Guildwood,West Hill,Morningside"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Richview Gardens,St. Phillips,Kingsview Villag..."
M9V,Etobicoke,"Jamestown,Albion Gardens,Silverstone,Thistleto..."


In [7]:
# Write the grouped table (including its 'Postal Code' index) to a CSV file.
# NOTE(review): this runs before the cell that replaces 'Not assigned'
# neighborhoods with the borough name, so the file holds the pre-fix values.
df1.to_csv(path_or_buf='file_name.csv')

In [8]:
# Where a row has a borough but its neighborhood is "Not assigned",
# use the borough name as the neighborhood; all other rows are left unchanged.
df1['Neighborhood'] = df1['Neighborhood'].mask(
    df1['Neighborhood'] == "Not assigned",
    df1['Borough'],
)
df1

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern,Rouge"
M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
M1E,Scarborough,"Guildwood,West Hill,Morningside"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Richview Gardens,St. Phillips,Kingsview Villag..."
M9V,Etobicoke,"Jamestown,Albion Gardens,Silverstone,Thistleto..."


In [9]:
# Show the dimensions of the grouped dataframe.
# `.shape` is an attribute (not a method) that evaluates to a (rows, columns) tuple.
df1.shape

(103, 2)

In [10]:
# Load the latitude/longitude lookup table from a CSV file in the working
# directory, then display it.
df_postal = pd.read_csv(filepath_or_buffer='Geospatial_Coordinates.csv')
df_postal

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [11]:
# Attach coordinates to each postal code with an inner join on "Postal Code".
# df1 carries "Postal Code" as its index name (from the groupby), while
# df_postal has it as a regular column. The joined frame is only displayed;
# it is not bound to a name.
df1.merge(df_postal, on="Postal Code", how="inner")

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,West Hill,Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Richview Gardens,St. Phillips,Kingsview Villag...",43.688905,-79.554724
101,M9V,Etobicoke,"Jamestown,Albion Gardens,Silverstone,Thistleto...",43.739416,-79.588437
