We import all the relevant librairies

In [1]:
import numpy as np # library to handle data in a vectorized manner
import bs4 
from urllib.request import urlopen
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


# First Part : Getting and cleaning the data

In [2]:
#Getting the data from url
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
res = requests.get(url)
soup = bs4.BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0]

In [3]:
table_contents=[]
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [4]:
df.shape

(103, 3)

# Part 2 - Add the Latitude and Longitude

In [5]:
import csv
geo_data = []
with open('Geospatial_Coordinates.csv', newline='') as csvfile:
    long_lat_data = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in long_lat_data:
        geo_data.append(row)
geo_data

[['Postal Code', 'Latitude', 'Longitude'],
 ['M1B', '43.8066863', '-79.1943534'],
 ['M1C', '43.7845351', '-79.1604971'],
 ['M1E', '43.7635726', '-79.1887115'],
 ['M1G', '43.7709921', '-79.2169174'],
 ['M1H', '43.773136', '-79.2394761'],
 ['M1J', '43.7447342', '-79.2394761'],
 ['M1K', '43.7279292', '-79.2620294'],
 ['M1L', '43.7111117', '-79.2845772'],
 ['M1M', '43.716316', '-79.2394761'],
 ['M1N', '43.692657', '-79.2648481'],
 ['M1P', '43.7574096', '-79.273304'],
 ['M1R', '43.7500715', '-79.2958491'],
 ['M1S', '43.7942003', '-79.2620294'],
 ['M1T', '43.7816375', '-79.3043021'],
 ['M1V', '43.8152522', '-79.2845772'],
 ['M1W', '43.7995252', '-79.3183887'],
 ['M1X', '43.8361247', '-79.2056361'],
 ['M2H', '43.8037622', '-79.3634517'],
 ['M2J', '43.7785175', '-79.3465557'],
 ['M2K', '43.7869473', '-79.385975'],
 ['M2L', '43.7574902', '-79.3747141'],
 ['M2M', '43.789053', '-79.4084928'],
 ['M2N', '43.7701199', '-79.4084928'],
 ['M2P', '43.7527583', '-79.4000493'],
 ['M2R', '43.7827364', '-79

In [10]:
df['Latitude'] = ""
df['Longitude'] = ""
for index, row in df.iterrows():
    for line in geo_data:
        if str(row['PostalCode']) == str(line[0]):
            row['Latitude'] = float(line[1])
            row['Longitude'] = float(line[2])
            
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7533,-79.3297
1,M4A,North York,Victoria Village,43.7259,-79.3156
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6543,-79.3606
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7185,-79.4648
4,M7A,Queen's Park,Ontario Provincial Government,43.6623,-79.3895
5,M9A,Etobicoke,Islington Avenue,43.6679,-79.5322
6,M1B,Scarborough,"Malvern, Rouge",43.8067,-79.1944
7,M3B,North York,Don Mills North,43.7459,-79.3522
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7064,-79.3099
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3789


# Part 3 - Analyse the data

In [11]:
central_toronto_data = df[df['Borough'] == 'Central Toronto'].reset_index(drop=True)
central_toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.728,-79.3888
1,M5N,Central Toronto,Roselawn,43.7117,-79.4169
2,M4P,Central Toronto,Davisville North,43.7128,-79.3902
3,M5P,Central Toronto,Forest Hill North & West,43.6969,-79.4113
4,M4R,Central Toronto,North Toronto West,43.7154,-79.4057


In [12]:
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Central Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Central Toronto are 43.6534817, -79.3839347.


In [13]:
# create map of Central Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto