### Import numpy, pandas and requests libraries

In [4]:
import numpy as np
import pandas as pd
import requests

### Scrape table from website and convert it to a pandas DataFrame

In [5]:
# Download the Wikipedia page listing the Canadian postal codes that start
# with "M" and parse its HTML tables into pandas DataFrames.
import io

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
html = response.content
# read_html is given a file-like object; passing literal HTML directly is
# deprecated in recent pandas releases.
df_list = pd.read_html(io.StringIO(response.text))
# NOTE(review): assumes the first table on the page is the postal-code table
# (the one with the 'Postal Code' and 'Borough' columns used below) — confirm
# if the page layout changes.
df = df_list[0]

### Remove all rows with 'Not assigned' Borough values

In [6]:
# Collect the index labels of rows whose Borough is the placeholder
# 'Not assigned', then remove those rows from df in place.
unassigned_rows = df.index[df['Borough'] == 'Not assigned']
df.drop(index=unassigned_rows, inplace=True)

### Reset index

In [7]:
# Renumber the remaining rows 0..n-1. drop=True discards the old index
# directly instead of first materialising it as an 'index' column and then
# deleting that column again.
df.reset_index(drop=True, inplace=True)

### Show dimensions of the dataframe

In [8]:
# Display the (rows, columns) dimensions of df as the cell output.
df.shape

(103, 3)

### Import library for latitude and longitude assignments

In [9]:
#!conda install -c conda-forge geopy --yes # Uncomment this line to install geopy
from geopy.geocoders import Nominatim

### Assign latitude and longitude to each address and drop rows where no address could be retrieved

In [10]:
# Geocode every postal code with Nominatim and keep only the rows for which
# a location was returned.
from geopy.extra.rate_limiter import RateLimiter

geolocator = Nominatim(user_agent="me")
# Space the requests at least one second apart. RateLimiter also retries on
# geocoder service errors and, by default, returns None if they persist, so a
# single failed lookup does not abort the whole loop.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
lonlat = []

for postal_code in df['Postal Code']:
    location = geocode('{}, Toronto, Ontario'.format(postal_code))
    if location is None:
        # No match: record missing values (not the string 'None') so the
        # coordinate columns stay numeric and can be filtered with dropna.
        lonlat.append([None, None])
    else:
        lonlat.append([location.latitude, location.longitude])

# Reuse df's index so the coordinates line up row-for-row even when df's
# index is not a clean 0..n-1 range.
# The 'Longtitude' spelling is kept as-is because it is the column name this
# cell has always produced.
geocoded = pd.DataFrame(lonlat, columns=['Latitude', 'Longtitude'], index=df.index)
df2 = pd.concat([df, geocoded], axis=1)
df2 = df2.dropna(subset=['Latitude'])
# Preview only the first rows instead of dumping the whole frame.
df2.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longtitude
0,M3A,North York,Parkwoods,43.6535,-79.3839
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6535,-79.3839
6,M1B,Scarborough,"Malvern, Rouge",43.6535,-79.3839
11,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.6407,-79.5419
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.6535,-79.3839
13,M3C,North York,Don Mills,43.7328,-79.347
17,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.6441,-79.5889
20,M5E,Downtown Toronto,Berczy Park,43.6421,-79.3774
22,M1G,Scarborough,Woburn,43.7657,-79.2219
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.6499,-79.3827


### Because not all latitude and longitude coordinates could be retrieved in the previous step, download the provided csv file with all coordinates and save it in a new dataframe

In [11]:
# Address of the CSV file with the coordinates; it is read straight into a
# new dataframe.
geospatial_csv_url = "https://cocl.us/Geospatial_data"
postcode = pd.read_csv(geospatial_csv_url)

### Add two new columns with the latitude and longitude values to the original dataframe

In [12]:
# Attach the latitude/longitude from the `postcode` lookup table to df,
# matching rows on 'Postal Code'.
#
# The lookup is vectorised rather than written into the rows yielded by
# df.iterrows(): those rows may be copies, so assigning to them is not
# guaranteed to change df at all.
coordinate_lookup = postcode.set_index('Postal Code')
latitudes = df['Postal Code'].map(coordinate_lookup['Latitude'])
longitudes = df['Postal Code'].map(coordinate_lookup['Longitude'])

# Fail with a clear message if any postal code has no coordinates, instead of
# silently carrying missing values into the maps and the clustering below.
unmatched = df.loc[latitudes.isna() | longitudes.isna(), 'Postal Code']
if not unmatched.empty:
    raise ValueError(
        'No coordinates found for postal codes: {}'.format(sorted(unmatched.unique()))
    )

# Store real floats (rounded to 6 decimals) rather than formatted strings so
# the columns can be used directly for plotting and clustering.
df['Latitude'] = latitudes.astype(float).round(6)
df['Longitude'] = longitudes.astype(float).round(6)

In [13]:
# Preview the first rows only; displaying the whole frame bloats the notebook.
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


### Retrieve only rows with 'Toronto' in their Borough name

In [14]:
# Keep only the rows whose Borough contains the literal text 'Toronto'.
# na=False treats a missing Borough as "no match" instead of producing a NaN
# in the boolean mask. .copy() makes `toronto` an independent frame, so the
# column added to it later does not act on a slice of df.
toronto = df[df['Borough'].str.contains('Toronto', regex=False, na=False)].copy()

### Import plotting libraries

In [16]:
!conda install -c conda-forge folium=0.5.0 --yes # Installs folium 0.5.0; comment this line out once folium is installed
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.4.1               |             py_0          26 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    openssl-1.1.1g             |       h516909a_1         2.1 MB  conda-forge
    ------------------------------------------------------------
                       

### Visualise neighbourhoods containing 'Toronto' in their Borough name

In [17]:
# Base map centred on the hard-coded coordinates below.
toronto_map = folium.Map(location=[43.651070, -79.347015], zoom_start=12)

# Add one circle marker per row of `toronto`, with a popup showing
# "<neighbourhood>, <borough>".
for neighbourhood, borough, latitude, longitude in zip(
    toronto['Neighbourhood'], toronto['Borough'], toronto['Latitude'], toronto['Longitude']
):
    popup_text = '{}, {}'.format(neighbourhood, borough)
    folium.CircleMarker(
        # float() accepts coordinates stored either as numbers or as strings.
        [float(latitude), float(longitude)],
        radius=5,
        # parse_html is an option of Popup, so it is set here and not on the
        # marker itself.
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(toronto_map)
toronto_map

### Import library for KMeans clustering

In [18]:
from sklearn.cluster import KMeans

### Perform KMeans clustering

In [19]:
# Cluster the Toronto rows into 5 groups using their coordinates.
#
# The feature columns are selected by name (the positional `axis` argument of
# DataFrame.drop no longer exists in pandas 2.0) and cast to float explicitly,
# so the fit works whether the coordinates are stored as numbers or strings.
feature_columns = ['Latitude', 'Longitude']
clusters = toronto[feature_columns].astype(float)

# n_init is pinned to the long-standing default of 10 so results do not depend
# on the scikit-learn version.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=0).fit(clusters)

# Remove labels left over from a previous run so the cell can be re-executed;
# insert() raises if the column already exists. drop() returns a new frame,
# so the insert never acts on a slice of df.
toronto = toronto.drop(columns='Cluster Labels', errors='ignore')
toronto.insert(0, 'Cluster Labels', kmeans.labels_)

### Visualise colour-labelled neighbourhoods

In [20]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
clusters_map = folium.Map(location=[43.651070, -79.347015], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster, with the cluster count taken from the fitted model
n_clusters = kmeans.n_clusters
colors_array = cm.rainbow(np.linspace(0, 1, n_clusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for latitude, longitude, cluster in zip(
    toronto['Latitude'], toronto['Longitude'], toronto['Cluster Labels']
):
    # Cluster labels start at 0, so they index the palette directly. The old
    # `cluster - 1` sent label 0 to index -1 and relied on wraparound.
    marker_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [float(latitude), float(longitude)],
        radius=5,
        popup=folium.Popup(' Cluster ' + str(cluster), parse_html=True),
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    ).add_to(clusters_map)
clusters_map