# Segmenting and Clustering Neighborhoods in Toronto

## Part 3

In [1]:
import numpy as np 
import pandas as pd 
import json

from geopy.geocoders import Nominatim 
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans
import folium 

print('Libraries imported.')

Libraries imported.


### Previously, in Part 2, we obtained the dataframe `df2`

In [2]:
## Load the Toronto postal-code table (first table on the Wikipedia page).
data = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
df = data[0]

## Keep only rows with an assigned borough and renumber them from 0.
has_borough = df.Borough != 'Not assigned'
df1 = df[has_borough].reset_index(drop=True)

## Load the per-postal-code coordinates (the merged result below gains
## the Latitude and Longitude columns from this file).
postals = pd.read_csv("http://cocl.us/Geospatial_data")

## Join the two tables on the column(s) they share.
df2 = df1.merge(postals)

df2.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Now we check the number of boroughs and neighborhoods.

In [3]:
# Count distinct boroughs and total rows (one row per postal code) for the summary line.
borough_count = df2['Borough'].unique().size
row_count = df2.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count))

The dataframe has 10 boroughs and 103 neighborhoods.


### Create and visualize the map of Toronto with its neighbourhoods

In [4]:
## Get the latitude and longitude values of Toronto.
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; stop here with a
# clear message instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

## Create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

## Add one circle marker per row of df2, with a "<neighbourhood>, <borough>" popup.
for lat, lng, borough, neighborhood in zip(df2['Latitude'], df2['Longitude'], df2['Borough'], df2['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes folium escape the label text, so names containing
    # quotes or other special characters render as plain text.
    label = folium.Popup(label, parse_html=True)
    # Note: parse_html is a Popup option only; it is not passed to CircleMarker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_Toronto)

map_Toronto

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Clustering Neighborhoods.

In [5]:
# Cluster on the coordinate columns only. They are selected by name rather than
# by position so the cell is safe to re-run: after the first run df2 also holds
# a "Labels" column, which a positional slice (`[:, 3:]`) would feed back into
# the model as an extra feature.
X = df2[['Latitude', 'Longitude']].values

# set number of clusters
kclusters = 4

# run k-means clustering
# n_init is pinned so the number of restarts (and therefore the result) does not
# depend on the installed scikit-learn version's default; random_state fixes the seed.
kmeans = KMeans(init="k-means++", n_clusters=kclusters, n_init=10, random_state=0).fit(X)

# attach the cluster label of each row to the dataframe
labels = kmeans.labels_
df2["Labels"] = labels
df2.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Labels
0,M3A,North York,Parkwoods,43.753259,-79.329656,2
1,M4A,North York,Victoria Village,43.725882,-79.315572,2
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,3
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0


### Visualize the clustered neighbourhoods

In [6]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster, converted to hex strings that folium accepts
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(df2['Latitude'], df2['Longitude'], df2['Neighbourhood'], df2['Labels']):
    # Labels run from 0 to kclusters-1, so they index the palette directly
    # (no `-1` offset that would rely on negative-index wraparound for label 0).
    cluster_color = rainbow[int(cluster)]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.9).add_to(map_clusters)

map_clusters