## Assignment Part III - Segmenting and Clustering Neighborhoods in Toronto

### Load Libraries

In [71]:
import pandas as pd
import numpy as np

In [72]:
# Install and Load Folium
# The leading `!` hands the line to the system shell. The command asks conda for
# folium pinned to version 0.5.0 from the conda-forge channel; --yes answers the
# confirmation prompt so the cell can run unattended.
# NOTE(review): this uses whichever `conda` is first on the shell PATH, which may
# not be the environment the kernel runs in -- confirm before relying on it.
!conda install -c conda-forge folium=0.5.0 --yes
import folium
# Reached only if the import above succeeded.
print('Folium installed and imported!')

Solving environment: done

# All requested packages already installed.

Folium installed and imported!


In [73]:
# Install and Load Geocoder
# Both packages are installed through the shell's `pip`, without a version pin,
# so the versions picked up can differ between runs.
# NOTE(review): msgpack is installed before geocoder, presumably to satisfy a
# dependency of the environment -- TODO confirm it is still needed.
!pip install msgpack
!pip install geocoder
import geocoder
# Reached only if the import above succeeded.
print('Geocoder installed and imported!')

Geocoder installed and imported!


In [74]:
# Import KMeans Clustering
from sklearn.cluster import KMeans

In [75]:
# Import Matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors

### Read Saved Output from Part II

In [76]:
# Load the dataframe saved to disk in Part II, then preview its first ten rows
pc_table2 = pd.read_csv(filepath_or_buffer="Toronto2.csv")
pc_table2.head(n=10)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


### Explore and Cluster the Neighborhoods in Toronto.

In [77]:
# Restrict the analysis to boroughs whose name contains 'Toronto'.
# That keeps 38 postal codes (out of 103 in total).

# boolean mask: True for every row whose Borough mentions Toronto
is_toronto_borough = pc_table2["Borough"].str.contains('Toronto')

# copy the selection so later column assignments do not touch pc_table2
pc_toronto = pc_table2.loc[is_toronto_borough].copy()
pc_toronto.shape

(38, 5)

In [78]:
# First, we need to get the geographical coordinates of Toronto area.
#
# The geocoder may come back without a result, so the lookup is retried -- but
# only a bounded number of times, so that a persistent failure (no network,
# rejected request, ...) stops the cell with an error instead of hanging the
# kernel in an endless loop.
max_geocode_attempts = 10

# The query is a fixed address; the former '{}' placeholder was never filled in
# and was being sent to the geocoder literally.
toronto_address = 'Toronto, Ontario'

# stays None until a lookup succeeds
lat_lng_coords = None

for _ in range(max_geocode_attempts):
    g = geocoder.google(toronto_address)
    lat_lng_coords = g.latlng
    # treat both None and an empty result as "no coordinates yet"
    if lat_lng_coords:
        break

if not lat_lng_coords:
    raise RuntimeError(
        'Could not geocode {!r} after {} attempts.'.format(toronto_address, max_geocode_attempts)
    )

# latlng is a [latitude, longitude] pair
latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]

print('Geographical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

Geographical coordinates of Toronto are 43.653226, -79.3831843.


In [79]:
# Draw a first map of Toronto with one marker per neighborhood.
# There are 4 boroughs in total, but the map alone does not make that apparent.

# base map centred on the Toronto coordinates
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# appearance shared by every neighborhood marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# one (latitude, longitude, borough, neighborhood) tuple per row
marker_rows = zip(
    pc_toronto['Latitude'],
    pc_toronto['Longitude'],
    pc_toronto['Borough'],
    pc_toronto['Neighborhood'],
)

# add a circle marker with a "<neighborhood>, <borough>" popup for each row
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        popup=folium.Popup(popup_text, parse_html=True),
        **marker_style
    )
    marker.add_to(toronto_map)

# display the map of Toronto
toronto_map

In [84]:
# Let's see what happens if we cluster the neighborhoods based on latitude-longitude coordinates

# set number of clusters to 4 (since we know there are 4 boroughs in our dataframe)
kclusters = 4

# Select the two coordinate columns explicitly. Dropping the text columns
# instead would also keep 'Cluster_Labels' once this cell has run, so re-running
# the cell would feed the previous labels back into k-means as a third feature.
toronto_grouped_clustering = pc_toronto[['Latitude', 'Longitude']]

# run k-means clustering (random_state is fixed so the labels are repeatable)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# append cluster labels to the dataframe (one label per row, in row order)
pc_toronto["Cluster_Labels"] = kmeans.labels_
pc_toronto.head(20)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster_Labels
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,0
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,1
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564,3
30,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568,0
31,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259,3
36,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752,0


In [85]:
# Show a revised map of Toronto with the 4 detected clusters of neighborhoods
# From the table and the map, we notice that most of the cluster labels are consistent with the boroughs

# matplotlib's `cm` and `colors` modules are already imported near the top of
# the notebook, so they are not imported again here.

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced colour from the rainbow
# colormap per cluster, converted to hex strings
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, neighbor, borough, cluster in zip(pc_toronto['Latitude'], pc_toronto['Longitude'], pc_toronto['Neighborhood'], pc_toronto['Borough'], pc_toronto['Cluster_Labels']):
    label = folium.Popup(str(borough) + ' -- ' + str(neighbor) + ' -- ' + ' Cluster ' + str(cluster), parse_html=True)
    # k-means labels run from 0 to kclusters-1, so they index the palette
    # directly (indexing with cluster-1 only worked through negative wraparound)
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters