In [18]:
import folium
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

In [5]:
# Read the per-postal-code neighbourhood table from the CSV next to the notebook.
pd_geo = pd.read_csv(filepath_or_buffer='pd_geo.csv')
# Preview the first five rows as this cell's output.
pd_geo.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Using the `groupby` function, merge rows that share the same Neighbourhood and use the mean Latitude and Longitude as the group's Latitude and Longitude.

In [12]:
# Collapse rows that share a Neighbourhood name into a single row whose
# coordinates are the mean of the group's coordinates.
# Only the two numeric coordinate columns are aggregated: pandas >= 2.0 raises
# a TypeError when .mean() reaches the text columns (Postal Code, Borough)
# instead of silently dropping them as older versions did.
geo_group = (
    pd_geo
    .groupby('Neighbourhood')[['Latitude', 'Longitude']]
    .mean()
    .reset_index()
)

# Plain NumPy arrays of the grouped coordinates.
latitude = geo_group['Latitude'].values
longitude = geo_group['Longitude'].values

geo_group

Unnamed: 0,Neighbourhood,Latitude,Longitude
0,Agincourt,43.794200,-79.262029
1,"Alderwood, Long Branch",43.602414,-79.543484
2,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
3,Bayview Village,43.786947,-79.385975
4,"Bedford Park, Lawrence Manor East",43.733283,-79.419750
...,...,...,...
94,"Willowdale, Willowdale West",43.782736,-79.442259
95,Woburn,43.770992,-79.216917
96,Woodbine Heights,43.695344,-79.318389
97,York Mills West,43.752758,-79.400049


Using the K-Means function with the number of clusters set to 5, cluster the neighbourhoods.

In [23]:
# Number of clusters to fit.
KClusters = 5

# Cluster on the coordinates only. The columns are selected by name (rather
# than dropping 'Neighbourhood') so the cell is idempotent: after the first run
# geo_group also carries the 'labels' column written below, and that column
# must not leak into the features when the cell is executed again.
geo_train = geo_group[['Latitude', 'Longitude']]

# n_init is pinned to 10 (the default before scikit-learn 1.4) so the fit does
# not depend on the installed version's default; random_state fixes the
# centroid initialisation.
kmeans = KMeans(n_clusters=KClusters, n_init=10, random_state=41).fit(geo_train)

# Attach each neighbourhood's cluster index (0 .. KClusters - 1).
geo_group['labels'] = kmeans.labels_

In [24]:
# Show the grouped table again; it now includes the 'labels' column assigned by the K-Means cell.
geo_group

Unnamed: 0,Neighbourhood,Latitude,Longitude,labels
0,Agincourt,43.794200,-79.262029,3
1,"Alderwood, Long Branch",43.602414,-79.543484,1
2,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,4
3,Bayview Village,43.786947,-79.385975,4
4,"Bedford Park, Lawrence Manor East",43.733283,-79.419750,4
...,...,...,...,...
94,"Willowdale, Willowdale West",43.782736,-79.442259,4
95,Woburn,43.770992,-79.216917,3
96,Woodbine Heights,43.695344,-79.318389,0
97,York Mills West,43.752758,-79.400049,4


Using folium, draw the neighbourhoods on a map, coloured by cluster.

In [25]:
# Centre the map on the data being plotted. The previous hard-coded centre
# (40.71, -74.01) is New York City, while every neighbourhood here sits around
# latitude 43.7 / longitude -79.3, so the markers were rendered off-screen.
map_center = [
    float(geo_group['Latitude'].mean()),
    float(geo_group['Longitude'].mean()),
]
map_clusters = folium.Map(location=map_center, zoom_start=11)

# One evenly spaced colour from the rainbow colormap per cluster, as hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, KClusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# Add one circle marker per neighbourhood, coloured by its cluster.
for lat, lon, poi, cluster in zip(
    geo_group['Latitude'],
    geo_group['Longitude'],
    geo_group['Neighbourhood'],
    geo_group['labels'],
):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # K-Means labels are 0-based, so they index the palette directly
    # (the old `cluster - 1` only worked through negative-index wraparound).
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7,
    ).add_to(map_clusters)

# Render the map as the cell's output.
map_clusters