In [54]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim 

In [4]:
df = pd.read_pickle("List_of_postal_codes_of_Canada_Lat_Long.pkl")

In [7]:
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1A,Not assigned,Not assigned,43.64869,-79.38544
1,M2A,Not assigned,Not assigned,43.64869,-79.38544
2,M3A,North York,Parkwoods,43.75188,-79.33036
3,M4A,North York,Victoria Village,43.73042,-79.31282
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65514,-79.36265
...,...,...,...,...,...
175,M5Z,Not assigned,Not assigned,43.64869,-79.38544
176,M6Z,Not assigned,Not assigned,43.64869,-79.38544
177,M7Z,Not assigned,Not assigned,43.64869,-79.38544
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.62463,-79.52835


### Filtering data

#### Selecting only boroughs that contains the wird Toronto

In [8]:
df_toronto = df[df['Borough'].str.contains("Toronto")].reset_index(drop=True)

In [88]:
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65514,-79.36265
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66449,-79.39302
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.65736,-79.37818
3,M5C,Downtown Toronto,St. James Town,43.65143,-79.37557
4,M4E,East Toronto,The Beaches,43.67703,-79.29542


#### Working only with Latitude and Longitude

In [22]:
df_toronto_numerical = df_toronto[["Latitude","Longitude"]]

In [89]:
df_toronto_numerical.head()

Unnamed: 0,Latitude,Longitude
0,43.65514,-79.36265
1,43.66449,-79.39302
2,43.65736,-79.37818
3,43.65143,-79.37557
4,43.67703,-79.29542


### Clustering

In [50]:
kclusters = 4

In [51]:
k_means = KMeans(init="k-means++", n_clusters=kclusters, n_init=12)

In [24]:
k_means.fit(df_toronto_numerical)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=4, n_init=12, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [25]:
k_means_labels = k_means.labels_

In [26]:
df_toronto_numerical_clustered = df_toronto_numerical.copy()

In [28]:
df_toronto_numerical_clustered["cluster"] = k_means_labels

In [90]:
df_toronto_numerical_clustered.head()

Unnamed: 0,Latitude,Longitude,cluster
0,43.65514,-79.36265,0
1,43.66449,-79.39302,0
2,43.65736,-79.37818,0
3,43.65143,-79.37557,0
4,43.67703,-79.29542,3


### Generate map

In [None]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [None]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

In [47]:
address = 'Toronto, Ontario Canada'
geolocator = Nominatim(user_agent=address)
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [48]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

In [57]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [82]:
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

In [83]:
latitude = df_toronto_numerical_clustered["Latitude"]
longitude = df_toronto_numerical_clustered["Longitude"]
cluster = df_toronto_numerical_clustered["cluster"]

In [84]:
markers_colors = []
for lat, lon, cluster in zip(latitude, longitude, cluster):
    label = folium.Popup(' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon], 
        radius=5, 
        popup=label, 
        color=rainbow[cluster-1], 
        fill_color=rainbow[cluster-1], 
    ).add_to(map_clusters)

In [85]:
map_clusters