Import the required Python libraries (folium is installed via conda first)

In [3]:
import pandas as pd
import numpy as np

!conda install -c conda-forge folium=0.5.0
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ------------------------------------------------------------
                       

Rebuild the postal-code DataFrame from the previous notebooks (Wikipedia table merged with the geospatial coordinates)

In [4]:
# Scrape the postal-code table: the first table found on the Wikipedia page.
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]

# Index labels of the rows whose borough is 'Not assigned'.
droprow = df.index[df['Borough'] == 'Not assigned']
# Remove those rows and renumber the remaining ones from 0.
df = df.drop(index=droprow).reset_index(drop=True)

# Geospatial lookup keyed by 'Postal Code'; the left join keeps every row of df.
geo = pd.read_csv('http://cocl.us/Geospatial_data')
df_merge = df.merge(geo, how='left', on='Postal Code')

Create Latitude/Longitude coordinate dataframe for K-Means Clustering

In [5]:
# Average the numeric columns over all rows that share a neighborhood name,
# producing one row per neighborhood. `numeric_only=True` keeps the text columns
# out of the mean: pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older releases did.
df_group = df_merge.groupby('Neighborhood').mean(numeric_only=True).reset_index()

# Feature matrix for K-means: everything except the neighborhood name.
# `columns=` replaces the positional axis argument of drop(), which pandas 2.0
# no longer accepts.
df_group_cluster = df_group.drop(columns='Neighborhood')
df_group_cluster.head(12)

Unnamed: 0,Latitude,Longitude
0,43.7942,-79.262029
1,43.602414,-79.543484
2,43.754328,-79.442259
3,43.786947,-79.385975
4,43.733283,-79.41975
5,43.644771,-79.373306
6,43.692657,-79.264848
7,43.636847,-79.428191
8,43.662744,-79.321558
9,43.628947,-79.39442


Run K-means clustering and check cluster labels for each row in dataframe

In [6]:
from sklearn.cluster import KMeans

# Number of clusters to partition the neighborhoods into.
kclusters = 5

# Fit K-means on the coordinate matrix. random_state fixes the centroid seeding.
# n_init is pinned to 10 (the long-standing default) because scikit-learn 1.4
# changed the default to 'auto', which runs a single initialisation and can
# produce different labels.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(df_group_cluster)

# Peek at the labels assigned to the first ten rows.
kmeans.labels_[0:10]

array([1, 2, 3, 3, 3, 0, 4, 0, 4, 0], dtype=int32)

Append K-means clustering labels to Neighborhood data frame

In [7]:
# Attach each neighborhood's cluster label as the first column of df_group.
# The model was fitted on df_group_cluster, which is df_group minus the name
# column, so kmeans.labels_ lines up with df_group row by row.
# DataFrame.insert raises ValueError when the column already exists, so on a
# re-run of this cell the existing column is overwritten in place instead.
if 'Cluster Labels' in df_group.columns:
    df_group['Cluster Labels'] = kmeans.labels_
else:
    df_group.insert(0, 'Cluster Labels', kmeans.labels_)
df_group.head(12)

Unnamed: 0,Cluster Labels,Neighborhood,Latitude,Longitude
0,1,Agincourt,43.7942,-79.262029
1,2,"Alderwood, Long Branch",43.602414,-79.543484
2,3,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
3,3,Bayview Village,43.786947,-79.385975
4,3,"Bedford Park, Lawrence Manor East",43.733283,-79.41975
5,0,Berczy Park,43.644771,-79.373306
6,4,"Birch Cliff, Cliffside West",43.692657,-79.264848
7,0,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191
8,4,Business reply mail Processing Centre,43.662744,-79.321558
9,0,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442


Merging original dataframe with neighborhood cluster labels

In [8]:
# Per-neighborhood lookup (cluster label and mean coordinates), keyed by name.
df_neighbor = df_group.set_index('Neighborhood')

# Left join onto the postal-code table so every original row picks up the
# cluster label and coordinates of its neighborhood.
df_merged = df.merge(df_neighbor, how='left', on='Neighborhood')
df_merged.head(12)

Unnamed: 0,Postal Code,Borough,Neighborhood,Cluster Labels,Latitude,Longitude
0,M3A,North York,Parkwoods,4,43.753259,-79.329656
1,M4A,North York,Victoria Village,4,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",0,43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",3,43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",0,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,2,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",1,43.806686,-79.194353
7,M3B,North York,Don Mills,4,43.735903,-79.346555
8,M4B,East York,"Parkview Hill, Woodbine Gardens",4,43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",0,43.657162,-79.378937


Set the coordinates of Toronto and create a folium map centred on the city

In [21]:
# Map centre in decimal degrees (Toronto).
latitude, longitude = 43.6532, -79.3832

# Base map that the cluster markers are later added to.
map_clusters = folium.Map(zoom_start=11, location=[latitude, longitude])

Set color scheme for the clusters

In [22]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# One evenly spaced sample of the rainbow colormap per cluster (RGBA rows).
# The former `x` / `ys` helpers were only used for len(ys) == kclusters, so the
# cluster count is used directly.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))

# Hex colour strings, one per cluster.
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

Add markers to the map

In [23]:
# Draw one circle marker per row of df_merged, coloured by its cluster label.
for lat, lon, poi, cluster in zip(df_merged['Latitude'], df_merged['Longitude'],
                                  df_merged['Neighborhood'], df_merged['Cluster Labels']):
    # Popup text: neighborhood name followed by its cluster number.
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Cluster labels start at 0, so they index the palette directly. The earlier
    # `cluster - 1` only worked because index -1 wraps around to the last colour.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

# Last expression of the cell: render the map.
map_clusters