#### Import packages that will be used

In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
from sklearn.cluster import KMeans

#### read in csv from previous notebook

In [2]:
Toronto = pd.read_csv("Toronto.csv")
#Toronto

#### See how many neighborhoods and boroughs there are

In [3]:
print("The dataframe has {} Boroughs and {} Neighborhoods.".format(
        len(Toronto["Borough"].unique()),
        Toronto.shape[0]
    )
)

The dataframe has 10 Boroughs and 102 Neighborhoods.


#### Get coordinates for Toronto for our basemap

In [4]:
address = "Toronto, ON"

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto ON is {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinates of Toronto ON is 43.653963, -79.387207.


#### Create a map of Toronto with Neighborhoods signified as points <- Note I used Stamen Toner as basemap tile to show contrast

In [5]:
#toronto_location = [43.653963, -79.387207]
toronto_location = [latitude, longitude]

toronto_map = folium.Map(location=toronto_location, tiles="Stamen Toner", zoom_start=10)

for lat, lng, borough, neighborhood, ps_code in zip(Toronto["Latitude"], Toronto["Longitude"], Toronto["Borough"],Toronto["Neighborhood"], Toronto["Postal Code"]):
    label = '{}, {}'.format(ps_code, neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto_map)

toronto_map

#### Examine the head of the dataset

In [6]:
Toronto.head(5)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### See Neighborhood and Postcode distribution

In [7]:
Toronto.groupby("Borough").count()

Unnamed: 0_level_0,Postal Code,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,18,18,18,18
East Toronto,5,5,5,5
East York,5,5,5,5
Etobicoke,12,12,12,12
North York,24,24,24,24
Queen's Park,1,1,1,1
Scarborough,17,17,17,17
West Toronto,6,6,6,6
York,5,5,5,5


In [8]:
Toronto_grouped = Toronto.groupby("Neighborhood").mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,Latitude,Longitude
0,"Adelaide, King, Richmond",43.650571,-79.384568
1,Agincourt,43.7942,-79.262029
2,"Agincourt North, L'Amoreaux East, Milliken, St...",43.815252,-79.284577
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437
4,"Alderwood, Long Branch",43.602414,-79.543484
5,"Bathurst Manor, Downsview North, Wilson Heights",43.754328,-79.442259
6,Bayview Village,43.786947,-79.385975
7,"Bedford Park, Lawrence Manor East",43.733283,-79.41975
8,Berczy Park,43.644771,-79.373306
9,"Birch Cliff, Cliffside West",43.692657,-79.264848


#### Lets try clustering the neighborhoods <- there are ten boroughs so lets try ten clusters

In [9]:
# set number of clusters
kclusters = 10

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 9, 9, 0, 4, 6, 7, 6, 3, 1])

#### Examine the head and tail of the cluster dataframe

In [10]:
Toronto_merged = Toronto

# add clustering labels
Toronto_merged['Cluster Labels'] = kmeans.labels_

Toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,3
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,9
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,9
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,4


In [11]:
Toronto_merged.tail()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels
97,M9N,York,Weston,43.706876,-79.518188,7
98,M9P,Etobicoke,Westmount,43.696319,-79.532242,2
99,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724,1
100,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437,1
101,M9W,Etobicoke,Northwest,43.706748,-79.594054,7


#### Lets map the clusters

In [12]:
map_clusters = folium.Map(location=toronto_location, zoom_start=10)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, Boro, PostC, cluster in zip(Toronto_merged["Latitude"], Toronto_merged["Longitude"], Toronto_merged["Borough"], 
                                  Toronto_merged["Postal Code"], Toronto_merged["Neighborhood"], Toronto_merged["Cluster Labels"]):
    label = folium.Popup(str(poi) + " Cluster " + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Now lets examine cluster membership

#### Cluster 1

In [13]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
3,Scarborough,Woburn,0
55,Downtown Toronto,St. James Town,0
58,Downtown Toronto,"Adelaide, King, Richmond",0
70,Downtown Toronto,"First Canadian Place, Underground city",0


#### Cluster 2

In [14]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
9,Scarborough,"Birch Cliff, Cliffside West",1
21,North York,"Newtonbrook, Willowdale",1
38,East York,Leaside,1
39,East York,Thorncliffe Park,1
43,East Toronto,Studio District,1
83,West Toronto,"Parkdale, Roncesvalles",1
85,Queen's Park,Not assigned,1
86,East Toronto,Eastern,1
87,Etobicoke,"Humber Bay Shores, Mimico South, New Toronto",1
91,Etobicoke,"Kingsway Park South West, Mimico NW, The Queen...",1


#### Note the Queen's Park Borough in the cluster above (This is an actual park so probably not a good Borough to examine further)

#### Cluster 3

In [15]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
46,Central Toronto,North Toronto West,2
51,Downtown Toronto,"Cabbagetown, St. James Town",2
77,West Toronto,"Little Portugal, Trinity",2
92,Etobicoke,Islington Avenue,2
98,Etobicoke,Westmount,2


#### Cluster 4

In [16]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
0,Scarborough,"Rouge, Malvern",3
8,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",3
13,Scarborough,"Clarks Corners, Sullivan, Tam O'Shanter",3
14,Scarborough,"Agincourt North, L'Amoreaux East, Milliken, St...",3
17,North York,Hillcrest Village,3
18,North York,"Fairview, Henry Farm, Oriole",3
20,North York,"Silver Hills, York Mills",3
25,North York,Parkwoods,3
30,North York,"CFB Toronto, Downsview East",3
42,East Toronto,"The Beaches West, India Bazaar",3


#### Cluster 5

In [17]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
4,Scarborough,Cedarbrae,4
10,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",4
24,North York,Willowdale West,4
53,Downtown Toronto,"Harbourfront, Regent Park",4
54,Downtown Toronto,"Ryerson, Garden District",4
57,Downtown Toronto,Central Bay Street,4
59,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",4
78,West Toronto,"Brockton, Exhibition Place, Parkdale Village",4
89,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",4
90,Etobicoke,"Humber Bay, King's Mill Park, Kingsway Park So...",4


#### Cluster 6

In [18]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 5,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
12,Scarborough,Agincourt,5
29,North York,"Northwood Park, York University",5
34,North York,Victoria Village,5
35,East York,"Woodbine Gardens, Parkview Hill",5
36,East York,Woodbine Heights,5
40,East York,East Toronto,5
61,Downtown Toronto,"Commerce Court, Victoria Hotel",5
65,Central Toronto,"The Annex, North Midtown, Yorkville",5
71,North York,"Lawrence Heights, Lawrence Manor",5
94,Etobicoke,"Bloordale Gardens, Eringate, Markland Wood, Ol...",5


#### Cluster 7

In [19]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 6,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
5,Scarborough,Scarborough Village,6
7,Scarborough,"Clairlea, Golden Mile, Oakridge",6
26,North York,Don Mills North,6
27,North York,"Flemingdon Park, Don Mills South",6
28,North York,"Bathurst Manor, Downsview North, Wilson Heights",6
44,Central Toronto,Lawrence Park,6
45,Central Toronto,Davisville North,6
56,Downtown Toronto,Berczy Park,6
62,North York,"Bedford Park, Lawrence Manor East",6
63,Central Toronto,Roselawn,6


#### Cluster 8

In [20]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 7,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
6,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",7
31,North York,Downsview West,7
41,East Toronto,"The Danforth West, Riverdale",7
52,Downtown Toronto,Church and Wellesley,7
68,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",7
81,York,"The Junction North, Runnymede",7
96,North York,"Emery, Humberlea",7
97,York,Weston,7
101,Etobicoke,Northwest,7


#### Cluster 9

In [21]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 8,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
11,Scarborough,"Maryvale, Wexford",8
15,Scarborough,"L'Amoreaux West, Steeles West",8
19,North York,Bayview Village,8
33,North York,Downsview Northwest,8
50,Downtown Toronto,Rosedale,8
64,Central Toronto,"Forest Hill North, Forest Hill West",8
73,York,Humewood-Cedarvale,8


#### Cluster 10

In [22]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 9,
                     Toronto_merged.columns[[1,2] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Neighborhood,Cluster Labels
1,Scarborough,"Highland Creek, Rouge Hill, Port Union",9
2,Scarborough,"Guildwood, Morningside, West Hill",9
16,Scarborough,Upper Rouge,9
22,North York,Willowdale South,9
23,North York,York Mills West,9
32,North York,Downsview Central,9
37,East Toronto,The Beaches,9
60,Downtown Toronto,"Design Exchange, Toronto Dominion Centre",9
66,Downtown Toronto,"Harbord, University of Toronto",9
74,York,Caledonia-Fairbanks,9


#### It looks like the clusters are fairly evenly distributed geographically
So I will use the two most geographically distant Boroughs ("Etobicoke" and "Scarborough") in further weeks to act as a random sample of Toronto Boroughs (and subsequently Neighborhoods).

#### Lets visualize the clusters one more time to remind you (the viewer) of their distribution

In [23]:
map_clusters = folium.Map(location=toronto_location, tiles="Stamen Toner", zoom_start=10)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, Boro, PostC, cluster in zip(Toronto_merged["Latitude"], Toronto_merged["Longitude"], Toronto_merged["Borough"], 
                                  Toronto_merged["Postal Code"], Toronto_merged["Neighborhood"], Toronto_merged["Cluster Labels"]):
    label = folium.Popup(str(poi) + " Cluster " + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Thanks for reviewing my notebook