# Segmenting and Clustering Neighborhoods in Toronto

[Introduction](#Introduction)

##### Importing libraries

In [34]:
import pandas as pd
!pip install geocoder
import geocoder # import geocoder
!pip install geopy
from geopy.geocoders import Nominatim 
!pip install folium
import folium
import requests
import json
import numpy as np
# import k-means from clustering stage
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors



<a id='Introduction'></a>
## 1. Introduction

## 1. Download and Explore Dataset

##### Importing Toronto's list of postal codes :

In [35]:

toronto_postal_code=pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',flavor='bs4')

toronto_Neighborhoods=toronto_postal_code[0]



toronto_Neighborhoods.rename(columns={"Postal Code":"PostalCode"},inplace=True)
toronto_Neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


##### Ignoring all the "Not assigned" boroughs :

In [36]:

toronto_Neighborhoods=toronto_Neighborhoods[toronto_Neighborhoods.Borough != "Not assigned"]
toronto_Neighborhoods.reset_index(drop=True, inplace=True)
toronto_Neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


##### Displaying toronto_Neighborhoods data frame size :

In [37]:
toronto_Neighborhoods.shape


(103, 3)

##### Importing coordinates from .csv file :

In [38]:
coordinates_df=pd.read_csv('https://cocl.us/Geospatial_data')

#coordinates_df.sort_values(by=['Postal Code'],inplace = True)
coordinates_df.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


##### Retreving coordinates for each postal code 

In [39]:
# Creating columns for the coordinates
toronto_Neighborhoods['Latitude']=""
toronto_Neighborhoods['Longitude']=""

# Retrieving coordinates values from coordinates_df
for i in range(len(toronto_Neighborhoods)):
    ind=coordinates_df[coordinates_df["Postal Code"]==toronto_Neighborhoods.iloc[i,0]].index.values[0]
    toronto_Neighborhoods['Latitude'][i]=round(coordinates_df['Latitude'][ind],6)
    toronto_Neighborhoods['Longitude'][i]=coordinates_df['Longitude'][ind]
   
    
toronto_Neighborhoods.head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7533,-79.3297
1,M4A,North York,Victoria Village,43.7259,-79.3156
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6543,-79.3606
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7185,-79.4648
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6623,-79.3895
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.6679,-79.5322
6,M1B,Scarborough,"Malvern, Rouge",43.8067,-79.1944
7,M3B,North York,Don Mills,43.7459,-79.3522
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7064,-79.3099
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3789


In [40]:
toronto_Neighborhoods.shape

(103, 5)

##### Retrieving the geographical coordinates of Toronto

In [41]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


##### Toronto neighbourhood visualization

In [42]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_Neighborhoods['Latitude'], toronto_Neighborhoods['Longitude'], toronto_Neighborhoods['Borough'], toronto_Neighborhoods['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

 #### Foursquare Credentials and Version

In [43]:
CLIENT_ID = 'FI0NVYWODT3CVTGQXA3CAU5EYXQ4OTAEUBECRSIL44SAOQVU' # your Foursquare ID
CLIENT_SECRET = 'KOYST14NLBRHIX2GNX4EK2RNSOLNYX0QFECBQ2OBZDEFWSNE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
radius=500
LIMIT=100




Your credentails:
CLIENT_ID: FI0NVYWODT3CVTGQXA3CAU5EYXQ4OTAEUBECRSIL44SAOQVU
CLIENT_SECRET:KOYST14NLBRHIX2GNX4EK2RNSOLNYX0QFECBQ2OBZDEFWSNE


## 2. Explore Neighborhoods in Toronto

##### get_category_type function from the Foursquare lab

In [44]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [45]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

##### Getting the top 100 venues that are in each Toronto boroughs within a radius of 500 meters :

In [46]:

toronto_venues = getNearbyVenues(names=toronto_Neighborhoods['Neighbourhood'],
                                   latitudes=toronto_Neighborhoods['Latitude'],
                                   longitudes=toronto_Neighborhoods['Longitude']
                                  )


Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

##### Checking the size of the resulting dataframe

In [47]:
print(toronto_venues.shape)
toronto_venues.head()

(2156, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


Checking how many venues were returned for each neighborhood

In [48]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Wilson Heights, Downsview North",23,23,23,23,23,23
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",27,27,27,27,27,27
Berczy Park,57,57,57,57,57,57
"Birch Cliff, Cliffside West",4,4,4,4,4,4
"Brockton, Parkdale Village, Exhibition Place",25,25,25,25,25,25
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",19,19,19,19,19,19
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",16,16,16,16,16,16


## 3. Analyze Each Neighborhood

In [49]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head(10)

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Confirming the new size :

In [50]:
toronto_onehot.shape

(2156, 273)

####  Grouping rows by neighborhood and by taking the mean of the frequency of occurrence of each category :

In [51]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
1,"Alderwood, Long Branch",0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
3,Bayview Village,0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
4,"Bedford Park, Lawrence Manor East",0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.037037,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
5,Berczy Park,0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.000000,0.000000,...,0.017544,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
6,"Birch Cliff, Cliffside West",0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
7,"Brockton, Parkdale Village, Exhibition Place",0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.080000
8,"Business reply mail Processing Centre, South C...",0.000,0.000000,0.000000,0.0000,0.000,0.000,0.000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.052632
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.000,0.000000,0.062500,0.0625,0.125,0.125,0.125,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000


#### Let's confirm the new size

In [52]:
toronto_grouped.shape

(95, 273)

#### Printing each neighborhood along with the top 5 most common venues :

In [53]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0                     Lounge  0.25
1             Breakfast Spot  0.25
2  Latin American Restaurant  0.25
3               Skating Rink  0.25
4          Accessories Store  0.00


----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.25
1        Pharmacy  0.12
2             Pub  0.12
3  Sandwich Place  0.12
4             Gym  0.12


----Bathurst Manor, Wilson Heights, Downsview North----
                       venue  freq
0                       Bank  0.09
1                Coffee Shop  0.09
2              Shopping Mall  0.04
3  Middle Eastern Restaurant  0.04
4          Mobile Phone Shop  0.04


----Bayview Village----
                 venue  freq
0                 Café  0.25
1                 Bank  0.25
2  Japanese Restaurant  0.25
3   Chinese Restaurant  0.25
4    Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                     venue  freq
0       Italian Restaurant  0.11
1              Co

#### Converting that into a *pandas* dataframe 

In [54]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)

    return row_categories_sorted.index.values[0:num_top_venues]

##### Creating a new dataframe and display the top 10 venues for each neighborhood

In [55]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
boroughs_venues_sorted = pd.DataFrame(columns=columns)
boroughs_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    boroughs_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

boroughs_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Latin American Restaurant,Breakfast Spot,Skating Rink,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Sandwich Place,Pharmacy,Pool,Pub,Gym,Airport Terminal,Dessert Shop,Falafel Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Shopping Mall,Sandwich Place,Restaurant,Supermarket,Ice Cream Shop,Sushi Restaurant,Middle Eastern Restaurant,Mobile Phone Shop
3,Bayview Village,Café,Chinese Restaurant,Bank,Japanese Restaurant,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dim Sum Restaurant
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Restaurant,Sandwich Place,Juice Bar,Hobby Shop,Pharmacy,Pizza Place,Pub,Café
5,Berczy Park,Coffee Shop,Restaurant,Beer Bar,Bakery,Cocktail Bar,Café,Farmers Market,Seafood Restaurant,Cheese Shop,Breakfast Spot
6,"Birch Cliff, Cliffside West",College Stadium,General Entertainment,Skating Rink,Café,Comic Shop,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
7,"Brockton, Parkdale Village, Exhibition Place",Café,Yoga Studio,Coffee Shop,Breakfast Spot,Bakery,Stadium,Burrito Place,Restaurant,Climbing Gym,Pet Store
8,"Business reply mail Processing Centre, South C...",Light Rail Station,Yoga Studio,Garden Center,Recording Studio,Skate Park,Burrito Place,Auto Workshop,Fast Food Restaurant,Farmers Market,Spa
9,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Boat or Ferry,Boutique,Bar,Plane,Coffee Shop,Sculpture Garden,Rental Car Location


## 4. Cluster Neighborhoods

Run *k*-means to cluster the neighborhood into 5 clusters.

In [56]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe

kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [57]:
# add clustering labels
boroughs_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_Neighborhoods
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(boroughs_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.dropna(axis=0,inplace=True)

toronto_merged.head(10) # check the last columns!


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.7533,-79.3297,1.0,Food & Drink Shop,Park,Convenience Store,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,M4A,North York,Victoria Village,43.7259,-79.3156,1.0,Hockey Arena,Coffee Shop,Portuguese Restaurant,Financial or Legal Service,French Restaurant,Pizza Place,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Electronics Store
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6543,-79.3606,1.0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Beer Store,Shoe Store,Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7185,-79.4648,1.0,Furniture / Home Store,Clothing Store,Vietnamese Restaurant,Boutique,Gift Shop,Accessories Store,Coffee Shop,Event Space,Concert Hall,Comic Shop
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6623,-79.3895,1.0,Coffee Shop,Diner,College Auditorium,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Burrito Place,Café,Portuguese Restaurant
6,M1B,Scarborough,"Malvern, Rouge",43.8067,-79.1944,1.0,Fast Food Restaurant,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Women's Store
7,M3B,North York,Don Mills,43.7459,-79.3522,1.0,Gym,Restaurant,Beer Store,Japanese Restaurant,Café,Coffee Shop,Dim Sum Restaurant,Caribbean Restaurant,Discount Store,Supermarket
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7064,-79.3099,1.0,Pizza Place,Gym / Fitness Center,Café,Athletics & Sports,Intersection,Gastropub,Bank,Pharmacy,Eastern European Restaurant,Dumpling Restaurant
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3789,1.0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Japanese Restaurant,Bubble Tea Shop,Italian Restaurant,Electronics Store,Ramen Restaurant,Bookstore
10,M6B,North York,Glencairn,43.7096,-79.4451,1.0,Park,Sushi Restaurant,Pub,Japanese Restaurant,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop


Finally, let's visualize the resulting clusters

In [58]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged["Cluster Labels"].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    
       
map_clusters

## 5. Examine Clusters

#### Cluster 1

In [59]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,York,0.0,Park,Women's Store,Pool,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
52,North York,0.0,Park,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
64,York,0.0,Park,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
66,North York,0.0,Park,Convenience Store,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
85,Scarborough,0.0,Park,Playground,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant


#### Cluster 2

In [60]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1.0,Food & Drink Shop,Park,Convenience Store,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,North York,1.0,Hockey Arena,Coffee Shop,Portuguese Restaurant,Financial or Legal Service,French Restaurant,Pizza Place,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Electronics Store
2,Downtown Toronto,1.0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Beer Store,Shoe Store,Restaurant
3,North York,1.0,Furniture / Home Store,Clothing Store,Vietnamese Restaurant,Boutique,Gift Shop,Accessories Store,Coffee Shop,Event Space,Concert Hall,Comic Shop
4,Downtown Toronto,1.0,Coffee Shop,Diner,College Auditorium,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Burrito Place,Café,Portuguese Restaurant
6,Scarborough,1.0,Fast Food Restaurant,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Women's Store
7,North York,1.0,Gym,Restaurant,Beer Store,Japanese Restaurant,Café,Coffee Shop,Dim Sum Restaurant,Caribbean Restaurant,Discount Store,Supermarket
8,East York,1.0,Pizza Place,Gym / Fitness Center,Café,Athletics & Sports,Intersection,Gastropub,Bank,Pharmacy,Eastern European Restaurant,Dumpling Restaurant
9,Downtown Toronto,1.0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Japanese Restaurant,Bubble Tea Shop,Italian Restaurant,Electronics Store,Ramen Restaurant,Bookstore
10,North York,1.0,Park,Sushi Restaurant,Pub,Japanese Restaurant,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop


#### Cluster 3

In [61]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York,2.0,Food Service,Baseball Field,Yoga Studio,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Diner
101,Etobicoke,2.0,Baseball Field,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Field


#### Cluster 4

In [62]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,3.0,Bar,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Field


#### Cluster 5

In [63]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
49,North York,4.0,Park,Basketball Court,Bakery,Construction & Landscaping,Trail,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
68,Central Toronto,4.0,Park,Jewelry Store,Sushi Restaurant,Trail,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
83,Central Toronto,4.0,Park,Restaurant,Trail,Department Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
91,Downtown Toronto,4.0,Park,Playground,Trail,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop
