# Clustering Neighborhoods of Toronto


This *Jupyter Notebook* is used for the 2nd assignment of the Applied Data Science Capstone. <br>
We start by importing the required libraries.

In [1]:
import io
import os

import numpy as np
import pandas as pd
import requests

The *pandas.read_html* method uses BeautifulSoup as its backend, so we use it to read the table directly into a pandas dataframe. We then remove the rows whose borough is 'Not assigned' and merge the rows that share the same 'Postcode' value, so that their 'Neighbourhood' values become one comma-separated string. Finally, any neighbourhood that is 'Not assigned' is set to the name of its borough. The geographical coordinates of each postal code are read from the 'Geospatial_Coordinates.csv' file (this avoids installing packages such as geocoder, which can be very unreliable, on my laptop), and the two tables are joined on the postcode; the result is assigned to *neighborhoods*. The following code is a replica of the first part of the assignment.

In [2]:
# Download the Wikipedia page; fail early on HTTP errors instead of parsing an error page.
response = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
response.raise_for_status()
html_page = response.text

# read_html takes a file-like object; passing raw HTML text is deprecated in recent pandas.
df = pd.read_html(io.StringIO(html_page), flavor='bs4', header=0)[0]
df = df[df.Borough != 'Not assigned']

# One row per postcode (in order of first appearance): keep the first borough and
# join all neighbourhood names into a single comma-separated string.
table = (
    df.groupby('Postcode', sort=False)
      .agg({'Borough': 'first', 'Neighbourhood': ', '.join})
      .reset_index()
)

# A neighbourhood that is still 'Not assigned' takes the name of its borough.
# A boolean mask with .loc writes into `table` itself (no chained indexing).
not_assigned = table['Neighbourhood'] == 'Not assigned'
table.loc[not_assigned, 'Neighbourhood'] = table.loc[not_assigned, 'Borough']

# Attach latitude/longitude by matching 'Postcode' against the CSV's 'Postal Code' column.
df = pd.read_csv('Geospatial_Coordinates.csv')
neighborhoods = table.join(df.set_index('Postal Code'), on='Postcode')
neighborhoods

  if __name__ == '__main__':


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


Let's plot the map of Toronto with the markers of the postal codes.

In [3]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

# Centre the map on the mean coordinate of all postal codes.
latitude = neighborhoods['Latitude'].mean()
longitude = neighborhoods['Longitude'].mean()
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle marker per postal code, with a "neighbourhood, borough" popup.
marker_columns = ('Latitude', 'Longitude', 'Borough', 'Neighbourhood')
for lat, lng, borough, neighborhood in zip(*(neighborhoods[col] for col in marker_columns)):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)
map_toronto

Restricting the analysis to boroughs that contain the word Toronto:

In [4]:
# Keep only the postal codes whose borough name contains 'Toronto'.
# reset_index() renumbers the rows and keeps the old row labels in an 'index' column.
in_toronto = neighborhoods['Borough'].str.contains('Toronto')
toronto = neighborhoods[in_toronto].reset_index()
toronto

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
1,9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
2,15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,19,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,30,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
8,31,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259
9,36,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752


In [5]:
# Re-centre on the mean coordinate of the Toronto-only postal codes and zoom in.
latitude = toronto['Latitude'].mean()
longitude = toronto['Longitude'].mean()
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# One blue circle marker per postal code, with a "neighbourhood, borough" popup.
marker_columns = ('Latitude', 'Longitude', 'Borough', 'Neighbourhood')
for lat, lng, borough, neighborhood in zip(*(toronto[col] for col in marker_columns)):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

#### Define Foursquare Credentials and Version for requesting venues through the API.

In [6]:
# Credentials are read from the environment so that secrets are never stored in the
# notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the
# kernel. Keys that were ever saved in a shared copy of this notebook should be
# treated as leaked and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

#### Define a function which requests the venues around each neighbourhood.

In [7]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare 'explore' endpoint once per location and collect the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed in parallel.
    radius : number, default 500
        Value sent as the ``radius`` query parameter (presumably metres; confirm
        against the Foursquare API documentation).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but has these columns) when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT, and prints
    each name as it is processed.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30)
        # surface HTTP errors (bad credentials, quota exceeded) instead of an obscure KeyError below
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # passing the column names here keeps the schema intact even when `rows` is empty
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

#### Requesting venues for each neighbourhood of Toronto from the restricted list, and creating a new dataframe called *toronto_venues*.

In [8]:
# Fetch the venues around every row of `toronto` (default search radius).
toronto_venues = getNearbyVenues(
    names=toronto['Neighbourhood'],
    latitudes=toronto['Latitude'],
    longitudes=toronto['Longitude'],
)

Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
The Danforth West, Riverdale
Design Exchange, Toronto Dominion Centre
Brockton, Exhibition Place, Parkdale Village
The Beaches West, India Bazaar
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North, Forest Hill West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
Harbord, University of Toronto
Runnymede, Swansea
Moore Park, Summerhill East
Chinatown, Grange Park, Kensington Market
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
Fir

In [34]:
# Display the venues collected by getNearbyVenues (one row per returned venue).
toronto_venues

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront, Regent Park",43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Harbourfront, Regent Park",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Harbourfront, Regent Park",43.654260,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,"Harbourfront, Regent Park",43.654260,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
4,"Harbourfront, Regent Park",43.654260,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
5,"Harbourfront, Regent Park",43.654260,-79.360636,Impact Kitchen,43.656369,-79.356980,Restaurant
6,"Harbourfront, Regent Park",43.654260,-79.360636,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
7,"Harbourfront, Regent Park",43.654260,-79.360636,Dominion Pub and Kitchen,43.656919,-79.358967,Pub
8,"Harbourfront, Regent Park",43.654260,-79.360636,Corktown Common,43.655618,-79.356211,Park
9,"Harbourfront, Regent Park",43.654260,-79.360636,The Distillery Historic District,43.650244,-79.359323,Historic Site


Let's check how many venues were returned for each neighborhood.

In [35]:
# Number of non-null entries in every column, per neighbourhood.
venues_by_neighbourhood = toronto_venues.groupby('Neighbourhood')
venues_by_neighbourhood.count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,55,55,55,55,55,55
"Brockton, Exhibition Place, Parkdale Village",19,19,19,19,19,19
Business Reply Mail Processing Centre 969 Eastern,19,19,19,19,19,19
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",14,14,14,14,14,14
"Cabbagetown, St. James Town",44,44,44,44,44,44
Central Bay Street,81,81,81,81,81,81
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,87,87,87,87,87,87


#### Let's find out how many unique categories can be curated from all the returned venues

In [36]:
# Count the distinct values of the 'Venue Category' column.
category_count = len(toronto_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 235 uniques categories.


Make one-hot encoding for the categorical variables.

In [37]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


The new dataframe size is:

In [38]:
# (rows, columns): one row per venue; one column per category plus 'Neighbourhood'.
toronto_onehot.shape

(1698, 236)

#### Next, let's group the rows by neighbourhood, taking the mean of the frequency of occurrence of each category

In [39]:
# Average the one-hot indicators within each neighbourhood; the mean of a 0/1 column
# is the share of that neighbourhood's venues belonging to the category.
onehot_by_neighbourhood = toronto_onehot.groupby('Neighbourhood')
toronto_grouped = onehot_by_neighbourhood.mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,...,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0,0.0,0.012346
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.05,0.0,0.05,0.01,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,...,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.011494,0.0,0.011494


And check the size of the new dataframe.

In [40]:
# (rows, columns) after grouping: one row per neighbourhood.
toronto_grouped.shape

(38, 236)

We will define a function to sort the venues in descending order.

In [41]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in `row`, ignoring its first entry.

    Parameters
    ----------
    row : pandas.Series
        The first element is skipped (the callers in this notebook store the
        neighbourhood name there); the remaining values are ranked.
    num_top_venues : int
        Number of labels to keep from the top of the ranking.

    Returns
    -------
    numpy.ndarray
        Index labels of the remaining entries, ordered from the highest value down,
        truncated to `num_top_venues` items.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [42]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues: 1st, 2nd, 3rd, then 4th, 5th, ...
columns = ['Neighbourhood']
for ind in range(num_top_venues):
    # explicit bounds check instead of a bare `except:` that would hide unrelated errors
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# rank the categories of every neighbourhood, then build the dataframe in one step
# rather than filling it row by row
top_venues = [
    return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
    for ind in range(toronto_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:])
# .values: insert by position, independent of toronto_grouped's index labels
neighborhoods_venues_sorted.insert(0, 'Neighbourhood', toronto_grouped['Neighbourhood'].values)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,American Restaurant,Steakhouse,Thai Restaurant,Bar,Asian Restaurant,Gym,Clothing Store,Bakery
1,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Bakery,Farmers Market,Cheese Shop,Seafood Restaurant,Beer Bar,Italian Restaurant,Café
2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Café,Breakfast Spot,Yoga Studio,Italian Restaurant,Pet Store,Convenience Store,Climbing Gym,Caribbean Restaurant,Burrito Place
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Fast Food Restaurant,Restaurant,Butcher,Skate Park,Auto Workshop,Moving Target,Burrito Place,Garden,Garden Center
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Airport Gate,Harbor / Marina,Airport


### Cluster neighbourhoods

In [43]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

# features only: the keyword form is required because pandas 2.x no longer accepts
# the axis as a second positional argument to drop()
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighbourhood')

# run k-means clustering; n_init is set explicitly so the result does not depend on
# the default of the installed scikit-learn version
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

In [44]:
# Feature matrix passed to KMeans: toronto_grouped without the 'Neighbourhood' column.
toronto_grouped_clustering

Unnamed: 0,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.142857,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.022727,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,...,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0,0.0,0.012346
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.05,0.0,0.05,0.01,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.011494,0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,...,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.011494,0.0,0.011494


In [45]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,2,Coffee Shop,Park,Pub,Café,Bakery,Theater,Breakfast Spot,Mexican Restaurant,Restaurant,French Restaurant
1,9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,2,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Tea Room,Ramen Restaurant,Bar,Plaza
2,15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Coffee Shop,Restaurant,Café,Hotel,Clothing Store,Breakfast Spot,Cocktail Bar,Cosmetics Shop,Bakery,Italian Restaurant
3,19,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Neighborhood,Boutique,Coffee Shop,Pub,Dog Run,Filipino Restaurant,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space
4,20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Coffee Shop,Restaurant,Cocktail Bar,Bakery,Farmers Market,Cheese Shop,Seafood Restaurant,Beer Bar,Italian Restaurant,Café


In [46]:
# Top-venue table per neighbourhood, including the 'Cluster Labels' column.
neighborhoods_venues_sorted

Unnamed: 0,Cluster Labels,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,"Adelaide, King, Richmond",Coffee Shop,Café,American Restaurant,Steakhouse,Thai Restaurant,Bar,Asian Restaurant,Gym,Clothing Store,Bakery
1,2,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Bakery,Farmers Market,Cheese Shop,Seafood Restaurant,Beer Bar,Italian Restaurant,Café
2,2,"Brockton, Exhibition Place, Parkdale Village",Coffee Shop,Café,Breakfast Spot,Yoga Studio,Italian Restaurant,Pet Store,Convenience Store,Climbing Gym,Caribbean Restaurant,Burrito Place
3,2,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Fast Food Restaurant,Restaurant,Butcher,Skate Park,Auto Workshop,Moving Target,Burrito Place,Garden,Garden Center
4,2,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Lounge,Airport Terminal,Airport Service,Plane,Sculpture Garden,Boutique,Boat or Ferry,Airport Gate,Harbor / Marina,Airport
5,2,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Pub,Pizza Place,Café,Italian Restaurant,Bakery,Market,Breakfast Spot,Chinese Restaurant
6,2,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Burger Joint,Bar,Middle Eastern Restaurant,Thai Restaurant,Salad Place,Bubble Tea Shop,Spa
7,2,"Chinatown, Grange Park, Kensington Market",Café,Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Bakery,Dumpling Restaurant,Coffee Shop,Mexican Restaurant,Chinese Restaurant,Dim Sum Restaurant
8,2,Christie,Café,Grocery Store,Park,Convenience Store,Coffee Shop,Baby Store,Restaurant,Italian Restaurant,Nightclub,Diner
9,2,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Burger Joint,Pub,Men's Store,Mediterranean Restaurant,Fast Food Restaurant


In [47]:
# `latitude` / `longitude` are the map centre computed in an earlier cell
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map; rows without a cluster label (NaN after the join) cannot be
# coloured and are skipped
clustered = toronto_merged.dropna(subset=['Cluster Labels'])
for lat, lon, poi, cluster in zip(clustered['Latitude'], clustered['Longitude'], clustered['Neighbourhood'], clustered['Cluster Labels']):
    # a column that contained NaN is float-typed; list indexing needs an int
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # index by the label itself, so label 0 does not depend on negative-index wraparound
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters