# Segmenting and Clustering Neighborhoods in Toronto

# PART ONE

In [1]:
#import packages
import numpy as np 
import pandas as pd
import json 
!pip install requests
import requests 
from pandas.io.json import json_normalize
from bs4 import BeautifulSoup
import lxml

print('Libraries imported.')

Libraries imported.


In [2]:
# STEP ONE: download the Wikipedia page listing the postal codes that start with "M".
# A timeout and an explicit status check make a network/HTTP failure visible here
# instead of as a confusing parsing error further down.
response = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
response.raise_for_status()
source = response.text
# Parse the HTML with BeautifulSoup using the lxml backend.
canada_list = BeautifulSoup(source, 'lxml')

# STEP TWO: walk every <td> of the first table on the page.
# Each cell is read as: <p> holds the postal code (first three characters),
# <span> holds "Borough(Neighborhood / Neighborhood ...)" or the text 'Not assigned'.
table = canada_list.find('table')
if table is None:
    raise ValueError('No <table> element found on the postal code page; the page layout may have changed.')

table_contents = []
for row in table.find_all('td'):
    # Skip cells that do not carry the expected <p>/<span> children.
    if row.p is None or row.span is None:
        continue
    span_text = row.span.text
    if span_text == 'Not assigned':
        continue

    parts = span_text.split('(')
    cell = {}
    cell['PostalCode'] = row.p.text[:3]
    cell['Borough'] = parts[0]
    if len(parts) > 1:
        # Turn "A / B)" into "A, B": drop the closing parenthesis, use commas as separators.
        cell['Neighborhood'] = parts[1].strip(')').replace(' /', ',').replace(')', ' ').strip(' ')
    else:
        # No parenthesised neighborhood list: fall back to the borough text.
        cell['Neighborhood'] = parts[0].strip(' ')
    table_contents.append(cell)

df = pd.DataFrame(table_contents)
# A few cells concatenate extra descriptive text into the borough; map them to clean names.
df['Borough'] = df['Borough'].replace({
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
})

In [3]:
# Preview the first 12 rows of the scraped postal-code table.
df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


# PART TWO

In [4]:
# (rows, columns) of the scraped table.
df.shape

(103, 3)

In [5]:
import csv

# Load the latitude/longitude of every postal code from the course-provided CSV
# (used here instead of geocoding each postal code individually).
df_coord = pd.read_csv('https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv')
df_coord = df_coord.rename(columns={'Postal Code': 'PostalCode'})

# Inner-join the two tables on PostalCode. Any coordinate columns left over from a
# previous run of this cell are dropped first, so re-running it does not produce
# Latitude_x / Latitude_y duplicates.
df = pd.merge(
    df.drop(columns=['Latitude', 'Longitude'], errors='ignore'),
    df_coord,
    how='inner',
    on='PostalCode',
)

# Save a checkpoint and read it back. index=False keeps the row index out of the
# file, so no 'Unnamed: 0' column has to be dropped after reloading.
df.to_csv('toronto_df.csv', index=False)
df = pd.read_csv('toronto_df.csv')
df.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# PART THREE

In [6]:
# Shell escape: install geopy from the conda-forge channel; --yes skips the confirmation prompt.
!conda install -c conda-forge geopy --yes

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [7]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!pip install folium
import folium 



In [8]:
# Step 1: keep only the columns needed for mapping and report how many
# distinct boroughs and how many rows (neighborhoods) the table holds.
neighborhoods = df.loc[:, ['Borough', 'Neighborhood', 'Latitude', 'Longitude']]
borough_count = len(neighborhoods['Borough'].unique())
neighborhood_count = neighborhoods.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, neighborhood_count))

The dataframe has 15 boroughs and 103 neighborhoods.


In [9]:
# Step 2: use geopy to get the latitude and longitude of Toronto.
address = 'Toronto, ON, Canada'

# Nominatim requires a user_agent string that identifies the calling application;
# "to_explorer" is simply the name chosen for this notebook.
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, ON are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, ON are 43.6534817, -79.3839347.


In [10]:
# Step 3: draw a folium map centred on the coordinates geocoded above and
# add one small circle marker per row of `neighborhoods`.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10.4)

marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in neighborhoods[marker_columns].itertuples(index=False, name=None):
    # Popup text reads "<neighborhood>, <borough>".
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=popup,
        color='#E42222',
        fill=True,
        fill_color='#CB9D5B',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

In [11]:
# Step 4: narrow the analysis to the Scarborough borough.
is_scarborough = neighborhoods['Borough'] == 'Scarborough'
scarborough_data = neighborhoods.loc[is_scarborough].reset_index(drop=True)
scarborough_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
# Step 5: get the latitude and longitude of Scarborough.
# NOTE: this rebinds `latitude` / `longitude`, which previously held Toronto's coordinates.
address = 'Scarborough, ON, Canada'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.7729744, -79.2576479.


In [13]:
# Step 6: draw a folium map centred on the current `latitude` / `longitude`
# and add one circle marker per row of `scarborough_data`.
map_scarborough = folium.Map(location=[latitude, longitude], zoom_start=11.2)

marker_columns = ['Latitude', 'Longitude', 'Neighborhood']
for lat, lng, label in scarborough_data[marker_columns].itertuples(index=False, name=None):
    # The neighborhood name becomes the popup shown when the marker is clicked.
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='#E42222',
        fill=True,
        fill_color='#CB9D5B',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_scarborough)

map_scarborough

In [14]:
import os

# Foursquare credentials are read from the environment so that no secret is stored
# in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')

VERSION = '20180605' # Foursquare API version
LIMIT = 100 # value sent as the `limit` query parameter of each request

In [15]:
# Step 8: read the name and coordinates stored in the first row (index label 0)
# of `scarborough_data` and print them.
neighborhood_name = scarborough_data.at[0, 'Neighborhood']
neighborhood_latitude = scarborough_data.at[0, 'Latitude']
neighborhood_longitude = scarborough_data.at[0, 'Longitude']

message = 'Latitude and longitude values of {} are {}, {}.'
print(message.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Latitude and longitude values of Malvern, Rouge are 43.8066863, -79.1943534.


In [16]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint once per location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location to query; they are zipped together.
    radius : int, default 500
        Value sent as the `radius` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an error status.

    Uses the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT settings.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of formatting it by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # Surface bad credentials / quota errors here rather than as a KeyError below.
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        for item in items:
            venue = item['venue']
            # A venue without any category would otherwise raise IndexError.
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the column names to the constructor also works when there are zero rows;
    # assigning `.columns` afterwards fails with a length mismatch in that case.
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return nearby_venues

In [17]:
# Fetch the nearby venues for every Scarborough neighborhood (default radius).
scarborough_venues = getNearbyVenues(
    names=scarborough_data['Neighborhood'],
    latitudes=scarborough_data['Latitude'],
    longitudes=scarborough_data['Longitude'],
)

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge


In [18]:
# Step 12: check the size of the venues table (rows, columns) and preview its first rows.
print(scarborough_venues.shape)
scarborough_venues.head()

(91, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
2,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
3,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank


In [19]:
# Step 13: count the venues returned for each neighborhood (non-null entries per column).
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,8,8,8,8,8,8
"Clarks Corners, Tam O'Shanter, Sullivan",13,13,13,13,13,13
"Cliffside, Cliffcrest, Scarborough Village West",2,2,2,2,2,2
"Dorset Park, Wexford Heights, Scarborough Town Centre",6,6,6,6,6,6
"Golden Mile, Clairlea, Oakridge",9,9,9,9,9,9
"Guildwood, Morningside, West Hill",9,9,9,9,9,9
"Kennedy Park, Ionview, East Birchmount Park",5,5,5,5,5,5
"Malvern, Rouge",1,1,1,1,1,1


In [20]:
# Step 13 (continued): how many distinct venue categories appear in the results.
category_count = len(scarborough_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 56 uniques categories.


In [21]:
# Step 14: analyze each neighborhood.
# One-hot encode the venue category: one 0/1 column per category, one row per venue.
# dtype=int keeps the 0/1 integer encoding regardless of the pandas version's default.
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="", dtype=int)

# If a venue category happens to be named 'Neighborhood', adding the label column below
# would silently overwrite that category's indicator column. Rename it first.
if 'Neighborhood' in scarborough_onehot.columns:
    scarborough_onehot = scarborough_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# Add the neighborhood label as the first column.
scarborough_onehot.insert(0, 'Neighborhood', scarborough_venues['Neighborhood'])

scarborough_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Department Store,Discount Store,Donut Shop,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,Gas Station,General Entertainment,Golf Course,Hakka Restaurant,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Korean BBQ Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Motel,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Restaurant,Sandwich Place,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Supermarket,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [22]:
# (rows, columns) of the one-hot table.
scarborough_onehot.shape

(91, 57)

In [23]:
# Step 15: average the one-hot rows per neighborhood, giving the share of each
# venue category in every neighborhood, then confirm the resulting size.
scarborough_grouped = scarborough_onehot.groupby('Neighborhood', as_index=False).mean()
scarborough_grouped.shape

(16, 57)

In [24]:
# Step 16: print each neighborhood together with its 5 most frequent venue categories.
num_top_venues = 5

for hood in scarborough_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Select this neighborhood's row and transpose it so every column becomes a row.
    temp = scarborough_grouped[scarborough_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # Drop the first row, which holds the 'Neighborhood' label rather than a frequency.
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    # Highest frequencies first; show only the top `num_top_venues` rows.
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0  Latin American Restaurant   0.2
1                     Lounge   0.2
2             Breakfast Spot   0.2
3               Skating Rink   0.2
4             Clothing Store   0.2


----Birch Cliff, Cliffside West----
                   venue  freq
0  General Entertainment  0.25
1           Skating Rink  0.25
2                   Café  0.25
3        College Stadium  0.25
4    American Restaurant  0.00


----Cedarbrae----
                venue  freq
0         Gas Station  0.12
1  Athletics & Sports  0.12
2              Bakery  0.12
3                Bank  0.12
4     Thai Restaurant  0.12


----Clarks Corners, Tam O'Shanter, Sullivan----
                  venue  freq
0           Pizza Place  0.15
1  Fast Food Restaurant  0.15
2   Fried Chicken Joint  0.08
3              Pharmacy  0.08
4    Italian Restaurant  0.08


----Cliffside, Cliffcrest, Scarborough Village West----
                 venue  freq
0  American Restaurant   0.5
1             

In [25]:
#Step_17 data like above are ugly, let's put them into dataframe and get 10 venues for each neighborhood this time
def return_most_common_venues(row, num_top_venues):
    """Return the index labels of the largest values in `row`, ignoring its first entry.

    The first element of `row` is skipped, the remaining values are sorted in
    descending order, and the labels of the first `num_top_venues` of them are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]


num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Build the column names: 'Neighborhood', '1st Most Common Venue', '2nd ...', ...
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    rank = int(ind) + 1
    # Pick the ordinal suffix explicitly instead of relying on a bare `except:`.
    # 11, 12 and 13 take 'th'; otherwise the last digit decides (21st, 22nd, 23rd, ...).
    if rank % 100 in (11, 12, 13) or rank % 10 not in (1, 2, 3):
        suffix = 'th'
    else:
        suffix = indicators[rank % 10 - 1]
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# Create the result table with one row per neighborhood.
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

# Fill every row with that neighborhood's most frequent venue categories.
for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Lounge,Breakfast Spot,Skating Rink,Clothing Store,American Restaurant,Pharmacy,Italian Restaurant,Korean BBQ Restaurant,Medical Center
1,"Birch Cliff, Cliffside West",General Entertainment,Skating Rink,Café,College Stadium,American Restaurant,Pharmacy,Korean BBQ Restaurant,Latin American Restaurant,Lounge,Medical Center
2,Cedarbrae,Gas Station,Athletics & Sports,Bakery,Bank,Thai Restaurant,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,American Restaurant,Park
3,"Clarks Corners, Tam O'Shanter, Sullivan",Pizza Place,Fast Food Restaurant,Fried Chicken Joint,Pharmacy,Italian Restaurant,Noodle House,Chinese Restaurant,Gas Station,Bank,Thai Restaurant
4,"Cliffside, Cliffcrest, Scarborough Village West",American Restaurant,Motel,Asian Restaurant,Intersection,Italian Restaurant,Korean BBQ Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station


In [26]:
# Step 18: cluster the neighborhoods into five clusters.
# Set the number of clusters.
kclusters = 5

# Keep only the numeric category-frequency columns. The keyword form is required:
# the positional axis argument `drop('Neighborhood', 1)` was removed in pandas 2.0.
scarborough_grouped_clustering = scarborough_grouped.drop(columns='Neighborhood')

# Run k-means clustering. n_init is pinned to 10 (the historical default) so the
# result does not change with the scikit-learn version's default.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(scarborough_grouped_clustering)

# Check the cluster labels generated for the first rows of the dataframe.
kmeans.labels_[0:10]

array([1, 1, 1, 1, 2, 1, 1, 1, 1, 3], dtype=int32)

In [27]:
# Step 19: build the table used by the cluster map.
# Add the clustering labels as the first column. If this cell has already been run,
# the column exists and `insert` would raise, so drop any previous copy first.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged = scarborough_data

# Join the cluster labels and top venues onto scarborough_data (which carries the
# latitude/longitude of each neighborhood). Neighborhoods without a match get NaN.
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

scarborough_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353,3.0,Fast Food Restaurant,American Restaurant,Indian Restaurant,Italian Restaurant,Korean BBQ Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1.0,Golf Course,Bar,Construction & Landscaping,American Restaurant,Pharmacy,Korean BBQ Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Electronics Store,Intersection,Donut Shop,Rental Car Location,Bank,Medical Center,Breakfast Spot,Restaurant,Mexican Restaurant,American Restaurant
3,Scarborough,Woburn,43.770992,-79.216917,4.0,Coffee Shop,Korean BBQ Restaurant,Indian Restaurant,Italian Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Motel
4,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Gas Station,Athletics & Sports,Bakery,Bank,Thai Restaurant,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,American Restaurant,Park


In [28]:
# Inspect the column dtypes of the merged table.
scarborough_merged.dtypes

Borough                    object
Neighborhood               object
Latitude                  float64
Longitude                 float64
Cluster Labels            float64
1st Most Common Venue      object
2nd Most Common Venue      object
3rd Most Common Venue      object
4th Most Common Venue      object
5th Most Common Venue      object
6th Most Common Venue      object
7th Most Common Venue      object
8th Most Common Venue      object
9th Most Common Venue      object
10th Most Common Venue     object
dtype: object

In [29]:
# The map cell indexes its colour list with the cluster label, so the label must be an
# integer. Rows without a label (no clustering result was joined for them) are dropped:
# filling them with 0 would wrongly place them in cluster 0 and would also write 0 into
# the venue-name columns.
scarborough_merged = scarborough_merged.dropna(subset=['Cluster Labels']).reset_index(drop=True)
scarborough_merged['Cluster Labels'] = scarborough_merged['Cluster Labels'].astype(int)

In [30]:
# Step 20: view the clustered data on a map.
# The map is centred on the current `latitude` / `longitude` values, i.e. whatever the
# most recently executed geocoding cell stored in them.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One colour per cluster, evenly spaced over the rainbow colormap.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per neighborhood, coloured by its cluster.
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighborhood'], scarborough_merged['Cluster Labels']):
    # List indices must be integers; coerce in case the label column is float-typed.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # cluster - 1 keeps the original colour assignment (cluster 0 maps to the last colour).
        color=rainbow[cluster - 1],
        fill=True,
        fill_color=rainbow[cluster - 1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters