# Segmenting and Clustering Neighborhoods in Toronto


## Webscraping Wikipedia page

In [94]:
import pandas as pd

my_url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df = pd.read_html(my_url)[0]
print("Dataframe successfully retreived... \nDataframe shape: [%i,%i]" % (df.shape[0],df.shape[1]))


Dataframe successfully retreived... 
Dataframe shape: [288,3]


## Removing not assigned boroughs

In [95]:
print("Removing \"not assigned\" boroughs...")
df=df[df.Borough != 'Not assigned']
print("Dataframe shape: [%i,%i]" % (df.shape[0],df.shape[1]))


Removing "not assigned" boroughs...
Dataframe shape: [211,3]


# Grouping by Postcode

In [96]:
df1=df.groupby('Postcode').agg({'Borough':lambda x: set(x),'Neighbourhood':lambda x: list(x)}).reset_index()
#df1['Neighbourhood']='-'.join(df1['Neighbourhood'])
df1['Neighbourhood']=df1.Neighbourhood.apply(', '.join)
df1['Borough']=df1.Borough.apply(', '.join)
df1

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


## Changing "not assigned" neighborhoods to the according borough

In [97]:
df1['Neighbourhood'] = df1['Neighbourhood'].replace('Not assigned',df1['Borough'])
print('See the Postcode = M7A row. The Neighbourhood has changed from "Not assigned" to "Queens Park"')
df1

See the Postcode = M7A row. The Neighbourhood has changed from "Not assigned" to "Queens Park"


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


## Checking the shape of the dataframe

In [98]:
print('The dataframe has %i rows and %i columns.' % (df1.shape[0],df1.shape[1]))

The dataframe has 103 rows and 3 columns.


## Installing geocoder package for the coordinate extraction

In [99]:
!pip install geocoder
import geocoder # import geocoder




## Adding two columns for longitude and latitude in the dataframe

In [100]:
df1["Latitude"] = ""
df1["Longitude"] = ""
df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",,
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",,
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",,
3,M1G,Scarborough,Woburn,,
4,M1H,Scarborough,Cedarbrae,,


## Looping through dataframe and filling lat/long pairs using geocoder (with HERE provider)
I registered at HERE.com to get the APP_ID and APP_CODE

In [112]:
# The code was removed by Watson Studio for sharing.

In [116]:
import time

for index, row in df1.iterrows():

    # initialize your variable to None
    lat_lng_coords = None
    postal_code=row['Postcode']
    print('Checking Postcode %s' % postal_code)   
    search_string=postal_code + ',Toronto'
    try:
        g = geocoder.here(search_string,app_id=my_app_id,app_code=my_app_code)
        lat_lng_coords = g.latlng    
        latitude = lat_lng_coords[0]
        longitude = lat_lng_coords[1]
        print("lat/long = %f/%f" % (latitude,longitude))
        df1.Latitude.iloc[index] = latitude
        df1.Longitude.iloc[index] = longitude
    except:
        print('Could not retreive coordinates for %s ...' % postal_code)
        
    #time.sleep(1)
    

Checking Postcode M1B
lat/long = 43.811530/-79.195520
Checking Postcode M1C
lat/long = 43.785670/-79.158720
Checking Postcode M1E
lat/long = 43.765820/-79.175190
Checking Postcode M1G
lat/long = 43.768370/-79.217590
Checking Postcode M1H
lat/long = 43.769690/-79.239440
Checking Postcode M1J
lat/long = 43.743130/-79.231750
Checking Postcode M1K
lat/long = 43.726280/-79.263630
Checking Postcode M1L
lat/long = 43.713050/-79.285050
Checking Postcode M1M
lat/long = 43.724230/-79.227920
Checking Postcode M1N
lat/long = 43.696770/-79.259970
Checking Postcode M1P
lat/long = 43.759980/-79.268970
Checking Postcode M1R
lat/long = 43.750710/-79.300560
Checking Postcode M1S
lat/long = 43.793940/-79.267980
Checking Postcode M1T
lat/long = 43.784730/-79.299070
Checking Postcode M1V
lat/long = 43.817690/-79.280190
Checking Postcode M1W
lat/long = 43.800880/-79.320740
Checking Postcode M1X
lat/long = 43.834220/-79.216700
Checking Postcode M2H
lat/long = 43.802850/-79.356210
Checking Postcode M2J
lat/lo

In [118]:
df1

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.8115,-79.1955
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7857,-79.1587
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7658,-79.1752
3,M1G,Scarborough,Woburn,43.7684,-79.2176
4,M1H,Scarborough,Cedarbrae,43.7697,-79.2394
5,M1J,Scarborough,Scarborough Village,43.7431,-79.2318
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.7263,-79.2636
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.7131,-79.285
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.7242,-79.2279
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6968,-79.26


In [122]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

## Visualizing the neighbourhoods of Toronto

In [128]:
# create map of New York using latitude and longitude values
Toronto_map = folium.Map(location=[43.7337, -79.5175], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df1['Latitude'], df1['Longitude'], df1['Borough'], df1['Neighbourhood']):
    #print(borough)
    try:
        label = '{}, {}'.format(neighborhood, borough)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(Toronto_map)  
    except:
        print('error')
Toronto_map

error
error


## Let's focus on the Toronto Downtown Neighborhoods. First filter by Borough = Downtown Toronto

In [143]:
DT =  df1['Borough']=="Downtown Toronto"
DT = df1[DT].reset_index()
DT=DT[DT.Latitude != '']
#DT.drop([69])
DT

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,50,M4W,Downtown Toronto,Rosedale,43.6822,-79.3779
1,51,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.6682,-79.3666
2,52,M4Y,Downtown Toronto,Church and Wellesley,43.6666,-79.3813
3,53,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.6503,-79.3592
4,54,M5B,Downtown Toronto,"Ryerson, Garden District",43.6574,-79.3782
5,55,M5C,Downtown Toronto,St. James Town,43.6512,-79.3755
6,56,M5E,Downtown Toronto,Berczy Park,43.6452,-79.3737
7,57,M5G,Downtown Toronto,Central Bay Street,43.6561,-79.3849
8,58,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.6497,-79.3826
9,59,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.6302,-79.3624


## Define Foursquare credentials

In [144]:
CLIENT_ID = '5XCC3VTUBF2U4RA2X1GOHT40OMIHJMEIU4KWIYJX1OXM24LP' # your Foursquare ID
CLIENT_SECRET = 'AOQ53GUDIBERKNOVVS01LQVCLWBOLBEHTCYDARDTTZKXQX0Z' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5XCC3VTUBF2U4RA2X1GOHT40OMIHJMEIU4KWIYJX1OXM24LP
CLIENT_SECRET:AOQ53GUDIBERKNOVVS01LQVCLWBOLBEHTCYDARDTTZKXQX0Z


## Let's create a function to get all venues around a certain location in a certain radius

In [160]:
import requests

def getNearbyVenues(names, latitudes, longitudes, radius=500,LIMIT=100):
    
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Apply this function on the Detroit Downtown dataframe and examine the according venue dataframe

In [161]:
DT_venues = getNearbyVenues(names=DT['Neighbourhood'],
                                   latitudes=DT['Latitude'],
                                   longitudes=DT['Longitude']
                                  )

Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
First Canadian Place, Underground city
Christie


In [162]:
print(DT_venues.shape)
DT_venues.head()

(1160, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.68221,-79.37795,Summerhill Market,43.686265,-79.375458,Grocery Store
1,Rosedale,43.68221,-79.37795,Rosedale Park,43.682328,-79.378934,Playground
2,Rosedale,43.68221,-79.37795,Whitney Park,43.682036,-79.373788,Park
3,Rosedale,43.68221,-79.37795,Mooredale House,43.678631,-79.380091,Building
4,Rosedale,43.68221,-79.37795,Scoops Convenience Boutique,43.686148,-79.375828,Candy Store


In [163]:
print('There are {} uniques categories.'.format(len(DT_venues['Venue Category'].unique())))

There are 186 uniques categories.


## Apply one-hot encoding

In [166]:
# one hot encoding
DT_onehot = pd.get_dummies(DT_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
DT_onehot['Neighbourhood'] = DT_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [DT_onehot.columns[-1]] + list(DT_onehot.columns[:-1])
DT_onehot = DT_onehot[fixed_columns]
DT_onehot.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Trail,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Check how frequent a certain venue type occurs in a specific neighbourhood

In [168]:
DT_onehot.shape

(1160, 187)

In [169]:
DT_grouped = DT_onehot.groupby('Neighbourhood').mean().reset_index()
DT_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Trail,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.03,0.0,0.01,0.0,0.0,0.03,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.015625,...,0.0,0.0,0.0,0.015625,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0.0,...,0.0,0.0,0.0,0.014286,0.014286,0.0,0.0,0.0,0.0,0.014286
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0
5,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.021978,0.0,0.021978,0.0,0.0,0.0,...,0.0,0.0,0.0,0.043956,0.0,0.0,0.043956,0.010989,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.012346,0.012346,0.0,0.0,0.0,0.012346,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012346,0.0,0.0,0.012346,0.012346
8,"Commerce Court, Victoria Hotel",0.0,0.04,0.0,0.01,0.0,0.0,0.01,0.0,0.01,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
9,"Design Exchange, Toronto Dominion Centre",0.0,0.03,0.0,0.01,0.0,0.0,0.01,0.0,0.0,...,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0


In [170]:
DT_grouped.shape

(17, 187)

## Check the five top venue categories per neighbourhood

In [173]:
num_top_venues = 5

for hood in DT_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = DT_grouped[DT_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
         venue  freq
0  Coffee Shop  0.08
1         Café  0.06
2        Hotel  0.05
3   Steakhouse  0.04
4          Bar  0.03


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1          Restaurant  0.05
2        Cocktail Bar  0.05
3  Seafood Restaurant  0.03
4                Café  0.03


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
                venue  freq
0         Coffee Shop  0.10
1  Italian Restaurant  0.07
2          Restaurant  0.04
3                 Bar  0.04
4                Café  0.04


----Cabbagetown, St. James Town----
         venue  freq
0  Coffee Shop  0.10
1   Restaurant  0.05
2       Market  0.05
3  Pizza Place  0.05
4       Bakery  0.05


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.09
1      Clothing Store  0.07
2      Ice Cream Shop  0.03
3  Italian Restaurant  0.03
4      Cosmetics Shop  0.03


---

In [174]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [185]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = DT_grouped['Neighbourhood']

for ind in np.arange(DT_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(DT_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Hotel,Steakhouse,Bar,Asian Restaurant,Burger Joint,Japanese Restaurant,Gastropub,Restaurant
1,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Farmers Market,Breakfast Spot,Cheese Shop,Steakhouse,Beer Bar,Seafood Restaurant,Bakery
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",Coffee Shop,Italian Restaurant,Bar,Café,Restaurant,Park,Pub,Bakery,Sandwich Place,Pet Store
3,"Cabbagetown, St. James Town",Coffee Shop,Italian Restaurant,Restaurant,Café,Pizza Place,Market,Bakery,Snack Place,Jewelry Store,Playground
4,Central Bay Street,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Ice Cream Shop,Italian Restaurant,Cosmetics Shop,Gym / Fitness Center,Plaza,Lingerie Store,Café


## Now, let's cluster the neighbourhoods into three clusters depending on venue similarity

In [186]:
# import k-means from clustering stage
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 3

DT_grouped_clustering = DT_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(DT_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 2, 2, 2, 0, 2, 2, 2], dtype=int32)

In [187]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

DT_merged = DT

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
DT_merged = DT_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

DT_merged.head() # check the last columns!

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,50,M4W,Downtown Toronto,Rosedale,43.6822,-79.3779,0,Playground,Building,Park,Grocery Store,Candy Store,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
1,51,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.6682,-79.3666,2,Coffee Shop,Italian Restaurant,Restaurant,Café,Pizza Place,Market,Bakery,Snack Place,Jewelry Store,Playground
2,52,M4Y,Downtown Toronto,Church and Wellesley,43.6666,-79.3813,2,Coffee Shop,Restaurant,Japanese Restaurant,Gay Bar,Sushi Restaurant,Men's Store,Pub,Gastropub,Dance Studio,Hotel
3,53,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.6503,-79.3592,2,Coffee Shop,Bakery,Café,Theater,Brewery,Boat or Ferry,Spanish Restaurant,Spa,Breakfast Spot,French Restaurant
4,54,M5B,Downtown Toronto,"Ryerson, Garden District",43.6574,-79.3782,2,Coffee Shop,Clothing Store,Cosmetics Shop,Middle Eastern Restaurant,Italian Restaurant,Café,Restaurant,Tea Room,Japanese Restaurant,Hotel


## Finally, let's visualize where the clusters are located geographically

In [190]:
import matplotlib.cm as cm
import matplotlib.colors as colors
# create map
map_clusters = folium.Map(location=[43.7337, -79.5175], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(DT_merged['Latitude'], DT_merged['Longitude'], DT_merged['Neighbourhood'], DT_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters