In [1]:
import pandas as pd
import numpy as np
import json
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium # map rendering library

In [2]:
from bs4 import BeautifulSoup
import requests
import lxml

## Fisrt step
- Scrape data from Wikipedia page
- Transform the data to the format which we can easily analyse

In [3]:
# Wikipedia page
wiki_link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tor = requests.get(wiki_link).text

In [4]:
soup_tor = BeautifulSoup(wiki_tor, 'lxml')

In [5]:
table = soup_tor.find('table').text
print(table)



Postcode
Borough
Neighbourhood


M1A
Not assigned
Not assigned


M2A
Not assigned
Not assigned


M3A
North York
Parkwoods


M4A
North York
Victoria Village


M5A
Downtown Toronto
Harbourfront


M5A
Downtown Toronto
Regent Park


M6A
North York
Lawrence Heights


M6A
North York
Lawrence Manor


M7A
Queen's Park
Not assigned


M8A
Not assigned
Not assigned


M9A
Etobicoke
Islington Avenue


M1B
Scarborough
Rouge


M1B
Scarborough
Malvern


M2B
Not assigned
Not assigned


M3B
North York
Don Mills North


M4B
East York
Woodbine Gardens


M4B
East York
Parkview Hill


M5B
Downtown Toronto
Ryerson


M5B
Downtown Toronto
Garden District


M6B
North York
Glencairn


M7B
Not assigned
Not assigned


M8B
Not assigned
Not assigned


M9B
Etobicoke
Cloverdale


M9B
Etobicoke
Islington


M9B
Etobicoke
Martin Grove


M9B
Etobicoke
Princess Gardens


M9B
Etobicoke
West Deane Park


M1C
Scarborough
Highland Creek


M1C
Scarborough
Rouge Hill


M1C
Scarborough
Port Union


M2C
Not assigned
Not assigned

In [6]:
type(table)
table1 = table.split('\n\n')## split the str into list

In [7]:
# Delete the first and last elements in the list because there are blank space
table1.pop(0)
table1.pop(-1)
table1.pop(0)

'Postcode\nBorough\nNeighbourhood'

In [8]:
type(table1)

list

In [9]:
code_list = []
for row in table1:
    dict1 = {}
    dict1 = row.split("\n")
    code_list.append(dict1)

In [10]:
code_df = pd.DataFrame(code_list)
code_df.head()

Unnamed: 0,0,1,2,3
0,,M1A,Not assigned,Not assigned
1,,M2A,Not assigned,Not assigned
2,,M3A,North York,Parkwoods
3,,M4A,North York,Victoria Village
4,,M5A,Downtown Toronto,Harbourfront


In [11]:
code_df.columns

RangeIndex(start=0, stop=4, step=1)

In [12]:
code_df.drop(0, axis=1, inplace=True)
code_df.head()

Unnamed: 0,1,2,3
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [13]:
code_df.columns = ['PostalCode', 'Borough', 'Neighborhood']
code_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [14]:
# Ignore cells with a borough that is Not assigned.
code_df = code_df[(code_df['Borough'] != 'Not assigned')]
code_df.reset_index(inplace=True, drop=True)
code_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


In [15]:
code_df = code_df.groupby('PostalCode').agg(lambda x:','.join(set(x)))
code_df.head()

Unnamed: 0_level_0,Borough,Neighborhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern,Rouge"
M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
M1E,Scarborough,"Morningside,West Hill,Guildwood"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [16]:
code_df1 = pd.DataFrame(code_df)
code_df1.reset_index(level='PostalCode', inplace=True)
code_df1.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern,Rouge"
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
2,M1E,Scarborough,"Morningside,West Hill,Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [17]:
code_df1.loc[code_df1['Neighborhood'] == 'Not assigned', 'Neighborhood'] = code_df1.loc[code_df1['Neighborhood'] == 'Not assigned','Borough']
code_df1.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern,Rouge"
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek"
2,M1E,Scarborough,"Morningside,West Hill,Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [18]:
code_df1.shape

(103, 3)

## Second step
- Add Latitude and Longitude to each postal code

In [19]:
#import geocoder # import geocoder

In [20]:
#La_list = []
#Lo_list = []
#for postal_code in code_df1['PostalCode']:
    # initialize your variable to None
#    lat_lng_coords = None

    # loop until you get the coordinates
#    while(lat_lng_coords is None):
#      g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#      lat_lng_coords = g.latlng
#
#    latitude = lat_lng_coords[0]
#   longitude = lat_lng_coords[1]
#    La_list.extend[latitude]
#    Lo_list.extend[longitude]

In [22]:
geo_code = pd.read_csv('Geospatial_Coordinates.csv')
geo_code.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [25]:
code_df2 = pd.merge(code_df1, geo_code, left_on='PostalCode', right_on='Postal Code', how='inner')
code_df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill,Port Union,Highland Creek",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Morningside,West Hill,Guildwood",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


In [26]:
code_df2.shape

(103, 6)

## Third step
- Utilize Foursquare to get more information
- Cluster these neighborhoods
- draw a folium map

In [31]:
# Define Foursquare Credentials and Version
CLIENT_ID = 'EMBSTAQD35KNURG10HIR31DT02G0GNKPSHVH3OS1UVTA2GXR' # your Foursquare ID
CLIENT_SECRET = '2H3MDG5T5AZWLDIG1DDZXRODMSGSHQO4ZC2FGZ31ZSMQOKTW' # your Foursquare Secret
VERSION = '20190220' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EMBSTAQD35KNURG10HIR31DT02G0GNKPSHVH3OS1UVTA2GXR
CLIENT_SECRET:2H3MDG5T5AZWLDIG1DDZXRODMSGSHQO4ZC2FGZ31ZSMQOKTW


In [32]:
code_df2.loc[0, 'Neighborhood']

'Malvern,Rouge'

In [33]:
neighborhood_latitude = code_df2.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = code_df2.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = code_df2.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern,Rouge are 43.806686299999996, -79.19435340000001.


### get the top 200 venues that are in Malvern&Rougel within a radius of 1000 meters.

In [34]:
radius = 1000
Limit = 200

In [35]:
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neighborhood_latitude, neighborhood_longitude, VERSION, radius, Limit)
url

'https://api.foursquare.com/v2/venues/explore?client_id=EMBSTAQD35KNURG10HIR31DT02G0GNKPSHVH3OS1UVTA2GXR&client_secret=2H3MDG5T5AZWLDIG1DDZXRODMSGSHQO4ZC2FGZ31ZSMQOKTW&ll=43.806686299999996,-79.19435340000001&v=20190220&radius=1000&limit=200'

In [36]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c7d7d661ed2196e4880e068'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 17,
  'suggestedBounds': {'ne': {'lat': 43.81568630900001,
    'lng': -79.18190576146081},
   'sw': {'lat': 43.797686290999984, 'lng': -79.20680103853921}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4d669cba83865481c948fa53',
       'name': 'Images Salon & Spa',
       'location': {'address': '8130 Sheppard Ave E',
        'crossStreet': 'Morningside Ave',
        'lat': 43.80228301948931,
        'lng': -79.19856472801668,
        'labeledLatLngs'

In [37]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [43]:
from pandas.io.json import json_normalize
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
#filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Images Salon & Spa,Spa,43.802283,-79.198565
1,Caribbean Wave,Caribbean Restaurant,43.798558,-79.195777
2,Wendy's,Fast Food Restaurant,43.802008,-79.19808
3,Harvey's,Fast Food Restaurant,43.800106,-79.198258
4,Wendy's,Fast Food Restaurant,43.807448,-79.199056


In [44]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

17 venues were returned by Foursquare.


In [72]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            Limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [73]:
tor_venues = getNearbyVenues(names=code_df2['Neighborhood'],
                                   latitudes=code_df2['Latitude'],
                                   longitudes=code_df2['Longitude']
                                  )

In [74]:
print(tor_venues.shape)
tor_venues.head()

(4837, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern,Rouge",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,"Malvern,Rouge",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
2,"Malvern,Rouge",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
3,"Malvern,Rouge",43.806686,-79.194353,Harvey's,43.800106,-79.198258,Fast Food Restaurant
4,"Malvern,Rouge",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant


In [75]:
len(tor_venues.Neighborhood.unique())

102

In [76]:
tor_venues_dum = pd.get_dummies(tor_venues[['Venue Category']], prefix="", prefix_sep="")
tor_venues_dum.insert(loc = 0, column = 'Neighbor',value = tor_venues['Neighborhood'])
tor_venues_dum.head()

Unnamed: 0,Neighbor,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [77]:
tor_venues_dum1 = tor_venues_dum.groupby('Neighbor').sum().reset_index()
tor_venues_dum1

Unnamed: 0,Neighbor,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,Agincourt,0,0,0,0,0,0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
1,"Bathurst Manor,Wilson Heights,Downsview North",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,Bayview Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Bedford Park,Lawrence Manor East",1,0,0,0,0,0,1,0,0,...,1,0,0,0,0,0,1,0,0,0
4,Berczy Park,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"Birch Cliff,Cliffside West",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Brockton,Exhibition Place,Parkdale Village",1,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Business Reply Mail Processing Centre 969 Eastern,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"CFB Toronto,Downsview East",0,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
9,Caledonia-Fairbanks,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


### This data set is too sparse, I decide to select the top 10 venues for each 

In [89]:
print("----"+'Agincourt'+"----")
temp = tor_venues_dum1[tor_venues_dum1['Neighbor'] == 'Agincourt'].T.reset_index()
temp.columns = ['venue','freq']
temp = temp.iloc[1:]
temp['freq'] = temp['freq'].astype(float)
temp = temp.round({'freq': 2})
print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(5))
print('\n')

----Agincourt----
                venue  freq
0  Chinese Restaurant   7.0
1       Shopping Mall   3.0
2         Supermarket   2.0
3          Restaurant   2.0
4         Pizza Place   2.0




In [106]:
def get_top_venues(row, num=10):
    row1 = row.iloc[1:]
    row_sorted = row1.sort_values(ascending = False)
    
    return(row_sorted.index.values[0:num])
    

In [107]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
print(columns)
# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = tor_venues_dum1['Neighbor']

for ind in np.arange(tor_venues_dum1.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = get_top_venues(tor_venues_dum1.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

['Neighborhood', '1st Most Common Venue', '2nd Most Common Venue', '3rd Most Common Venue', '4th Most Common Venue', '5th Most Common Venue', '6th Most Common Venue', '7th Most Common Venue', '8th Most Common Venue', '9th Most Common Venue', '10th Most Common Venue']


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Shopping Mall,Supermarket,Pizza Place,Caribbean Restaurant,Bakery,Restaurant,Pool,Mediterranean Restaurant,Skating Rink
1,"Bathurst Manor,Wilson Heights,Downsview North",Pizza Place,Coffee Shop,Community Center,Restaurant,Bridal Shop,Ski Chalet,Ski Area,Shopping Mall,Supermarket,Sushi Restaurant
2,Bayview Village,Bank,Japanese Restaurant,Grocery Store,Café,Chinese Restaurant,Shopping Mall,Skate Park,Convenience Store,Fast Food Restaurant,Farm
3,"Bedford Park,Lawrence Manor East",Italian Restaurant,Fast Food Restaurant,Coffee Shop,Pizza Place,Accessories Store,Comfort Food Restaurant,Sushi Restaurant,Sports Club,Bridal Shop,Skating Rink
4,Berczy Park,Hotel,Café,Coffee Shop,Restaurant,Japanese Restaurant,Park,Italian Restaurant,Cocktail Bar,Bakery,Steakhouse


In [108]:
# set number of clusters
kclusters = 5

tor_clustering = tor_venues_dum1.drop('Neighbor', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 3, 0, 2, 0, 0, 0])

In [109]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

tor_merged = tor_venues

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
tor_merged = tor_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

tor_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Malvern,Rouge",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa,0,Fast Food Restaurant,Bus Station,Sandwich Place,Coffee Shop,Gym,Caribbean Restaurant,Martial Arts Dojo,Fruit & Vegetable Store,Paper / Office Supplies Store,Park
1,"Malvern,Rouge",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant,0,Fast Food Restaurant,Bus Station,Sandwich Place,Coffee Shop,Gym,Caribbean Restaurant,Martial Arts Dojo,Fruit & Vegetable Store,Paper / Office Supplies Store,Park
2,"Malvern,Rouge",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant,0,Fast Food Restaurant,Bus Station,Sandwich Place,Coffee Shop,Gym,Caribbean Restaurant,Martial Arts Dojo,Fruit & Vegetable Store,Paper / Office Supplies Store,Park
3,"Malvern,Rouge",43.806686,-79.194353,Harvey's,43.800106,-79.198258,Fast Food Restaurant,0,Fast Food Restaurant,Bus Station,Sandwich Place,Coffee Shop,Gym,Caribbean Restaurant,Martial Arts Dojo,Fruit & Vegetable Store,Paper / Office Supplies Store,Park
4,"Malvern,Rouge",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant,0,Fast Food Restaurant,Bus Station,Sandwich Place,Coffee Shop,Gym,Caribbean Restaurant,Martial Arts Dojo,Fruit & Vegetable Store,Paper / Office Supplies Store,Park


In [110]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [112]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)
map_clusters

In [113]:
# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(tor_merged['Neighborhood Latitude'], tor_merged['Neighborhood Longitude'], tor_merged['Neighborhood'], tor_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters