In [4]:
import requests
import pandas as pd
import numpy as np
from lxml import etree
#!conda install -c conda-forge folium=0.5.0 --yes
import folium
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim
# import k-means from clustering stage
from sklearn.cluster import KMeans

# Part 1 of assignment #

##### Dowbload the wikipedia webpage and get the html source of the webpage #####

In [8]:
wikiData = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
html = etree.HTML(wikiData.text)

##### Extract the table data from the webpage #####

In [58]:
postalCodeTable = html.xpath('//table[@class="wikitable sortable"]/tbody/tr')

tableHeaders = [col.text.strip() for col in postalCodeTable[0].xpath('th')]
labels = ['Post Code', 'Borough', 'Neighborhood']
postalCodes = pd.DataFrame(columns=labels)

for row in postalCodeTable[1:]:
    rowElements = row.xpath("td")
    rowData = [column.text.strip() if column.text is not None else column.xpath('a')[0].text.strip() for column in rowElements]
    postalCodes = postalCodes.append(pd.DataFrame([rowData],columns=labels), ignore_index = True)

##### Drop the indexes where 'Borough' column is set to 'Not assigned' #####

In [59]:
indexesToDrop = postalCodes[postalCodes['Borough'] == 'Not assigned'].index
postalCodes =postalCodes.drop(indexesToDrop)
postalCodes =postalCodes.reset_index()

##### set the column  'Neighborhood' to the value of column 'Borough', if set to 'Not assigned'  and create only one row for each postal code #####

In [60]:
postalCodes.loc[postalCodes['Neighborhood'] == 'Not assigned', 'Neighborhood'] = postalCodes.loc[postalCodes['Neighborhood'] == 'Not assigned', 'Borough']

#Merge the rows which have same postal code
postalCodes = postalCodes.groupby(['Post Code','Borough'])['Neighborhood'].apply(', '.join).reset_index()
postalCodes.head(10)

Unnamed: 0,Post Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


##### Let's print the shape of dataframe #####

In [61]:
postalCodes.shape

(103, 3)

### -----------------------End of Part 1 of assignment ----------------------- ###

# Part 2 of assignment #

##### Downalod the csv file containing the geographial details #####

In [38]:
latLongDetails = pd.read_csv("http://cocl.us/Geospatial_data")

##### create the dataframe of Toronto neighborhood with ther geograhical details ######

In [62]:
#sort the latitude/longitude dataframe based on Postal Code, assuming they are not already sorted.
#This makes sure that this dataframe can diretly be mapped to postal code dataframe containing the Neighborhood infromation
latLongDetails.sort_values(['Postal Code'], ascending=True)
postalCodes['Latitude'] = latLongDetails['Latitude']
postalCodes['Longitude'] = latLongDetails['Longitude']
postalCodes.head(10)

Unnamed: 0,Post Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [40]:
postalCodes.shape

(103, 5)

### ------------- End of Part 2 of assignment --------------###

# Part 3 of assignment #

### Explore Neighborhoods in Toronto ###

In [63]:
#Modify the dataframe to have only postalcodes whose Boough field contains Toronto
postalCodes[postalCodes['Borough'].str.contains('Toronto')].reset_index(drop=True)
postalCodes.head()

Unnamed: 0,Post Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


##### Setup the parameters for invoking the Foursquare API #####

In [67]:
CLIENT_ID = 'UMYJMWHFKRU1VBBGWTSKNRM4VM2SGXCZQ4DRHNPUSJ0YFGKG'
CLIENT_SECRET = 'OZP1XC2FPDQULVUZBJRDHUK0YOQN1CCPP4SR5UQ0HSDOVQBF'
VERSION = '20190817'
LIMIT = 100

##### Let's create a function to to get the nearby venues for all the negighborhood in toronto ###### 

In [64]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


In [68]:
#Now write the code to run the above function on each neighborhood and create a new dataframe called toronto_venues.
toronto_venues = getNearbyVenues(names=postalCodes['Neighborhood'],
                                   latitudes=postalCodes['Latitude'],
                                   longitudes=postalCodes['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [69]:
#Let's check the size of the resulting dataframe
print(toronto_venues.shape)
print(toronto_venues.head())

(2234, 7)
                             Neighborhood  Neighborhood Latitude  \
0                          Rouge, Malvern              43.806686   
1  Highland Creek, Rouge Hill, Port Union              43.784535   
2  Highland Creek, Rouge Hill, Port Union              43.784535   
3       Guildwood, Morningside, West Hill              43.763573   
4       Guildwood, Morningside, West Hill              43.763573   

   Neighborhood Longitude                            Venue  Venue Latitude  \
0              -79.194353                          Wendy's       43.807448   
1              -79.160497            Royal Canadian Legion       43.782533   
2              -79.160497   Scarborough Historical Society       43.788755   
3              -79.188711  Swiss Chalet Rotisserie & Grill       43.767697   
4              -79.188711                G & G Electronics       43.765309   

   Venue Longitude        Venue Category  
0       -79.199056  Fast Food Restaurant  
1       -79.163085        

In [70]:
#Let's check how many venues were returned for each neighborhood
print(toronto_venues.groupby('Neighborhood').count())


                                                    Neighborhood Latitude  \
Neighborhood                                                                
Adelaide, King, Richmond                                              100   
Agincourt                                                               4   
Agincourt North, L'Amoreaux East, Milliken, Ste...                      2   
Albion Gardens, Beaumond Heights, Humbergate, J...                     11   
Alderwood, Long Branch                                                  9   
Bathurst Manor, Downsview North, Wilson Heights                        18   
Bayview Village                                                         4   
Bedford Park, Lawrence Manor East                                      22   
Berczy Park                                                            56   
Birch Cliff, Cliffside West                                             4   
Bloordale Gardens, Eringate, Markland Wood, Old...                      8   

In [71]:
#Let's find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))


There are 272 uniques categories.


### Analyze Each Neighborhood ###

In [74]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']])

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
print(toronto_onehot.head())


                             Neighborhood  Venue Category_Accessories Store  \
0                          Rouge, Malvern                                 0   
1  Highland Creek, Rouge Hill, Port Union                                 0   
2  Highland Creek, Rouge Hill, Port Union                                 0   
3       Guildwood, Morningside, West Hill                                 0   
4       Guildwood, Morningside, West Hill                                 0   

   Venue Category_Afghan Restaurant  Venue Category_Airport  \
0                                 0                       0   
1                                 0                       0   
2                                 0                       0   
3                                 0                       0   
4                                 0                       0   

   Venue Category_Airport Food Court  Venue Category_Airport Lounge  \
0                                  0                              0   
1   

In [75]:
#And let's examine the new dataframe size.
print(toronto_onehot.shape)


(2234, 273)


In [76]:
#Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
print(toronto_grouped)


                                         Neighborhood  \
0                            Adelaide, King, Richmond   
1                                           Agincourt   
2   Agincourt North, L'Amoreaux East, Milliken, St...   
3   Albion Gardens, Beaumond Heights, Humbergate, ...   
4                              Alderwood, Long Branch   
5     Bathurst Manor, Downsview North, Wilson Heights   
6                                     Bayview Village   
7                   Bedford Park, Lawrence Manor East   
8                                         Berczy Park   
9                         Birch Cliff, Cliffside West   
10  Bloordale Gardens, Eringate, Markland Wood, Ol...   
11       Brockton, Exhibition Place, Parkdale Village   
12  Business Reply Mail Processing Centre 969 Eastern   
13                        CFB Toronto, Downsview East   
14  CN Tower, Bathurst Quay, Island airport, Harbo...   
15                        Cabbagetown, St. James Town   
16                             

In [77]:
#Let's confirm the new size
print(toronto_grouped.shape)



(100, 273)


In [78]:
#Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                            venue  freq
0      Venue Category_Coffee Shop  0.08
1             Venue Category_Café  0.05
2  Venue Category_Thai Restaurant  0.04
3       Venue Category_Steakhouse  0.04
4              Venue Category_Bar  0.04


----Agincourt----
                                      venue  freq
0             Venue Category_Breakfast Spot  0.25
1                     Venue Category_Lounge  0.25
2             Venue Category_Clothing Store  0.25
3               Venue Category_Skating Rink  0.25
4  Venue Category_Middle Eastern Restaurant  0.00


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                                            venue  freq
0                       Venue Category_Playground   0.5
1                             Venue Category_Park   0.5
2                    Venue Category_Metro Station   0.0
3              Venue Category_Monument / Landmark   0.0
4  Venue Category_Molecular Gastronomy Restaurant   0.0


--

                              venue  freq
0     Venue Category_Breakfast Spot  0.14
1                Venue Category_Gym  0.14
2     Venue Category_Clothing Store  0.14
3              Venue Category_Hotel  0.14
4  Venue Category_Food & Drink Shop  0.14


----Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West----
                               venue  freq
0  Venue Category_Light Rail Station  0.12
1         Venue Category_Coffee Shop  0.12
2                 Venue Category_Pub  0.12
3          Venue Category_Sports Bar  0.06
4    Venue Category_Sushi Restaurant  0.06


----Del Ray, Keelesdale, Mount Dennis, Silverthorn----
                                      venue  freq
0             Venue Category_Discount Store  0.33
1       Venue Category_Fast Food Restaurant  0.33
2             Venue Category_Sandwich Place  0.33
3  Venue Category_Middle Eastern Restaurant  0.00
4                      Venue Category_Motel  0.00


----Design Exchange, Toronto Dominion Centre----
      

                                      venue  freq
0             Venue Category_Baseball Field  0.33
1               Venue Category_Home Service  0.33
2                       Venue Category_Pool  0.33
3          Venue Category_Accessories Store  0.00
4  Venue Category_Middle Eastern Restaurant  0.00


----Humber Summit----
                                venue  freq
0          Venue Category_Pizza Place  0.33
1        Venue Category_Shopping Mall  0.33
2  Venue Category_Empanada Restaurant  0.33
3       Venue Category_Massage Studio  0.00
4       Venue Category_Medical Center  0.00


----Humewood-Cedarvale----
                              venue  freq
0              Venue Category_Trail  0.25
1              Venue Category_Field  0.25
2       Venue Category_Hockey Arena  0.25
3               Venue Category_Park  0.25
4  Venue Category_Accessories Store  0.00


----Kingsview Village, Martin Grove Gardens, Richview Gardens, St. Phillips----
                                venue  freq
0    

                                venue  freq
0                 Venue Category_Café  0.10
1          Venue Category_Coffee Shop  0.08
2   Venue Category_Italian Restaurant  0.05
3  Venue Category_American Restaurant  0.05
4               Venue Category_Bakery  0.05


----The Annex, North Midtown, Yorkville----
                              venue  freq
0        Venue Category_Coffee Shop  0.12
1     Venue Category_Sandwich Place  0.12
2               Venue Category_Café  0.12
3        Venue Category_Pizza Place  0.08
4  Venue Category_Indian Restaurant  0.04


----The Beaches----
                              venue  freq
0  Venue Category_Health Food Store  0.25
1              Venue Category_Trail  0.25
2       Venue Category_Neighborhood  0.25
3                Venue Category_Pub  0.25
4     Venue Category_Massage Studio  0.00


----The Beaches West, India Bazaar----
                              venue  freq
0               Venue Category_Park  0.10
1     Venue Category_Sandwich Place  0.

##### Let's put that into a pandas dataframe #####

In [79]:
#First, let's write a function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


In [80]:
#Now let's create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


In [81]:
# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Venue Category_Coffee Shop,Venue Category_Café,Venue Category_Bar,Venue Category_Steakhouse,Venue Category_Thai Restaurant,Venue Category_Hotel,Venue Category_American Restaurant,Venue Category_Restaurant,Venue Category_Gym,Venue Category_Asian Restaurant
1,Agincourt,Venue Category_Lounge,Venue Category_Clothing Store,Venue Category_Breakfast Spot,Venue Category_Skating Rink,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Drugstore,Venue Category_Dumpling Restaurant,Venue Category_Eastern European Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Venue Category_Playground,Venue Category_Park,Venue Category_Yoga Studio,Venue Category_Dumpling Restaurant,Venue Category_Discount Store,Venue Category_Dive Bar,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Drugstore
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Venue Category_Grocery Store,Venue Category_Coffee Shop,Venue Category_Discount Store,Venue Category_Japanese Restaurant,Venue Category_Sandwich Place,Venue Category_Beer Store,Venue Category_Fried Chicken Joint,Venue Category_Pizza Place,Venue Category_Pharmacy,Venue Category_Fast Food Restaurant
4,"Alderwood, Long Branch",Venue Category_Pizza Place,Venue Category_Gym,Venue Category_Skating Rink,Venue Category_Pharmacy,Venue Category_Sandwich Place,Venue Category_Coffee Shop,Venue Category_Pub,Venue Category_Pool,Venue Category_Dog Run,Venue Category_Diner


### Cluster Neighborhoods ###

In [82]:
#Run k-means to cluster the neighborhood into 5 clusters.

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

#Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = postalCodes

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!


Unnamed: 0,Post Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,3.0,Venue Category_Fast Food Restaurant,Venue Category_Yoga Studio,Venue Category_Eastern European Restaurant,Venue Category_Dive Bar,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Drugstore,Venue Category_Dumpling Restaurant,Venue Category_Electronics Store
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,1.0,Venue Category_History Museum,Venue Category_Bar,Venue Category_Yoga Studio,Venue Category_Eastern European Restaurant,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Drugstore,Venue Category_Dumpling Restaurant,Venue Category_Electronics Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Venue Category_Breakfast Spot,Venue Category_Intersection,Venue Category_Pizza Place,Venue Category_Rental Car Location,Venue Category_Electronics Store,Venue Category_Medical Center,Venue Category_Mexican Restaurant,Venue Category_Donut Shop,Venue Category_Dog Run,Venue Category_Doner Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Venue Category_Coffee Shop,Venue Category_Indian Restaurant,Venue Category_Korean Restaurant,Venue Category_Yoga Studio,Venue Category_Eastern European Restaurant,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Drugstore,Venue Category_Dumpling Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Venue Category_Hakka Restaurant,Venue Category_Thai Restaurant,Venue Category_Fried Chicken Joint,Venue Category_Bank,Venue Category_Bakery,Venue Category_Athletics & Sports,Venue Category_Caribbean Restaurant,Venue Category_Cuban Restaurant,Venue Category_Cupcake Shop,Venue Category_Farmers Market


##### Finally, let's visualize the resulting clusters #####

In [93]:
address = 'City of Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
toronto_merged.dropna(subset = ['Cluster Labels'], how='all', inplace = True)
# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

The geograpical coordinate of Toronto City are 43.7170226, -79.4197830350134.
