# Segmenting and Clustering Neighbourhoods in Toronto

### Scraping the Web to Obtain Toronto Data

In [110]:
import os

import numpy as np
import pandas as pd
import urllib3
from bs4 import BeautifulSoup

In [111]:
# Wikipedia page listing the Toronto ("M") postal codes; this is the page that gets scraped
wikiLink = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Column names of the neighbourhood dataframe; also used to count the key columns that precede the venue categories
dfColumns = ['PostalCode', 'Borough', 'Neighborhood']

# Radius handed to folium.Circle when drawing the area around each postal code
# NOTE(review): the venue-download cell rebinds `radius` to 500 — confirm which value is intended
radius = 1100

# Number of venues per neighbourhood
# NOTE(review): not referenced by the cells shown here (the venue download uses its own LIMIT) — confirm and consolidate
limit = 100


In [112]:
http = urllib3.PoolManager()
# Silences urllib3's InsecureRequestWarning for the request below
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Download the raw Wikipedia page
response = http.request('GET', wikiLink)

# Stop here on anything but a successful response: parsing an error page would
# silently produce an empty or wrong neighbourhood table further down.
if response.status != 200:
    raise RuntimeError('Could not download {} (HTTP status {})'.format(wikiLink, response.status))

# Parse the HTML so the postal-code table can be walked tag by tag
soup = BeautifulSoup(response.data, "lxml")

In [113]:
# Every <tr> of the first table in the page body is one candidate neighbourhood row
neighbourhoodsList = soup.body.table.find_all('tr')

# For each row keep the text of its <td> cells, stripped of tags and surrounding
# whitespace. Rows without any <td> (such as a header row) give an empty list,
# which becomes an all-empty dataframe row.
fullList = [
    [str(cell.get_text().strip()) for cell in table_row.find_all('td')]
    for table_row in neighbourhoodsList
]

# Load the scraped rows into the dataframe
df_neighbourhoods = pd.DataFrame(fullList, columns=dfColumns)
df_neighbourhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,,,
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [114]:
# Keep only the rows that have a real borough (neither missing nor "Not assigned")
has_borough = df_neighbourhoods['Borough'].notnull() & (df_neighbourhoods['Borough'] != "Not assigned")
df_neighbourhoods = df_neighbourhoods[has_borough]

# One row per (PostalCode, Borough): the neighbourhood names of each group are
# gathered in a list and then joined into a single comma-separated string
df_neighbourhoods = (
    df_neighbourhoods
    .groupby(['PostalCode', 'Borough'])['Neighborhood']
    .apply(list)
    .apply(', '.join)
    .reset_index()
)

# Where the neighbourhood is missing or "Not assigned", fall back to the borough name
needs_fallback = df_neighbourhoods['Neighborhood'].isnull() | (df_neighbourhoods['Neighborhood'] == "Not assigned")
df_neighbourhoods.loc[needs_fallback, 'Neighborhood'] = df_neighbourhoods['Borough']

df_neighbourhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [115]:
# (rows, columns) of the neighbourhood table after the cleaning and grouping step
df_neighbourhoods.shape


(103, 3)

### Adding Geographical coordinates to all neighbourhoods

In [116]:
!conda install -c conda-forge geocoder           
import geocoder # import geocoder

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [117]:
# Function to return latitude and longitude of postal codes
def PostalCodeCoord(postal_code, max_attempts=10):
    """Look up the coordinates of a Toronto postal code with the ArcGIS geocoder.

    The query sent to the geocoder is ``"<postal_code>, Toronto, Ontario"``.
    A lookup whose ``latlng`` is ``None`` counts as a failed attempt and is
    retried, up to ``max_attempts`` times in total.

    Parameters
    ----------
    postal_code : str
        Postal code to geocode.
    max_attempts : int, optional
        Maximum number of lookups before giving up (default 10). Must be >= 1.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` taken from the first successful lookup.

    Raises
    ------
    ValueError
        If ``max_attempts`` is smaller than 1.
    RuntimeError
        If every attempt came back without coordinates.
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    query = '{}, Toronto, Ontario'.format(postal_code)

    # Bounded retry: an unbounded loop would hang the notebook forever when the
    # geocoder keeps failing (service down, unknown postal code, no network).
    for _attempt in range(max_attempts):
        coord_lat_lng = geocoder.arcgis(query).latlng
        if coord_lat_lng is not None:
            latitude = coord_lat_lng[0]
            longitude = coord_lat_lng[1]
            return latitude, longitude

    raise RuntimeError(
        'No coordinates found for {!r} after {} attempts'.format(query, max_attempts)
    )

In [118]:
# Collect one [postal code, latitude, longitude] record per postal code
postalCodeCoord = []
for postalCode in df_neighbourhoods['PostalCode']:
    latitude, longitude = PostalCodeCoord(postalCode)
    postalCodeCoord += [[postalCode, latitude, longitude]]

# Turn the collected records into a labelled coordinates table
coord_column_names = ['PostalCode', 'Latitude', 'Longitude']
df_coords = pd.DataFrame(postalCodeCoord)
df_coords.columns = coord_column_names

# Attach the coordinates to the neighbourhood table, matching rows on the postal code
df_neighbourhoods = df_neighbourhoods.merge(df_coords, on='PostalCode')

# Second name for the same dataframe object; the mapping and venue cells use this name
df_neighbourhoods_toronto = df_neighbourhoods

df_neighbourhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944


### Clustering Neighbourhoods using Foursquare API

In [119]:
# install folium
!conda install -c conda-forge folium --yes               
import folium
import requests

# import k-means from sklearn
from sklearn.cluster import KMeans

# plotting library
import matplotlib.cm as cm
import matplotlib.colors as colors

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [120]:
address = 'Toronto, ON'

# Upper bound on lookups, so a geocoder that keeps failing surfaces as an error
# instead of hanging the cell in an endless retry loop
max_geocode_attempts = 10

# Stays None until a lookup returns coordinates
coord_lat_lng = None

for attempt in range(max_geocode_attempts):
    g = geocoder.arcgis(address)
    coord_lat_lng = g.latlng
    if coord_lat_lng is not None:
        break

if coord_lat_lng is None:
    raise RuntimeError('Could not geocode {!r} after {} attempts'.format(address, max_geocode_attempts))

latitude_toronto = coord_lat_lng[0]
longitude_toronto = coord_lat_lng[1]

print('Coordinates of Toronto are {}, {}.'.format(latitude_toronto, longitude_toronto))

Coordinates of Toronto are 43.648690000000045, -79.38543999999996.


In [121]:
# Base map centred on the Toronto coordinates
map_toronto = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# One fixed-size circle marker per postal code; its popup shows the postal code
marker_rows = zip(
    df_neighbourhoods_toronto['Latitude'],
    df_neighbourhoods_toronto['Longitude'],
    df_neighbourhoods_toronto['PostalCode'],
)
for lat, lng, postalCode in marker_rows:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}'.format(postalCode), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

map_toronto

In [122]:
# Second base map, used to show the area covered around each postal code
map_toronto_radius = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# One circle per postal code, sized by the notebook-level `radius`; its popup shows the postal code
circle_rows = zip(
    df_neighbourhoods_toronto['Latitude'],
    df_neighbourhoods_toronto['Longitude'],
    df_neighbourhoods_toronto['PostalCode'],
)
for lat, lng, postalCode in circle_rows:
    area = folium.Circle(
        [lat, lng],
        radius=radius,
        popup=folium.Popup('{}'.format(postalCode), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.3,
    )
    area.add_to(map_toronto_radius)

map_toronto_radius

In [123]:
# Foursquare credentials are read from the environment so that they are never
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting Jupyter. Keys that were committed with an earlier version of
# this notebook should be considered leaked and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')  # Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')  # Foursquare Secret
VERSION = '20180605'  # The Foursquare API version

# Fail here with a clear message rather than later with an opaque API error
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET environment variables'
    )

In [124]:
# Search radius and maximum number of venues requested per postal code.
# NOTE(review): this rebinds the notebook-level `radius` (1100 in the configuration
# cell), so the circles drawn on the radius map do not match the area searched
# here — confirm which value is intended.
radius = 500
LIMIT = 100

# One tuple per venue: neighbourhood key and coordinates, then venue name, coordinates and category
venues = []

for lat, long, post, borough, neighborhood in zip(df_neighbourhoods_toronto['Latitude'], df_neighbourhoods_toronto['Longitude'], df_neighbourhoods_toronto['PostalCode'], df_neighbourhoods_toronto['Borough'],df_neighbourhoods_toronto['Neighborhood']):
    # Let requests build and escape the query string instead of formatting the URL by hand
    response = requests.get(
        "https://api.foursquare.com/v2/venues/explore",
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, long),
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30,  # do not let one stalled request hang the whole loop
    )
    # Surface HTTP errors (bad credentials, quota exceeded) here instead of as a KeyError below
    response.raise_for_status()

    results = response.json()["response"]['groups'][0]['items']

    for venue in results:
        details = venue['venue']
        categories = details['categories']

        # A venue without any category cannot contribute to the category-based
        # analysis below, and indexing [0] on it would raise IndexError.
        if not categories:
            continue

        venues.append((
            post,
            borough,
            neighborhood,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name']))

In [125]:
# Column labels, in the same order as the fields of each tuple stored in `venues`
venue_column_names = [
    'PostalCode', 'Borough', 'Neighborhood',
    'BoroughLatitude', 'BoroughLongitude',
    'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory',
]

# Build the venue table from the collected tuples, then label its columns
toronto_venues = pd.DataFrame(venues)
toronto_venues.columns = venue_column_names

print(toronto_venues.shape)
toronto_venues.head()

(2430, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517,Canadian Appliance Source Whitby,43.808353,-79.191331,Home Service
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725,Royal Canadian Legion,43.782533,-79.163085,Bar
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193,Homestead Roofing Repair,43.76514,-79.178663,Construction & Landscaping
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193,Heron Park Community Centre,43.768867,-79.176958,Gym / Fitness Center
4,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193,Heron Park,43.769327,-79.177201,Park


### Using one-hot encoding so that we can cluster with k-means

In [126]:
# One-hot encode the venue category: one indicator column per category, one row per venue
category_dummies = pd.get_dummies(toronto_venues[['VenueCategory']], prefix="", prefix_sep="")

# Key columns that identify the neighbourhood of each venue
key_columns = ['PostalCode', 'Borough', 'Neighborhood']

# A venue category can carry the same name as a key column (there is a
# "Neighborhood" category); drop such indicator columns so the key is unambiguous.
# errors='ignore' keeps the cell working when no such category is present in the data.
category_dummies = category_dummies.drop(columns=key_columns, errors='ignore')

# Key columns first, category indicators after them
toronto_onehot = pd.concat([toronto_venues[key_columns], category_dummies], axis=1)

toronto_onehot.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,...,Tram Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,Scarborough,"Rouge, Malvern",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1E,Scarborough,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [127]:

# Average the category indicators within each (PostalCode, Borough, Neighborhood) group,
# keeping the three key fields as ordinary columns
toronto_grouped = toronto_onehot.groupby(['PostalCode', 'Borough', 'Neighborhood'], as_index=False).mean()
toronto_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,...,Tram Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,Scarborough,"Rouge, Malvern",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,Scarborough,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,Scarborough,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Let's print each neighborhood with its top 5 most common venues

In [128]:
num_top_venues = 5

# Walk the grouped dataframe one neighbourhood at a time
for index, row in toronto_grouped.iterrows():
    tempPostalCode = row['PostalCode']
    tempBorough = row['Borough']
    tempNeighborhood = row['Neighborhood']

    print("----"+tempPostalCode + " / " + tempBorough + " / " + tempNeighborhood +"----")

    # `row` already holds this neighbourhood's values, so the table is built from
    # it directly instead of filtering the whole dataframe again for every row.
    # The first len(dfColumns) entries are the PostalCode / Borough / Neighborhood
    # key and are skipped; frequencies are rounded to two digits before sorting.
    temp = pd.DataFrame(
        {
            'venue': row.index[len(dfColumns):],
            'freq': row.iloc[len(dfColumns):].astype(float).round(2).values,
        },
        columns=['venue', 'freq'],
    )

    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1B / Scarborough / Rouge, Malvern----
                       venue  freq
0               Home Service   1.0
1                Men's Store   0.0
2         Mexican Restaurant   0.0
3  Middle Eastern Restaurant   0.0
4         Miscellaneous Shop   0.0


----M1C / Scarborough / Highland Creek, Rouge Hill, Port Union----
                       venue  freq
0                        Bar   1.0
1                Opera House   0.0
2         Mexican Restaurant   0.0
3  Middle Eastern Restaurant   0.0
4         Miscellaneous Shop   0.0


----M1E / Scarborough / Guildwood, Morningside, West Hill----
                        venue  freq
0  Construction & Landscaping  0.33
1                        Park  0.33
2        Gym / Fitness Center  0.33
3                      Museum  0.00
4                   Nightclub  0.00


----M1G / Scarborough / Woburn----
               venue  freq
0  Korean Restaurant   0.2
1               Park   0.2
2  Indian Restaurant   0.2
3        Coffee Shop   0.2
4   Business Ser

4  Sandwich Place  0.06


----M4M / East Toronto / Studio District----
                venue  freq
0               Diner  0.08
1  Italian Restaurant  0.06
2                Café  0.06
3             Brewery  0.06
4         Pizza Place  0.06


----M4N / Central Toronto / Lawrence Park----
                       venue  freq
0                   Bus Line   0.5
1                Swim School   0.5
2          Accessories Store   0.0
3                     Office   0.0
4  Middle Eastern Restaurant   0.0


----M4P / Central Toronto / Davisville North----
               venue  freq
0  Convenience Store  0.14
1     Breakfast Spot  0.14
2                Gym  0.14
3  Food & Drink Shop  0.14
4               Park  0.14


----M4R / Central Toronto / North Toronto West----
           venue  freq
0         Garden  0.25
1     Playground  0.25
2           Park  0.25
3       Gym Pool  0.25
4  Movie Theater  0.00


----M4S / Central Toronto / Davisville----
                venue  freq
0        Dessert Shop  0.1

                             venue  freq
0             Fast Food Restaurant  0.50
1                      Coffee Shop  0.25
2       Construction & Landscaping  0.25
3       Modern European Restaurant  0.00
4  Molecular Gastronomy Restaurant  0.00


----M6N / York / The Junction North, Runnymede----
                    venue  freq
0                 Brewery   0.6
1      Athletics & Sports   0.2
2  Furniture / Home Store   0.2
3           Movie Theater   0.0
4               Nightclub   0.0


----M6P / West Toronto / High Park, The Junction South----
                                      venue  freq
0                            Sandwich Place  0.33
1  Residential Building (Apartment / Condo)  0.33
2                                      Park  0.33
3                                 Nightclub  0.00
4                   New American Restaurant  0.00


----M6R / West Toronto / Parkdale, Roncesvalles----
                         venue  freq
0                  Coffee Shop  0.07
1                   

### Let's put the result in a dataframe, starting with a helper that ranks the venue categories of one row

In [129]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest category values in ``row``.

    The first ``len(dfColumns)`` entries of ``row`` (the PostalCode / Borough /
    Neighborhood key) are skipped. The remaining entries are ordered from the
    highest value to the lowest, and the index labels of the leading
    ``num_top_venues`` entries are returned as an array.
    """
    ranked = row.iloc[len(dfColumns):].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

### Let's create a new dataframe and display the top 10 venues in each neighborhood

In [130]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# Column names: the three key columns, then one column per rank
columns = ['PostalCode', 'Borough', 'Neighborhood']

for ind in range(num_top_venues):
    # Explicit bounds check instead of a bare `except:`, which would also hide unrelated errors
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Ranked category names for every row of the grouped dataframe
top_venue_rows = [
    return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
    for ind in range(toronto_grouped.shape[0])
]

# Assemble the result in one step: key columns from the grouped dataframe
# (Postal code x Borough x Neighborhood) followed by the ranked venue columns
neighborhoods_venues_sorted = pd.concat(
    [
        toronto_grouped[columns[:len(dfColumns)]].reset_index(drop=True),
        pd.DataFrame(top_venue_rows, columns=columns[len(dfColumns):]),
    ],
    axis=1,
)

neighborhoods_venues_sorted.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",Home Service,Food,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",Bar,Yoga Studio,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",Construction & Landscaping,Gym / Fitness Center,Park,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Dumpling Restaurant
3,M1G,Scarborough,Woburn,Indian Restaurant,Business Service,Coffee Shop,Park,Korean Restaurant,Fast Food Restaurant,Farmers Market,Farm,Field,Electronics Store
4,M1H,Scarborough,Cedarbrae,Playground,Yoga Studio,Dumpling Restaurant,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


### Clustering Neighbourhoods

In [131]:
# Number of clusters
kclusters = 5

# Only the category frequencies go into the clustering; the key columns are removed.
# `columns=` is used because passing the axis positionally (`drop(label, 1)`) is
# rejected by pandas 2.x.
toronto_grouped_clustering = toronto_grouped.drop(columns=['PostalCode', 'Borough', 'Neighborhood'])

# Run k-means clustering (fixed random_state so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# Cluster labels generated for the first ten rows
kmeans.labels_[0:10]

array([1, 2, 3, 2, 4, 2, 2, 2, 2, 3])

### Let's now create a dataframe that includes the cluster label of each neighbourhood

In [132]:
# Put the k-means label of each row in the first column.
# DataFrame.insert raises ValueError when the column already exists, so when this
# cell is run again the existing column is overwritten instead.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [133]:

# Columns that identify a neighbourhood in both tables
merge_keys = ['PostalCode', 'Borough', 'Neighborhood']

# Index the ranked-venues table by those keys, then attach its cluster label and
# venue columns to the coordinates table
venues_by_key = neighborhoods_venues_sorted.set_index(merge_keys)
toronto_merged = df_neighbourhoods_toronto.join(venues_by_key, on=merge_keys)

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517,1.0,Home Service,Food,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725,2.0,Bar,Yoga Studio,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193,3.0,Construction & Landscaping,Gym / Fitness Center,Park,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Dumpling Restaurant
3,M1G,Scarborough,Woburn,43.768369,-79.21759,2.0,Indian Restaurant,Business Service,Coffee Shop,Park,Korean Restaurant,Fast Food Restaurant,Farmers Market,Farm,Field,Electronics Store
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944,4.0,Playground,Yoga Studio,Dumpling Restaurant,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


### Let's visualize the clusters

In [None]:
# Create the map centred on Toronto. `latitude` / `longitude` are leftovers from
# the postal-code geocoding loop (they hold the last postal code's coordinates),
# so the Toronto centre computed earlier is used instead.
map_clusters = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=11)

# One colour per cluster, spread evenly over the rainbow colour map
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per postal code, coloured by its cluster
for lat, lon, postalCode, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Cluster Labels']):
    # Postal codes without a matching row in the ranked-venues table have no
    # cluster label (NaN after the join); int(NaN) would raise, so they are left off the map.
    if pd.isna(cluster):
        continue

    cluster = int(cluster)
    label = folium.Popup(str(postalCode) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # cluster-1 is kept from the original colour assignment: cluster 0 wraps around to the last colour
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Let's analyze all the clusters

#### Cluster 0
This cluster mostly has fast food restaurants and construction & landscaping as its primary venues.

In [137]:
# First ten neighbourhoods whose cluster label is 0
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0].head(10)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
80,M6M,York,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",43.69453,-79.484489,0.0,Fast Food Restaurant,Construction & Landscaping,Coffee Shop,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market
96,M9L,North York,Humber Summit,43.7595,-79.557028,0.0,Construction & Landscaping,Food,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


#### Cluster 1
This cluster has home services category as the most common venue followed by food and flea markets

In [140]:
# First ten neighbourhoods whose cluster label is 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1].head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.811525,-79.195517,1.0,Home Service,Food,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
64,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.694785,-79.414405,1.0,Home Service,Accessories Store,Wings Joint,Concert Hall,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market


#### Cluster 2
This cluster has Indian restaurants as the most common venue followed by shopping malls and convenience stores

In [141]:
# First ten neighbourhoods whose cluster label is 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2].head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.785665,-79.158725,2.0,Bar,Yoga Studio,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm
3,M1G,Scarborough,Woburn,43.768369,-79.21759,2.0,Indian Restaurant,Business Service,Coffee Shop,Park,Korean Restaurant,Fast Food Restaurant,Farmers Market,Farm,Field,Electronics Store
5,M1J,Scarborough,Scarborough Village,43.743125,-79.23175,2.0,Indian Restaurant,Train Station,Restaurant,Grocery Store,Eastern European Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Yoga Studio
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.726276,-79.263625,2.0,Discount Store,Convenience Store,Coffee Shop,Department Store,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Electronics Store,Fish Market
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.713054,-79.285055,2.0,Bus Line,Bakery,Intersection,Coffee Shop,Metro Station,Bus Station,Soccer Field,Yoga Studio,Empanada Restaurant,Ethiopian Restaurant
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.724235,-79.227925,2.0,Bistro,Bank,Pharmacy,Liquor Store,Sandwich Place,Coffee Shop,Yoga Studio,Elementary School,Empanada Restaurant,Ethiopian Restaurant
10,M1P,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",43.759975,-79.268974,2.0,Gift Shop,Bakery,Light Rail Station,Falafel Restaurant,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Farm,Eastern European Restaurant
11,M1R,Scarborough,"Maryvale, Wexford",43.75071,-79.30056,2.0,Convenience Store,Auto Garage,Farm,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Eastern European Restaurant
12,M1S,Scarborough,Agincourt,43.79394,-79.267976,2.0,Shopping Mall,Chinese Restaurant,Skating Rink,Bakery,Sushi Restaurant,Supermarket,Department Store,Grocery Store,Pool,Vietnamese Restaurant
13,M1T,Scarborough,"Clarks Corners, Sullivan, Tam O'Shanter",43.784725,-79.299066,2.0,Pizza Place,Convenience Store,Fried Chicken Joint,Hobby Shop,Pharmacy,Chinese Restaurant,Bus Stop,Thai Restaurant,Coffee Shop,Shopping Mall


#### Cluster 3
This cluster has parks, playgrounds and construction & landscaping as the main venues

In [143]:
# First ten neighbourhoods whose cluster label is 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3].head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765815,-79.175193,3.0,Construction & Landscaping,Gym / Fitness Center,Park,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Dumpling Restaurant
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.69677,-79.259967,3.0,General Entertainment,College Stadium,Skating Rink,Gym Pool,Park,Gym,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
19,M2K,North York,Bayview Village,43.781015,-79.380529,3.0,Construction & Landscaping,Trail,Park,Event Space,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Falafel Restaurant,Dumpling Restaurant
23,M2P,North York,York Mills West,43.747895,-79.399919,3.0,Convenience Store,Park,Speakeasy,Bank,Cuban Restaurant,Electronics Store,Cosmetics Shop,Fish Market,Fish & Chips Shop,Field
25,M3A,North York,Parkwoods,43.75242,-79.329242,3.0,Food & Drink Shop,Park,Yoga Studio,Falafel Restaurant,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Farm
34,M4A,North York,Victoria Village,43.7306,-79.313265,3.0,Park,Pharmacy,Grocery Store,Yoga Studio,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant
40,M4J,East York,East Toronto,43.688765,-79.334175,3.0,Bar,Park,Italian Restaurant,Farmers Market,Electronics Store,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant
41,M4K,East Toronto,"The Danforth West, Riverdale",43.683178,-79.355105,3.0,Park,Discount Store,Bus Line,Grocery Store,Yoga Studio,Elementary School,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant
46,M4R,Central Toronto,North Toronto West,43.714523,-79.40696,3.0,Playground,Park,Garden,Gym Pool,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Dumpling Restaurant,Event Space
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.690685,-79.382946,3.0,Playground,Gym,Park,Tennis Court,Cosmetics Shop,Creperie,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market


#### Cluster 4
This cluster is an outlier having playground as the most common venue followed by yoga studio and restaurants

In [146]:
# First ten neighbourhoods whose cluster label is 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4].head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944,4.0,Playground,Yoga Studio,Dumpling Restaurant,Fish Market,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
