# Segmenting and Clustering Neighborhoods in Toronto
## Part1




Import used packages

In [268]:
import requests

import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

Use BeautifulSoup to scrape the needed neighbourhood data from the Wikipedia page
https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
and extract the table.

In [269]:
# Download the page; the timeout and status check make a stalled connection or an
# HTTP error fail here instead of handing an error page to the parser.
wikiResponse = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
wikiResponse.raise_for_status()
wikiSiteHTML = wikiResponse.text
wikiSiteParsed = BeautifulSoup(wikiSiteHTML, 'lxml')
wikiTable = wikiSiteParsed.find('table', class_='wikitable sortable')
# find() returns None when no table matches; report that here rather than as an
# AttributeError in the cell that reads wikiTable.tbody.
if wikiTable is None:
    raise ValueError('No table with class "wikitable sortable" was found on the Wikipedia page')

Process the extracted table to acquire the needed format of the table data.
- keep only rows whose borough is not "Not assigned"
- set neighbourhoods that are "Not assigned" to the value of the borough

In [270]:
wikiTableRows = wikiTable.tbody.find_all('tr')

# Collect one [postal code, borough, neighbourhood] list per usable table row.
processedTableList = []
for table_row in wikiTableRows:
    cells = table_row.find_all('td')

    # rows without any <td> cell carry no data
    if not cells:
        continue

    # rstrip() removes a possible trailing "\n" from the cell text
    values = [td.text.rstrip() for td in cells]

    # skip rows whose borough is unassigned
    if values[1] == 'Not assigned':
        continue

    # an unassigned neighbourhood takes the name of its borough
    if values[2] == 'Not assigned':
        values[2] = values[1]

    processedTableList.append(values)

Create a Pandas dataframe

In [271]:
# Turn the cleaned rows into a DataFrame with one named column per table cell
wiki_column_names = ["PostalCode", "Borough", "Neighbourhood"]
wikiDF = pd.DataFrame(processedTableList, columns=wiki_column_names)
wikiDF.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


Group the neighbourhoods that share a postal code into a single row

In [272]:
# Collapse rows sharing a postal code and borough into one row whose
# neighbourhood names are joined with ", "
wikiDF = (
    wikiDF
    .groupby(["PostalCode", "Borough"])["Neighbourhood"]
    .apply(", ".join)
    .reset_index()
)
wikiDF.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


Shape of the pandas dataframe wikiDF:

In [273]:
# (rows, columns) of the grouped frame: one row per postal code / borough pair
wikiDF.shape

(103, 3)

## Part 2: Get the latitude and the longitude coordinates

Load the coordinate data from a CSV file into a dataframe, since Geocoder works unreliably.<br>
The file is loaded from IBM cloud storage.<br>
Hidden cells with sensitive data are not visible; the `body` variable is defined there.

In [274]:
# The code was removed by Watson Studio for sharing.

In [275]:
# `body` is not defined in any visible cell; per the note above it comes from the
# hidden cell that was removed for sharing.
coordinatesDF = pd.read_csv(body)
coordinatesDF.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merge the two dataframes together.
The keys for the merge are the postal codes (they are stored in differently named columns).

In [276]:
# Left-join the coordinates onto the neighbourhood table via the postal code,
# then discard the duplicate key column contributed by the coordinates file
complDF = pd.merge(
    wikiDF,
    coordinatesDF,
    how='left',
    left_on='PostalCode',
    right_on='Postal Code',
).drop("Postal Code", axis=1)
complDF.head(10)



Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


## Part 3: Explore and cluster the neighborhoods in Toronto

Print a map of Toronto and mark the neighbourhoods <br>
<br>
Coordinates of Toronto:<br>
Latitude: 43.6529<br>
Longitude: -79.3849<br>
Found on: https://gps-coordinates.org/toronto-latitude.php


In [277]:
# Install folium, pinned to version 0.5.0, from the conda-forge channel (runs as a shell command)
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab

Solving environment: done

# All requested packages already installed.



In [278]:
from pandas.io.json import json_normalize
import folium

# Toronto reference coordinates; used as the centre point of the folium maps
torontoLon = -79.3849
torontoLat = 43.6529

In [279]:
# base map centred on Toronto
map_toronto = folium.Map(location=[torontoLat, torontoLon], zoom_start=11)

# one circle marker per row of complDF, labelled "<neighbourhoods>, <borough>"
marker_rows = zip(complDF['Latitude'], complDF['Longitude'], complDF['Borough'], complDF['Neighbourhood'])
for lat, lng, borough, neighbourhood in marker_rows:
    popup_text = '{}, {}'.format(neighbourhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

### Now let's have a look at the Hill locations in Toronto
We filter for all rows with a neighbourhood in the hills (with "Hill" in its name) and display them on a map

In [280]:
# Keep only the rows whose neighbourhood string contains "Hill" and renumber them from 0.
# (The former `hillDF.append(...)` call discarded its result, so it never changed hillDF,
# and DataFrame.append no longer exists in pandas 2.x.)
hillDF = complDF[complDF['Neighbourhood'].str.contains('Hill')].reset_index(drop=True)
hillDF.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
1,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
2,M2H,North York,Hillcrest Village,43.803762,-79.363452
3,M2L,North York,"Silver Hills, York Mills",43.75749,-79.374714
4,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
5,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049
6,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307


In [281]:
# base map centred on the first hill neighbourhood
hill_map_centre = [hillDF['Latitude'][0], hillDF['Longitude'][0]]
map_hill = folium.Map(location=hill_map_centre, zoom_start=11)

# one green circle marker per hill row, labelled "<neighbourhoods>, <borough>"
hill_marker_rows = zip(hillDF['Latitude'], hillDF['Longitude'], hillDF['Borough'], hillDF['Neighbourhood'])
for lat, lng, borough, neighbourhood in hill_marker_rows:
    popup_text = '{}, {}'.format(neighbourhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_hill)

map_hill

### Let's have a more detailed look at the first neighbourhood
Definition of Foursquare credentials and version

In [282]:
# Placeholder Foursquare credentials; presumably the real values are set in the
# hidden cell that follows — confirm before running.
CLIENT_ID = 'HIDDEN'
CLIENT_SECRET = 'HIDDEN'
# sent as the `v` query parameter of the Foursquare requests
VERSION = '20180605' 

In [283]:
# The code was removed by Watson Studio for sharing.

Get the coordinates of the first neighbourhood

In [284]:
# Coordinates and name of the hill row with index label 0
hillLat = hillDF.loc[0, 'Latitude']
hillLong = hillDF.loc[0, 'Longitude']
first_hill_name = hillDF.loc[0, 'Neighbourhood']
print('Coordinates of "{}" are {},{}'.format(first_hill_name, hillLat, hillLong))

Coordinates of "Highland Creek, Rouge Hill, Port Union" are 43.7845351,-79.16049709999999


Define the Foursquare URL and call it to gather the information

In [285]:
LIMIT = 100   # maximum number of venues requested from the Foursquare API
radius = 700  # search radius sent with the request

# fill the explore-endpoint template with credentials, location and search settings
explore_url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
url = explore_url_template.format(CLIENT_ID, CLIENT_SECRET, VERSION, hillLat, hillLong, radius, LIMIT)

# call the API and decode the JSON answer
response = requests.get(url)
results = response.json()

Define a function to get the category of a venue

In [286]:
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    The category list is read from ``row['categories']``; if that key is
    absent it is read from ``row['venue.categories']`` instead.

    Returns
    -------
    The ``'name'`` entry of the first category, or None when the category
    list is empty or None.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # only a missing key triggers the fallback; other errors are not swallowed
        categories_list = row['venue.categories']

    # covers both an empty list and a missing (None) category list
    if not categories_list:
        return None
    return categories_list[0]['name']

Process the information from Foursquare

In [287]:
venues = results['response']['groups'][0]['items']

# flatten the nested venue JSON and keep only the columns of interest
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# replace each category list by the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# shorten the column names to the part after the last dot
nearby_venues.columns = [name.rsplit(".", 1)[-1] for name in nearby_venues.columns]

venue_total = nearby_venues.shape[0]
print('{} venues were found by Foursquare in {}.'.format(venue_total, hillDF['Neighbourhood'][0]))
nearby_venues.head()

4 venues were found by Foursquare in Highland Creek, Rouge Hill, Port Union.


Unnamed: 0,name,categories,lat,lng
0,Shamrock Burgers,Burger Joint,43.783823,-79.168406
1,Ted's Restaurant,Breakfast Spot,43.784468,-79.1692
2,Royal Canadian Legion,Bar,43.782533,-79.163085
3,Amigo's,Breakfast Spot,43.783749,-79.168691


Foursquare does not return many venues for this neighbourhood.

### Explore the whole hill neighbourhood

First we define a function to repeat the venue search for every neighbourhood

In [288]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues returned by the Foursquare explore endpoint for each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Neighbourhood names and their coordinates; they are iterated together with zip().
    radius : int, default 500
        Value sent as the ``radius`` query parameter of each request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was returned.

    Notes
    -----
    Reads the module-level names CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']
    venue_rows = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without categories is kept with a missing category
                # instead of aborting the whole run with an IndexError
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works for zero rows,
    # where assigning `.columns` afterwards raises a length-mismatch error
    return pd.DataFrame(venue_rows, columns=venue_columns)

Run the above defined function for every neighbourhood

In [289]:
toronto_hill_venues = getNearbyVenues(names=hillDF['Neighbourhood'], latitudes=hillDF['Latitude'], longitudes=hillDF['Longitude'])
# The total covers every row of hillDF, so report the number of rows instead of
# naming only the first two of them.
print('{} venues were found by Foursquare across all {} hill neighbourhood groups.'.format(toronto_hill_venues.shape[0], hillDF.shape[0]))
toronto_hill_venues.head()

47 venues were found by Foursquare in "Highland Creek, Rouge Hill, Port Union" and "Guildwood, Morningside, West Hill".


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Marina Spa,43.766,-79.191,Spa


How many results were returned for each neighbourhood?

In [290]:
# number of venues returned per neighbourhood (500 m search)
venues_per_neighbourhood = toronto_hill_venues.groupby('Neighborhood')['Venue'].count()
venues_per_neighbourhood.reset_index()

Unnamed: 0,Neighborhood,Venue
0,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",14
1,"Forest Hill North, Forest Hill West",4
2,"Guildwood, Morningside, West Hill",9
3,"Highland Creek, Rouge Hill, Port Union",2
4,Hillcrest Village,4
5,"Silver Hills, York Mills",1
6,"Woodbine Gardens, Parkview Hill",13


Let's analyse how many different categories there are.

In [291]:
# count the distinct venue categories of the 500 m search
category_count = len(toronto_hill_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count))

There are 38 uniques categories.


### Let's look at a bigger radius

In [292]:
toronto_hill_venues2 = getNearbyVenues(names=hillDF['Neighbourhood'], latitudes=hillDF['Latitude'], longitudes=hillDF['Longitude'], radius=1000)
# The total covers every row of hillDF, so report the number of rows instead of
# naming only the first two of them.
print('{} venues were found by Foursquare across all {} hill neighbourhood groups in a radius of 1000m.'.format(toronto_hill_venues2.shape[0], hillDF.shape[0]))
toronto_hill_venues2.head()

195 venues were found by Foursquare in "Highland Creek, Rouge Hill, Port Union" and "Guildwood, Morningside, West Hill" in a radius of 1000m.


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Shamrock Burgers,43.783823,-79.168406,Burger Joint
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Fratelli Village Pizzeria,43.784008,-79.169787,Italian Restaurant
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Ted's Restaurant,43.784468,-79.1692,Breakfast Spot
3,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Colonel Danforth Park,43.777507,-79.164303,Playground
4,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Centennial Park,43.786257,-79.148776,Park


How many results were returned for each neighbourhood?

In [293]:
# number of venues returned per neighbourhood (1000 m search)
venues_per_neighbourhood2 = toronto_hill_venues2.groupby('Neighborhood')['Venue'].count()
venues_per_neighbourhood2.reset_index()

Unnamed: 0,Neighborhood,Venue
0,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",80
1,"Forest Hill North, Forest Hill West",42
2,"Guildwood, Morningside, West Hill",23
3,"Highland Creek, Rouge Hill, Port Union",5
4,Hillcrest Village,21
5,"Silver Hills, York Mills",4
6,"Woodbine Gardens, Parkview Hill",20


The neighbourhoods seem to have different popularities.

Let's analyse how many different categories there are.

In [294]:
# count the distinct venue categories of the 1000 m search
category_count2 = len(toronto_hill_venues2['Venue Category'].unique())
print('There are {} uniques categories.'.format(category_count2))

There are 79 uniques categories.


<b>We continue with the smaller result set obtained with a radius of 500 m.</b>

Next, let's group the rows by neighbourhood and take the mean of the frequency of occurrence of each category

In [295]:
# one hot encoding
toronto_hill_onehot = pd.get_dummies(toronto_hill_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_hill_onehot['Neighborhood'] = toronto_hill_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_hill_onehot.columns[-1]] + list(toronto_hill_onehot.columns[:-1])
toronto_hill_onehot = toronto_hill_onehot[fixed_columns]

toronto_hill_grouped = toronto_hill_onehot.groupby('Neighborhood').mean().reset_index()
toronto_hill_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bagel Shop,Bank,Bar,Breakfast Spot,Cafeteria,Café,Coffee Shop,...,Pool,Pub,Rental Car Location,Spa,Sports Bar,Supermarket,Sushi Restaurant,Tech Startup,Trail,Vietnamese Restaurant
0,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.142857,...,0.0,0.142857,0.0,0.0,0.071429,0.071429,0.071429,0.0,0.0,0.071429
1,"Forest Hill North, Forest Hill West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0
2,"Guildwood, Morningside, West Hill",0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,...,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0
3,"Highland Creek, Rouge Hill, Port Union",0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Show the 5 most common venues in each neighbourhood

In [296]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of ``row`` is skipped, the remaining entries are sorted
    in descending order of value, and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# Build the column headers: "1st", "2nd", "3rd", then "4th", "5th", ...
# The suffix is chosen with an explicit bounds check instead of a bare `except:`,
# which would also have hidden unrelated errors.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per neighbourhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_hill_grouped['Neighborhood']

# fill every row with its most frequent venue categories
for ind in range(toronto_hill_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_hill_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Pub,Coffee Shop,Vietnamese Restaurant,Bagel Shop,Fried Chicken Joint
1,"Forest Hill North, Forest Hill West",Jewelry Store,Sushi Restaurant,Park,Trail,Bagel Shop
2,"Guildwood, Morningside, West Hill",Mexican Restaurant,Tech Startup,Intersection,Electronics Store,Spa
3,"Highland Creek, Rouge Hill, Port Union",Construction & Landscaping,Bar,Vietnamese Restaurant,Gym / Fitness Center,Golf Course
4,Hillcrest Village,Golf Course,Mediterranean Restaurant,Dog Run,Pool,Vietnamese Restaurant


### Now let's cluster the neighbourhoods

Import library and initialize model

In [297]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# feature matrix: the category frequencies without the neighbourhood name.
# `axis` is passed by keyword because pandas 2.x no longer accepts it positionally.
toronto_hill_grouped_clustering = toronto_hill_grouped.drop('Neighborhood', axis=1)

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_hill_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 3, 2, 4, 0, 1, 2], dtype=int32)

In [301]:
# Add the clustering labels as the first column. insert() refuses a column that
# already exists, so when this cell is run again the labels are overwritten instead.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach the cluster label and the top venues to each hill row, matching hillDF's
# 'Neighbourhood' against the 'Neighborhood' index of the venue table.
# join() returns a new frame, so hillDF itself is left unchanged.
toronto_hill_merged = hillDF.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

toronto_hill_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,4,Construction & Landscaping,Bar,Vietnamese Restaurant,Gym / Fitness Center,Golf Course
1,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2,Mexican Restaurant,Tech Startup,Intersection,Electronics Store,Spa
2,M2H,North York,Hillcrest Village,43.803762,-79.363452,0,Golf Course,Mediterranean Restaurant,Dog Run,Pool,Vietnamese Restaurant
3,M2L,North York,"Silver Hills, York Mills",43.75749,-79.374714,1,Cafeteria,Vietnamese Restaurant,Construction & Landscaping,Gym / Fitness Center,Golf Course
4,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937,2,Pizza Place,Fast Food Restaurant,Café,Gastropub,Pet Store


Now let's visualize the findings

In [302]:
# import libraries for colorizing
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[torontoLat, torontoLon], zoom_start=11)

# One evenly spaced rainbow colour per cluster, as hex strings.
# (The unused helpers `x`, `ys` and `markers_colors` were removed; `ys` was only
# ever used for its length, which equals kclusters.)
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# add markers to the map
for lat, lon, poi, cluster in zip(
        toronto_hill_merged['Latitude'],
        toronto_hill_merged['Longitude'],
        toronto_hill_merged['Neighbourhood'],
        toronto_hill_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # `cluster - 1` keeps the existing colour assignment: label 0 wraps round to the last colour
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

<b>Cluster 1 (k-means label 0)</b>

In [303]:
# rows with k-means label 0: column 1 plus every column from position 5 onwards
label_mask = toronto_hill_merged['Cluster Labels'] == 0
display_columns = toronto_hill_merged.columns[[1] + list(range(5, toronto_hill_merged.shape[1]))]
toronto_hill_merged.loc[label_mask, display_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,North York,0,Golf Course,Mediterranean Restaurant,Dog Run,Pool,Vietnamese Restaurant


<b>Cluster 2 (k-means label 1)</b>

In [304]:
# rows with k-means label 1: column 1 plus every column from position 5 onwards
label_mask = toronto_hill_merged['Cluster Labels'] == 1
display_columns = toronto_hill_merged.columns[[1] + list(range(5, toronto_hill_merged.shape[1]))]
toronto_hill_merged.loc[label_mask, display_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
3,North York,1,Cafeteria,Vietnamese Restaurant,Construction & Landscaping,Gym / Fitness Center,Golf Course


<b>Cluster 3 (k-means label 2)</b>

In [305]:
# rows with k-means label 2: column 1 plus every column from position 5 onwards
label_mask = toronto_hill_merged['Cluster Labels'] == 2
display_columns = toronto_hill_merged.columns[[1] + list(range(5, toronto_hill_merged.shape[1]))]
toronto_hill_merged.loc[label_mask, display_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Scarborough,2,Mexican Restaurant,Tech Startup,Intersection,Electronics Store,Spa
4,East York,2,Pizza Place,Fast Food Restaurant,Café,Gastropub,Pet Store
5,Central Toronto,2,Pub,Coffee Shop,Vietnamese Restaurant,Bagel Shop,Fried Chicken Joint


<b>Cluster 4 (k-means label 3)</b>

In [306]:
# rows with k-means label 3: column 1 plus every column from position 5 onwards
label_mask = toronto_hill_merged['Cluster Labels'] == 3
display_columns = toronto_hill_merged.columns[[1] + list(range(5, toronto_hill_merged.shape[1]))]
toronto_hill_merged.loc[label_mask, display_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
6,Central Toronto,3,Jewelry Store,Sushi Restaurant,Park,Trail,Bagel Shop


<b>Cluster 5 (k-means label 4)</b>

In [307]:
# rows with k-means label 4: column 1 plus every column from position 5 onwards
label_mask = toronto_hill_merged['Cluster Labels'] == 4
display_columns = toronto_hill_merged.columns[[1] + list(range(5, toronto_hill_merged.shape[1]))]
toronto_hill_merged.loc[label_mask, display_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Scarborough,4,Construction & Landscaping,Bar,Vietnamese Restaurant,Gym / Fitness Center,Golf Course


# End of my Jupyter Notebook