# Segmenting and Clustering Neighborhoods in Toronto

## Part 1

Import relevant libraries:

In [148]:
import os
import urllib.request

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

Use urllib and BeautifulSoup to fetch the wikitable as HTML. Then initialise an empty list for each column, loop through the rows of the wikitable, and append each cell's text to the corresponding list:

In [149]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Parse inside a context manager so the HTTP response is closed once it has been read.
with urllib.request.urlopen(url) as page:
    soup = BeautifulSoup(page,'html.parser')

# One list per output column, filled in table-row order.
postal_code = []
borough = []
neighbourhood = []

# Skip the first <tr> (presumably the header row) and read the first three cells of every other row.
for items in soup.find('table', class_='wikitable').find_all('tr')[1:]:
    data = items.find_all(['th','td'])
    # find(text=True) yields the first text node of the cell; the trailing '\n' is stripped in a later cell.
    borough.append(data[1].find(text=True))
    neighbourhood.append(data[2].find(text=True))
    postal_code.append(data[0].find(text=True))

Combine the lists into one dataframe:

In [150]:
# Assemble the three scraped lists into a single frame, one column per list.
df = pd.DataFrame({
    'postal_code': postal_code,
    'borough': borough,
    'neighbourhood': neighbourhood,
})
df

Unnamed: 0,postal_code,borough,neighbourhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
...,...,...,...
175,M5Z\n,Not assigned\n,Not assigned\n
176,M6Z\n,Not assigned\n,Not assigned\n
177,M7Z\n,Not assigned\n,Not assigned\n
178,M8Z\n,Etobicoke\n,"Mimico NW, The Queensway West, South of Bloor,..."


Remove the unwanted new line '\n' at the end of each string:

In [151]:
# Regex replacement removes every '\n' from all string cells of the frame.
df = df.replace('\n', '', regex=True)

Remove rows where Borough = 'Not assigned'. Note: As there are no neighbourhoods with the value 'Not assigned' there is no handler needed for this at this stage.

In [152]:
# Keep only the rows whose borough is something other than 'Not assigned'.
has_borough = df['borough'].ne('Not assigned')
df = df.loc[has_borough]

In [153]:
df.shape

(103, 3)

## Part 2

Read the coordinates from a CSV file, because the API was not returning the expected results:

In [154]:
# The CSV location can be overridden with the GEOSPATIAL_CSV environment variable so the
# notebook runs on other machines; the fallback is the path the notebook was authored with.
coords_path = os.environ.get(
    'GEOSPATIAL_CSV',
    r'C:\Users\Eilidh.Mayne\Documents\Coursera Repo\Coursera_Capstone\Geospatial_Coordinates.csv',
)
coords = pd.read_csv(coords_path)

Merge the two dataframes on postal code and delete the duplicated postal code column:

In [155]:
# Left-join the coordinates onto the scraped table by postal code, then discard
# the right-hand key column, which duplicates 'postal_code'.
newdf = (
    df.merge(coords, how='left', left_on='postal_code', right_on='Postal Code')
      .drop(columns=['Postal Code'])
)

In [156]:
newdf

Unnamed: 0,postal_code,borough,neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


## Part 3

In [157]:
neighbourhoods = newdf

Set up Folium map with Toronto coordinates and plot all neighbourhoods, labelled in their boroughs.

In [158]:
address = 'Toronto, TOR'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.7370584, -79.2442535.


In [159]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
# Every column is read from `neighbourhoods` (the frame defined above); the loop variables
# use distinct names so they do not overwrite the `borough` / `neighbourhood` lists built
# while scraping.
for lat, lng, borough_name, neighbourhood_name in zip(neighbourhoods['Latitude'], neighbourhoods['Longitude'], neighbourhoods['borough'], neighbourhoods['neighbourhood']):
    label = '{}, {}'.format(neighbourhood_name, borough_name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto

Plot map focussing on neighbourhoods in the borough of North York:

In [160]:
# Select the North York rows and renumber them from 0.
is_north_york = neighbourhoods['borough'] == 'North York'
north_york_data = neighbourhoods[is_north_york].reset_index(drop=True)
north_york_data.head()

Unnamed: 0,postal_code,borough,neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


In [161]:
address = 'North York, Toronto'

geolocator = Nominatim(user_agent="north_york_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of North York are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of North York are 43.7543263, -79.44911696639593.


In [162]:
# Base map centred on the current latitude/longitude values.
map_north_york = folium.Map(location=[latitude, longitude], zoom_start=11)

# One circle marker per row of north_york_data, with the neighbourhood name as its popup.
north_york_points = zip(
    north_york_data['Latitude'],
    north_york_data['Longitude'],
    north_york_data['neighbourhood'],
)
for lat, lng, name in north_york_points:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(name, parse_html=True),
        color='orange',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_north_york)

map_north_york

Initialise Foursquare API credentials. Note: these have been removed for privacy.

In [163]:
# Credentials are read from the environment so that no secret is stored in the notebook.
# Set FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET (and optionally FOURSQUARE_ACCESS_CODE)
# before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
AccessCode = os.environ.get('FOURSQUARE_ACCESS_CODE', '')
VERSION = '20180604'  # value sent as the `v` query parameter
LIMIT = 30  # value sent as the `limit` query parameter

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')

Get the coordinates of the first neighbourhood in North York (Parkwoods) and pass them to the API as a single test request:

In [165]:
# Scalar lookups on row label 0 of the North York frame.
neighborhood_name = north_york_data.at[0, 'neighbourhood']
neighborhood_latitude = north_york_data.at[0, 'Latitude']
neighborhood_longitude = north_york_data.at[0, 'Longitude']

message = 'Latitude and longitude values of {} are {}, {}.'
print(message.format(neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


In [166]:
# Build the explore request for the single neighbourhood selected above and decode the JSON reply.
url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&limit={}'
url_fields = (
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neighborhood_latitude,
    neighborhood_longitude,
    LIMIT,
)
url = url_template.format(*url_fields)
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5fb189ad3023093e62593e3b'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'suggestedRadius': 2964,
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 182,
  'suggestedBounds': {'ne': {'lat': 43.7654047500393,
    'lng': -79.29882008379498},
   'sw': {'lat': 43.73279658142393, 'lng': -79.36076675520702}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b8991cbf964a520814232e3',
       'name': "Allwyn's Bakery",
       'location': {'address': '81 Underhill drive',
        'lat': 43.75984035203157,
        'lng': -79.32471879917513,
        'labeledLatLngs': [{'label': 'display',
          'la

Function to get details for venues within a 500 metre radius of each neighbourhood in North York:

In [167]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint once per neighbourhood and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighbourhood name and coordinates; iterated together with zip().
    radius : int, default 500
        Sent as the `radius` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests assemble and escape the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,  # do not hang the notebook on a stalled connection
        )
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without categories gets None rather than raising IndexError
                categories[0]['name'] if categories else None))

    # Passing the columns to the constructor keeps the schema even when no rows were collected.
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

In [168]:
# Collect the venues for every row of the North York frame.
north_york_venues = getNearbyVenues(
    north_york_data['neighbourhood'],
    north_york_data['Latitude'],
    north_york_data['Longitude'],
)


Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale, Willowdale East
Downsview
York Mills West
Willowdale, Willowdale West


Number of venues per neighbourhood:

In [88]:
# Non-null count of every column within each neighbourhood group.
venues_by_neighbourhood = north_york_venues.groupby('Neighborhood')
venues_by_neighbourhood.count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",22,22,22,22,22,22
Don Mills,23,23,23,23,23,23
Downsview,16,16,16,16,16,16
"Fairview, Henry Farm, Oriole",30,30,30,30,30,30
Glencairn,5,5,5,5,5,5
Hillcrest Village,5,5,5,5,5,5
Humber Summit,2,2,2,2,2,2
"Humberlea, Emery",1,1,1,1,1,1


In [89]:
# Count the distinct values in the 'Venue Category' column.
distinct_categories = north_york_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 94 uniques categories.


One hot encoding:

In [90]:
# one hot encoding
north_york_onehot = pd.get_dummies(north_york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
north_york_onehot['Neighborhood'] = north_york_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [north_york_onehot.columns[-1]] + list(north_york_onehot.columns[:-1])
north_york_onehot = north_york_onehot[fixed_columns]

north_york_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [91]:
north_york_onehot.shape

(197, 95)

Group rows by neighbourhood and show the mean of the frequency of occurrence of each category:

In [92]:
# Mean of every indicator column per neighbourhood, with the neighbourhood restored as a column.
category_means = north_york_onehot.groupby('Neighborhood').mean()
north_york_grouped = category_means.reset_index()
north_york_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,...,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.045455,0.045455,0.0,0.0,0.0,0.0,0.0
3,Don Mills,0.0,0.0,0.0,0.043478,0.0,0.043478,0.0,0.0,0.0,...,0.043478,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Downsview,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Fairview, Henry Farm, Oriole",0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.033333,0.0,0.0
6,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Humberlea, Emery",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [93]:
north_york_grouped.shape

(19, 95)

Show the 10 most common venue types for each neighbourhood:

In [94]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of `row`, ignoring its first entry.

    The first element of `row` is skipped, the remaining values are sorted in
    descending order, and the index labels of the first `num_top_venues`
    entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [95]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take 'st' / 'nd' / 'rd'; every later rank takes 'th'
    # (an explicit bounds check replaces the former bare `except:`)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = north_york_grouped['Neighborhood']

# fill the ranking columns of each row from the matching row of north_york_grouped
for ind in np.arange(north_york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(north_york_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Middle Eastern Restaurant,Frozen Yogurt Shop,Gas Station,Diner,Deli / Bodega,Ice Cream Shop,Chinese Restaurant,Mobile Phone Shop
1,Bayview Village,Japanese Restaurant,Chinese Restaurant,Café,Bank,Women's Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
2,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Café,Restaurant,Juice Bar,Butcher,Comfort Food Restaurant,Indian Restaurant,Pharmacy
3,Don Mills,Gym,Coffee Shop,Beer Store,Japanese Restaurant,Sandwich Place,Italian Restaurant,Discount Store,Bike Shop,Café,Restaurant
4,Downsview,Grocery Store,Park,Home Service,Business Service,Hotel,Baseball Field,Gym / Fitness Center,Shopping Mall,Bank,Liquor Store


Split the neighbourhoods into clusters based on the venue types:

In [96]:
# set number of clusters
kclusters = 5

north_york_grouped_clustering = north_york_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(north_york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 4, 4, 4, 4, 4, 4, 4, 3, 2])

In [97]:
neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Middle Eastern Restaurant,Frozen Yogurt Shop,Gas Station,Diner,Deli / Bodega,Ice Cream Shop,Chinese Restaurant,Mobile Phone Shop
1,Bayview Village,Japanese Restaurant,Chinese Restaurant,Café,Bank,Women's Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
2,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Café,Restaurant,Juice Bar,Butcher,Comfort Food Restaurant,Indian Restaurant,Pharmacy
3,Don Mills,Gym,Coffee Shop,Beer Store,Japanese Restaurant,Sandwich Place,Italian Restaurant,Discount Store,Bike Shop,Café,Restaurant
4,Downsview,Grocery Store,Park,Home Service,Business Service,Hotel,Baseball Field,Gym / Fitness Center,Shopping Mall,Bank,Liquor Store
5,"Fairview, Henry Farm, Oriole",Clothing Store,Coffee Shop,Bank,Juice Bar,Restaurant,Cosmetics Shop,Burger Joint,Department Store,Pharmacy,Liquor Store
6,Glencairn,Park,Pizza Place,Bakery,Japanese Restaurant,Pub,Women's Store,Dim Sum Restaurant,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
7,Hillcrest Village,Golf Course,Athletics & Sports,Mediterranean Restaurant,Pool,Dog Run,Food Truck,Food Court,Comfort Food Restaurant,Furniture / Home Store,Construction & Landscaping
8,Humber Summit,Furniture / Home Store,Pizza Place,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
9,"Humberlea, Emery",Baseball Field,Women's Store,Dog Run,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant


In [98]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
north_york_merged = north_york_data
north_york_merged = north_york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='neighbourhood')

In [112]:
# Discard every row that contains a missing value.
north_york_merged = north_york_merged.dropna()

In [116]:
# Cast the cluster label column to int.
north_york_merged = north_york_merged.astype({'Cluster Labels': int})

In [117]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(north_york_merged['Latitude'], north_york_merged['Longitude'], north_york_merged['neighbourhood'], north_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Cluster 1

In [125]:
# Show the column at position 2 plus every column from position 5 onward, for rows labelled 0.
shown_columns = north_york_merged.columns[[2] + list(range(5, north_york_merged.shape[1]))]
in_cluster = north_york_merged['Cluster Labels'] == 0
north_york_merged.loc[in_cluster, shown_columns]

Unnamed: 0,neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,0,Park,Food & Drink Shop,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
14,"North Park, Maple Leaf Park, Upwood Park",0,Park,Construction & Landscaping,Bakery,Women's Store,Discount Store,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
22,York Mills West,0,Park,Convenience Store,Women's Store,Chocolate Shop,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Deli / Bodega,Department Store


### Cluster 2

In [126]:
# Show the column at position 2 plus every column from position 5 onward, for rows labelled 1.
shown_columns = north_york_merged.columns[[2] + list(range(5, north_york_merged.shape[1]))]
in_cluster = north_york_merged['Cluster Labels'] == 1
north_york_merged.loc[in_cluster, shown_columns]

Unnamed: 0,neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,"York Mills, Silver Hills",1,Martial Arts School,Women's Store,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant


### Cluster 3

In [127]:
# Show the column at position 2 plus every column from position 5 onward, for rows labelled 2.
shown_columns = north_york_merged.columns[[2] + list(range(5, north_york_merged.shape[1]))]
in_cluster = north_york_merged['Cluster Labels'] == 2
north_york_merged.loc[in_cluster, shown_columns]

Unnamed: 0,neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,"Humberlea, Emery",2,Baseball Field,Women's Store,Dog Run,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant


### Cluster 4

In [128]:
# Show the column at position 2 plus every column from position 5 onward, for rows labelled 3.
shown_columns = north_york_merged.columns[[2] + list(range(5, north_york_merged.shape[1]))]
in_cluster = north_york_merged['Cluster Labels'] == 3
north_york_merged.loc[in_cluster, shown_columns]

Unnamed: 0,neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Humber Summit,3,Furniture / Home Store,Pizza Place,Women's Store,Diner,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega


### Cluster 5

In [129]:
# Show the column at position 2 plus every column from position 5 onward, for rows labelled 4.
shown_columns = north_york_merged.columns[[2] + list(range(5, north_york_merged.shape[1]))]
in_cluster = north_york_merged['Cluster Labels'] == 4
north_york_merged.loc[in_cluster, shown_columns]

Unnamed: 0,neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Victoria Village,4,Intersection,Hockey Arena,Pizza Place,Portuguese Restaurant,Coffee Shop,Food Court,Food & Drink Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
2,"Lawrence Manor, Lawrence Heights",4,Clothing Store,Women's Store,Vietnamese Restaurant,Boutique,Coffee Shop,Event Space,Furniture / Home Store,Gift Shop,Accessories Store,Sporting Goods Shop
3,Don Mills,4,Gym,Coffee Shop,Beer Store,Japanese Restaurant,Sandwich Place,Italian Restaurant,Discount Store,Bike Shop,Café,Restaurant
4,Glencairn,4,Park,Pizza Place,Bakery,Japanese Restaurant,Pub,Women's Store,Dim Sum Restaurant,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
5,Don Mills,4,Gym,Coffee Shop,Beer Store,Japanese Restaurant,Sandwich Place,Italian Restaurant,Discount Store,Bike Shop,Café,Restaurant
6,Hillcrest Village,4,Golf Course,Athletics & Sports,Mediterranean Restaurant,Pool,Dog Run,Food Truck,Food Court,Comfort Food Restaurant,Furniture / Home Store,Construction & Landscaping
7,"Bathurst Manor, Wilson Heights, Downsview North",4,Bank,Coffee Shop,Middle Eastern Restaurant,Frozen Yogurt Shop,Gas Station,Diner,Deli / Bodega,Ice Cream Shop,Chinese Restaurant,Mobile Phone Shop
8,"Fairview, Henry Farm, Oriole",4,Clothing Store,Coffee Shop,Bank,Juice Bar,Restaurant,Cosmetics Shop,Burger Joint,Department Store,Pharmacy,Liquor Store
9,"Northwood Park, York University",4,Furniture / Home Store,Bar,Metro Station,Caribbean Restaurant,Massage Studio,Miscellaneous Shop,Coffee Shop,Fast Food Restaurant,Event Space,Frozen Yogurt Shop
10,Bayview Village,4,Japanese Restaurant,Chinese Restaurant,Café,Bank,Women's Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
