# Segmenting and Clustering Neighborhoods in Toronto

### Task 1: Scraping the table

#### (a) Import packages

In [7]:
import numpy as np
import pandas as pd
import requests
import json
from bs4 import BeautifulSoup

#### (b) Read table into json and then pandas dataframe

In [8]:
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0]
postalcodes = pd.read_html(str(table))
postalcodes_json = postalcodes[0].to_json(orient='records')
postalcodes = pd.read_json(postalcodes_json)

#### (c) Remove missing boroughs, sort and reset index

In [9]:
postalcodes = postalcodes.query('Borough != "Not assigned"')
postalcodes.sort_values(by = "Postal Code", axis = 0, ascending = True, inplace = True)
postalcodes.reset_index(drop =True, inplace = True)
postalcodes = postalcodes.rename(columns = {'Postal Code':'PostalCode'})
postalcodes

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


#### (d) Print the shape of the dataframe

In [10]:
print(postalcodes.shape)

(103, 3)


### Task 2: Get location data

After being unable to get results using the Geocoder package, I'm using the CSV provided.

#### (a) Read CSV of postcode locations

In [11]:
# Read the CSV
coords = pd.read_csv('https://cocl.us/Geospatial_data')

# Rename Postal Code column so it matches df - this avoids using left_on and right_on in merge
coords.columns = ['PostalCode', 'Latitude', 'Longitude']
coords

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


#### (b) Merge the two dataframes

In [12]:
torontodf = pd.merge(postalcodes, coords, how='left', on='PostalCode')
torontodf.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Task 3: Explore and cluster the neighborhoods in Toronto

#### (a) Import packages

In [13]:
from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import matplotlib.cm as cm
import matplotlib.colors as colors

#### (b) Get Toronto latitude and longitude

In [14]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
torontolocation = geolocator.geocode(address)
torontolatitude = torontolocation.latitude
torontolongitude = torontolocation.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(torontolatitude, torontolongitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


#### (c) Create map with postcodes marked

In [15]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[torontolatitude, torontolongitude], zoom_start=11)

# add markers to map
for lat, lng, postal_code, borough, neighbourhood in zip(torontodf['Latitude'], torontodf['Longitude'], torontodf['PostalCode'], torontodf['Borough'], torontodf['Neighbourhood']):
    label = '{}, {}, {}'.format(postal_code, neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### (d) Define Foursquare credentials and version

In [16]:
CLIENT_ID = 'VKSYDPNJNRYWDFOGX343VHXJVXTCSYEA2SB4D5OB3GGXO5KS' # your Foursquare ID
CLIENT_SECRET = '4F3YJSXCMZRM2FE0N3OZ5PQRPAQGP0ITB2PJQ2M1MLV2EYY1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: VKSYDPNJNRYWDFOGX343VHXJVXTCSYEA2SB4D5OB3GGXO5KS
CLIENT_SECRET:4F3YJSXCMZRM2FE0N3OZ5PQRPAQGP0ITB2PJQ2M1MLV2EYY1


#### (e) Define function for getting venues

In [17]:
def getNearbyVenues(postal_codes, boroughs, neighbourhoods, latitudes, longitudes, radius=750):
    
    venues_list=[]
    LIMIT = 100
    
    for postal_code, borough, neighbourhood, lat, lng in zip(postal_codes, boroughs, neighbourhoods, latitudes, longitudes):
        print('{}, {}, {}'.format(postal_code, borough, neighbourhood))
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            postal_code,
            borough,
            neighbourhood,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['PostalCode',
                  'Borough',
                  'Neighbourhood',
                  'PostalCode Latitude', 
                  'PostalCode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### (e) Get Toronto venues

In [18]:
toronto_venues = getNearbyVenues(postal_codes=torontodf['PostalCode'],
                                 boroughs=torontodf['Borough'],
                                 neighbourhoods=torontodf['Neighbourhood'],
                                   latitudes=torontodf['Latitude'],
                                   longitudes=torontodf['Longitude']
                                  )

M1B, Scarborough, Malvern, Rouge
M1C, Scarborough, Rouge Hill, Port Union, Highland Creek
M1E, Scarborough, Guildwood, Morningside, West Hill
M1G, Scarborough, Woburn
M1H, Scarborough, Cedarbrae
M1J, Scarborough, Scarborough Village
M1K, Scarborough, Kennedy Park, Ionview, East Birchmount Park
M1L, Scarborough, Golden Mile, Clairlea, Oakridge
M1M, Scarborough, Cliffside, Cliffcrest, Scarborough Village West
M1N, Scarborough, Birch Cliff, Cliffside West
M1P, Scarborough, Dorset Park, Wexford Heights, Scarborough Town Centre
M1R, Scarborough, Wexford, Maryvale
M1S, Scarborough, Agincourt
M1T, Scarborough, Clarks Corners, Tam O'Shanter, Sullivan
M1V, Scarborough, Milliken, Agincourt North, Steeles East, L'Amoreaux East
M1W, Scarborough, Steeles West, L'Amoreaux West
M1X, Scarborough, Upper Rouge
M2H, North York, Hillcrest Village
M2J, North York, Fairview, Henry Farm, Oriole
M2K, North York, Bayview Village
M2L, North York, York Mills, Silver Hills
M2M, North York, Willowdale, Newtonbrook

#### (f) Investigate results

In [19]:
# Get shape of venue dataframe
print(toronto_venues.shape)

# Show top of venue dataframe
toronto_venues

(3656, 9)


Unnamed: 0,PostalCode,Borough,Neighbourhood,PostalCode Latitude,PostalCode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
2,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Wendy's,43.802008,-79.198080,Fast Food Restaurant
3,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store
4,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,Tim Hortons,43.802000,-79.198169,Coffee Shop
...,...,...,...,...,...,...,...,...,...
3651,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,McDonald's,43.741757,-79.584230,Fast Food Restaurant
3652,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,Pizza Nova,43.736761,-79.589817,Pizza Place
3653,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,LCBO,43.741508,-79.584501,Liquor Store
3654,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,Caribbean Heat 2,43.743186,-79.582367,Caribbean Restaurant


In [20]:
# Show number of results per area
toronto_venues.groupby('PostalCode').count()['Borough']

PostalCode
M1B    10
M1C     5
M1E    13
M1G     6
M1H    20
       ..
M9N     5
M9P    14
M9R    13
M9V    14
M9W     1
Name: Borough, Length: 102, dtype: int64

In [21]:
#Show the number of unique venue categories
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 322 uniques categories.


#### (g) Analyse each postal code

In [22]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['PostalCode'] = toronto_venues['PostalCode'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

# show top of resulting dataframe
toronto_onehot.head()

Unnamed: 0,PostalCode,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Get size of toronto_onehot dataframe
toronto_onehot.shape

(3656, 323)

In [24]:
# Group rows by neighbourhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('PostalCode').mean().reset_index()
toronto_grouped

Unnamed: 0,PostalCode,ATM,Accessories Store,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,M9N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
98,M9P,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
99,M9R,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00
100,M9V,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00


In [25]:
# Get size of grouped dataframe
toronto_grouped.shape

(102, 323)

#### (h) Cluster by Postal Code

In [26]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('PostalCode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 2, 1, 1, 1, 0, 0, 0])

#### (i) Merge cluster data and visualise

In [27]:
# Function to return most common venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
# Get most common venues in each PostalCode
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['PostalCode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
postalcodes_venues_sorted = pd.DataFrame(columns=columns)
postalcodes_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']

for ind in np.arange(toronto_grouped.shape[0]):
    postalcodes_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

postalcodes_venues_sorted.head()

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Coffee Shop,Trail,Spa,Bus Station,Paper / Office Supplies Store,Hobby Shop,African Restaurant,Concert Hall,Eastern European Restaurant
1,M1C,Breakfast Spot,Bar,Italian Restaurant,Burger Joint,Yoga Studio,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
2,M1E,Fast Food Restaurant,Pizza Place,Restaurant,Beer Store,Bank,Fried Chicken Joint,Sports Bar,Park,Greek Restaurant,Filipino Restaurant
3,M1G,Coffee Shop,Park,Business Service,Construction & Landscaping,Dumpling Restaurant,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run
4,M1H,Indian Restaurant,Coffee Shop,Yoga Studio,Flower Shop,Burger Joint,Bus Line,Lounge,Fried Chicken Joint,Bank,Bakery


In [29]:
# add clustering labels
postalcodes_venues_sorted.insert(0, 'Cluster Labels', np.array(kmeans.labels_, dtype='int'))
postalcodes_venues_sorted.head()

Unnamed: 0,Cluster Labels,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,1,M1B,Fast Food Restaurant,Coffee Shop,Trail,Spa,Bus Station,Paper / Office Supplies Store,Hobby Shop,African Restaurant,Concert Hall,Eastern European Restaurant
1,1,M1C,Breakfast Spot,Bar,Italian Restaurant,Burger Joint,Yoga Studio,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
2,1,M1E,Fast Food Restaurant,Pizza Place,Restaurant,Beer Store,Bank,Fried Chicken Joint,Sports Bar,Park,Greek Restaurant,Filipino Restaurant
3,2,M1G,Coffee Shop,Park,Business Service,Construction & Landscaping,Dumpling Restaurant,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run
4,1,M1H,Indian Restaurant,Coffee Shop,Yoga Studio,Flower Shop,Burger Joint,Bus Line,Lounge,Fried Chicken Joint,Bank,Bakery


In [30]:
toronto_merged = torontodf

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(postalcodes_venues_sorted.set_index('PostalCode'), on='PostalCode')
toronto_merged['Cluster Labels'] = pd.to_numeric(toronto_merged['Cluster Labels'], downcast='integer')

# Drop rows with NA - i.e. where no venues were found on Foursquare
toronto_merged = toronto_merged.dropna()

#Show dataframe
toronto_merged 

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1.0,Fast Food Restaurant,Coffee Shop,Trail,Spa,Bus Station,Paper / Office Supplies Store,Hobby Shop,African Restaurant,Concert Hall,Eastern European Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1.0,Breakfast Spot,Bar,Italian Restaurant,Burger Joint,Yoga Studio,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Fast Food Restaurant,Pizza Place,Restaurant,Beer Store,Bank,Fried Chicken Joint,Sports Bar,Park,Greek Restaurant,Filipino Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2.0,Coffee Shop,Park,Business Service,Construction & Landscaping,Dumpling Restaurant,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Indian Restaurant,Coffee Shop,Yoga Studio,Flower Shop,Burger Joint,Bus Line,Lounge,Fried Chicken Joint,Bank,Bakery
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188,0.0,Pharmacy,Diner,Gift Shop,Fried Chicken Joint,Pizza Place,Yoga Studio,Discount Store,Distribution Center,Dive Bar,Dog Run
99,M9P,Etobicoke,Westmount,43.696319,-79.532242,1.0,Pizza Place,Sandwich Place,Flea Market,Discount Store,Gas Station,Intersection,Chinese Restaurant,Golf Course,Golf Driving Range,Supermarket
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,1.0,Sandwich Place,Beer Store,Bank,Shopping Mall,Gas Station,Intersection,Chinese Restaurant,American Restaurant,Supermarket,Pharmacy
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,0.0,Pizza Place,Grocery Store,Pharmacy,Fried Chicken Joint,Fast Food Restaurant,Hardware Store,Beer Store,Liquor Store,Sandwich Place,Caribbean Restaurant


In [31]:
toronto_merged['Cluster Labels'].value_counts()

1.0    82
0.0     9
2.0     9
3.0     1
4.0     1
Name: Cluster Labels, dtype: int64

In [32]:
map_clusters = folium.Map(location=[torontolatitude, torontolongitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### (j) Examine clusters

In [33]:
# Cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Scarborough,0.0,Intersection,Diner,Bakery,Ice Cream Shop,Bus Station,Metro Station,Coffee Shop,Bus Line,Convenience Store,Soccer Field
8,Scarborough,0.0,Wings Joint,Hardware Store,Restaurant,Pizza Place,Chinese Restaurant,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Distribution Center
9,Scarborough,0.0,College Stadium,Skating Rink,Thai Restaurant,General Entertainment,Café,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
36,East York,0.0,Skating Rink,Bus Line,Beer Store,Park,Curling Ice,Pharmacy,Bus Stop,Yoga Studio,Doner Restaurant,Distribution Center
63,Central Toronto,0.0,Playground,Garden,Pet Store,Spa,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant
79,North York,0.0,Home Service,Garden Center,Bakery,Yoga Studio,Eastern European Restaurant,Distribution Center,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
96,North York,0.0,Bakery,Pizza Place,Yoga Studio,Eastern European Restaurant,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
98,York,0.0,Pharmacy,Diner,Gift Shop,Fried Chicken Joint,Pizza Place,Yoga Studio,Discount Store,Distribution Center,Dive Bar,Dog Run
101,Etobicoke,0.0,Pizza Place,Grocery Store,Pharmacy,Fried Chicken Joint,Fast Food Restaurant,Hardware Store,Beer Store,Liquor Store,Sandwich Place,Caribbean Restaurant


In [34]:
# Cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,1.0,Fast Food Restaurant,Coffee Shop,Trail,Spa,Bus Station,Paper / Office Supplies Store,Hobby Shop,African Restaurant,Concert Hall,Eastern European Restaurant
1,Scarborough,1.0,Breakfast Spot,Bar,Italian Restaurant,Burger Joint,Yoga Studio,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
2,Scarborough,1.0,Fast Food Restaurant,Pizza Place,Restaurant,Beer Store,Bank,Fried Chicken Joint,Sports Bar,Park,Greek Restaurant,Filipino Restaurant
4,Scarborough,1.0,Indian Restaurant,Coffee Shop,Yoga Studio,Flower Shop,Burger Joint,Bus Line,Lounge,Fried Chicken Joint,Bank,Bakery
5,Scarborough,1.0,Ice Cream Shop,Restaurant,Sandwich Place,Convenience Store,Pizza Place,Fast Food Restaurant,Coffee Shop,Event Space,Doner Restaurant,Diner
...,...,...,...,...,...,...,...,...,...,...,...,...
93,Etobicoke,1.0,Pharmacy,Shopping Mall,Café,Playground,Skating Rink,Bank,Grocery Store,Park,Dog Run,Diner
94,Etobicoke,1.0,Pizza Place,Convenience Store,Mexican Restaurant,Coffee Shop,Theater,Gym,Hotel,Restaurant,Bank,Dumpling Restaurant
95,Etobicoke,1.0,Shopping Plaza,Café,Beer Store,Coffee Shop,Pizza Place,Electronics Store,Gas Station,Pharmacy,Pet Store,Liquor Store
99,Etobicoke,1.0,Pizza Place,Sandwich Place,Flea Market,Discount Store,Gas Station,Intersection,Chinese Restaurant,Golf Course,Golf Driving Range,Supermarket


In [35]:
# Cluster 3
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,2.0,Coffee Shop,Park,Business Service,Construction & Landscaping,Dumpling Restaurant,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run
21,North York,2.0,Park,Coffee Shop,Sandwich Place,Pizza Place,Trail,Bank,Indian Restaurant,Yoga Studio,Doner Restaurant,Discount Store
23,North York,2.0,Park,Pet Store,Bowling Alley,Convenience Store,Yoga Studio,Distribution Center,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
25,North York,2.0,Bus Stop,Park,Food & Drink Shop,Road,Donut Shop,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run
48,Central Toronto,2.0,Park,Grocery Store,Café,Thai Restaurant,Sandwich Place,Candy Store,Japanese Restaurant,Gym / Fitness Center,Gym,Dive Bar
50,Downtown Toronto,2.0,Park,Trail,Candy Store,Playground,Yoga Studio,Doner Restaurant,Diner,Discount Store,Distribution Center,Dive Bar
64,Central Toronto,2.0,Jewelry Store,Bus Line,Trail,Sushi Restaurant,Park,Donut Shop,Diner,Discount Store,Distribution Center,Dive Bar
91,Etobicoke,2.0,Baseball Field,Park,Playground,Gym / Fitness Center,Eastern European Restaurant,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant
97,North York,2.0,Baseball Field,Discount Store,Park,Convenience Store,Gas Station,Dumpling Restaurant,Distribution Center,Dive Bar,Dog Run,Doner Restaurant


In [36]:
# Cluster 4
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
102,Etobicoke,3.0,Rental Car Location,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant,Donut Shop


In [37]:
# Cluster 5
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,North York,4.0,Pool,Martial Arts School,Yoga Studio,Eastern European Restaurant,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant,Donut Shop
