The code in this notebook is to webscrape the New york postal code,borough and neighborhood information from wikipedia
and clean the acquired dataset.

Includes part 1, part 2 and part 3

Importing necessary libraries

In [78]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import matplotlib 
import requests # library to handle requests
from pandas import json_normalize # tranform JSON file into a pandas dataframe
from bs4 import BeautifulSoup
import lxml
import math
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim
import folium

Webscrapping the data and storing it in a pandas dataframe

In [37]:
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0] 
df = pd.read_html(str(table))[0]
df = pd.DataFrame(df)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


Drop all rows where borough is not assigned

In [38]:
df1 = df[df["Borough"]!="Not assigned"]
df1=df1.reset_index(drop=True)
df1.head()


Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


Concatenate the neighborhoods with same postal code and borough using ','.
Note: wikipedia has already concatenated such neighborhoods using ' \'. we shall replace it with ',' to get what is expected.

In [39]:
df2 = df1.groupby(['Postal code','Borough'],as_index=True)['Neighborhood'].apply(', '.join).reset_index()
for i in range(df2.shape[0]):
    df2.iloc[i,2]=df2.iloc[i,2].replace(' /',',')
df2.head()


Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


If neighborhhod is not assigned then it should be replaced with borough name

In [40]:
for i in range(df2.shape[0]):
    if (df2['Neighborhood'][i]==np.nan):
        df2['Neighborhood'][i]=df2["Borough"][i]
        
df2.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Find the number of rows in our dataset.

In [41]:
df2.shape[0]

103

In [42]:
geo = pd.read_csv("https://cocl.us/Geospatial_data")
geo

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [330]:
geo.rename(columns={"Postal Code":"Postal code_y"},inplace=True)
data = pd.merge(df2, geo, left_on='Postal code', right_on="Postal code_y")
del data['Postal code_y']
data

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [331]:
address = 'Toronto'

geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [332]:
# create map of toronto using latitude and longitude values
map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(data['Latitude'], data['Longitude'], data['Borough'], data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map)  
    
map

In [333]:
borough_names = list(data.Borough.unique())

borough_with_toronto = []

for x in borough_names:
    if "toronto" in x.lower():
        borough_with_toronto.append(x)
        
borough_with_toronto

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [334]:
data = data[data['Borough'].isin(borough_with_toronto)].reset_index(drop=True)
print(data.shape)
data

(39, 5)


Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049


In [335]:
# create map of toronto using latitude and longitude values
newmap = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(data['Latitude'], data['Longitude'], data['Borough'], data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(newmap)  
    
newmap

Now forming the url for api and using it

In [53]:
CLIENT_ID = '5SXQLEP4XW0YPT12WC03YKD4K1F01MUE1422BLADUNJP3FH2' # your Foursquare ID
CLIENT_SECRET = 'BAI5ARXH42QCA0CPFZI4Q3X4K1XIR1P13EFAFJV341KONLZN' # your Foursquare Secret
VERSION = '20180323' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5SXQLEP4XW0YPT12WC03YKD4K1F01MUE1422BLADUNJP3FH2
CLIENT_SECRET:BAI5ARXH42QCA0CPFZI4Q3X4K1XIR1P13EFAFJV341KONLZN


In [54]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    500, 
    50)

Finding top 20 venues in each neighborhood

In [164]:
radius = 200
LIMIT = 20

venues = []

for lat, long, post, borough, neighborhood in zip(data['Latitude'], data['Longitude'], data['Postal code'], data['Borough'], data['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [133]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Postal code', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(312, 9)


Unnamed: 0,Postal code,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Stewart Ravine,43.6763,-79.294784,Other Great Outdoors
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Glen Stewart Park,43.675278,-79.294647,Park
3,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,Kalyvia,43.677973,-79.351208,Greek Restaurant
4,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,British Style Fish & Chips,43.668723,-79.317139,Fish & Chips Shop


In [162]:
venues_df.groupby(["Postal code", "Borough", "Neighborhood"]).count()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Postal code,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,3,3,3,3,3,3
M4K,East Toronto,"The Danforth West, Riverdale",1,1,1,1,1,1
M4L,East Toronto,"India Bazaar, The Beaches West",2,2,2,2,2,2
M4M,East Toronto,Studio District,16,16,16,16,16,16
M4N,Central Toronto,Lawrence Park,2,2,2,2,2,2
M4P,Central Toronto,Davisville North,2,2,2,2,2,2
M4S,Central Toronto,Davisville,15,15,15,15,15,15
M4T,Central Toronto,"Moore Park, Summerhill East",3,3,3,3,3,3
M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park",4,4,4,4,4,4
M4W,Downtown Toronto,Rosedale,1,1,1,1,1,1


In [136]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))
venues_df.shape

There are 119 uniques categories.


(312, 9)

Preparing data set for kmeans clustering

In [138]:
# one hot encoding
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column back to dataframe
toronto_onehot['Postal code'] = venues_df['Postal code'] 
toronto_onehot['Borough'] = venues_df['Borough'] 
toronto_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move postal, borough and neighborhood column to the first column
fixed_columns = list(toronto_onehot.columns[-3:]) + list(toronto_onehot.columns[:-3])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(312, 122)


Unnamed: 0,Postal code,Borough,Neighborhoods,Adult Boutique,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,...,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4K,East Toronto,"The Danforth West, Riverdale",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4L,East Toronto,"India Bazaar, The Beaches West",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [140]:

toronto_grouped = toronto_onehot.groupby(["Postal code", "Borough", "Neighborhoods"]).mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped.head(20)

(34, 122)


Unnamed: 0,Postal code,Borough,Neighborhoods,Adult Boutique,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Auto Workshop,BBQ Joint,...,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"The Danforth West, Riverdale",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M4L,East Toronto,"India Bazaar, The Beaches West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4S,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0
7,M4T,Central Toronto,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4W,Downtown Toronto,Rosedale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Now let's create the new dataframe and display the top 10 venues for each PostalCode.

In [143]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['Postal code', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postal code'] = toronto_grouped['Postal code']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

for ind in np.arange(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted

(34, 13)


Unnamed: 0,Postal code,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,Other Great Outdoors,Trail,Park,Farmers Market,College Rec Center,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop
1,M4K,East Toronto,"The Danforth West, Riverdale",Greek Restaurant,Yoga Studio,Flower Shop,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega
2,M4L,East Toronto,"India Bazaar, The Beaches West",Park,Fish & Chips Shop,Health & Beauty Service,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant
3,M4M,East Toronto,Studio District,Coffee Shop,Café,Sandwich Place,Bar,Pet Store,Clothing Store,Cheese Shop,Seafood Restaurant,Bookstore,Gastropub
4,M4N,Central Toronto,Lawrence Park,Jewelry Store,Lake,Yoga Studio,Flower Shop,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant
5,M4P,Central Toronto,Davisville North,Convenience Store,Breakfast Spot,Yoga Studio,College Rec Center,Comic Shop,Concert Hall,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega
6,M4S,Central Toronto,Davisville,Café,Italian Restaurant,Coffee Shop,Dessert Shop,Indian Restaurant,Pizza Place,Sushi Restaurant,Seafood Restaurant,Costume Shop,Toy / Game Store
7,M4T,Central Toronto,"Moore Park, Summerhill East",Moving Target,Sports Club,Park,Yoga Studio,Farmers Market,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop
8,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",Park,Liquor Store,Coffee Shop,Supermarket,Cuban Restaurant,Diner,Dessert Shop,Department Store,Deli / Bodega,Creperie
9,M4W,Downtown Toronto,Rosedale,Park,Health & Beauty Service,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega


Now performing k means clustering of data using 5 cluster points

In [145]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop(["Postal code", "Borough", "Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]
print(kmeans.labels_.shape)
toronto_grouped_clustering.shape

(34,)


(34, 119)

The clustering is done. Now assigning the cluster label to each row

In [306]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
toronto_merged = data.copy()

z = data.copy()
x=[]
z = z.drop(z.index[0:])
for i in range(toronto_merged.shape[0]):
    if toronto_merged['Neighborhood'][i] in venues_df['Neighborhood'].unique():
        x.append(i)
toronto_merged=z.append(toronto_merged.iloc[x]).reset_index()
del toronto_merged['index']
# add clustering labels
toronto_merged["Cluster Labels"] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.drop(["Borough", "Neighborhoods"], 1).set_index("Postal code"), on="Postal code")

print(toronto_merged.shape)

toronto_merged 

(34, 16)


Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Other Great Outdoors,Trail,Park,Farmers Market,College Rec Center,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,4,Greek Restaurant,Yoga Studio,Flower Shop,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,2,Park,Fish & Chips Shop,Health & Beauty Service,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant
3,M4M,East Toronto,Studio District,43.659526,-79.340923,1,Coffee Shop,Café,Sandwich Place,Bar,Pet Store,Clothing Store,Cheese Shop,Seafood Restaurant,Bookstore,Gastropub
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,1,Jewelry Store,Lake,Yoga Studio,Flower Shop,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197,1,Convenience Store,Breakfast Spot,Yoga Studio,College Rec Center,Comic Shop,Concert Hall,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega
6,M4S,Central Toronto,Davisville,43.704324,-79.38879,1,Café,Italian Restaurant,Coffee Shop,Dessert Shop,Indian Restaurant,Pizza Place,Sushi Restaurant,Seafood Restaurant,Costume Shop,Toy / Game Store
7,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,1,Moving Target,Sports Club,Park,Yoga Studio,Farmers Market,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop
8,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049,1,Park,Liquor Store,Coffee Shop,Supermarket,Cuban Restaurant,Diner,Dessert Shop,Department Store,Deli / Bodega,Creperie
9,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,2,Park,Health & Beauty Service,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega


Viewing the different clusters one by one.

In [315]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,West Toronto,0,Bowling Alley,Yoga Studio,College Rec Center,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega


In [316]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,1,Other Great Outdoors,Trail,Park,Farmers Market,College Rec Center,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop
3,East Toronto,1,Coffee Shop,Café,Sandwich Place,Bar,Pet Store,Clothing Store,Cheese Shop,Seafood Restaurant,Bookstore,Gastropub
4,Central Toronto,1,Jewelry Store,Lake,Yoga Studio,Flower Shop,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant
5,Central Toronto,1,Convenience Store,Breakfast Spot,Yoga Studio,College Rec Center,Comic Shop,Concert Hall,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega
6,Central Toronto,1,Café,Italian Restaurant,Coffee Shop,Dessert Shop,Indian Restaurant,Pizza Place,Sushi Restaurant,Seafood Restaurant,Costume Shop,Toy / Game Store
7,Central Toronto,1,Moving Target,Sports Club,Park,Yoga Studio,Farmers Market,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop
8,Central Toronto,1,Park,Liquor Store,Coffee Shop,Supermarket,Cuban Restaurant,Diner,Dessert Shop,Department Store,Deli / Bodega,Creperie
10,Downtown Toronto,1,Café,Restaurant,Market,Diner,Coffee Shop,General Entertainment,Indian Restaurant,Bakery,Italian Restaurant,Pizza Place
11,Downtown Toronto,1,Burger Joint,Adult Boutique,Japanese Restaurant,Coffee Shop,Mexican Restaurant,Martial Arts Dojo,Poke Place,Bubble Tea Shop,Dessert Shop,Restaurant
12,Downtown Toronto,1,History Museum,Bakery,Gastropub,Flower Shop,Coffee Shop,Breakfast Spot,Spa,Gym / Fitness Center,BBQ Joint,Bank


In [317]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,East Toronto,2,Park,Fish & Chips Shop,Health & Beauty Service,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant
9,Downtown Toronto,2,Park,Health & Beauty Service,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega


In [318]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,Central Toronto,3,Health & Beauty Service,College Rec Center,Colombian Restaurant,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega


In [319]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,4,Greek Restaurant,Yoga Studio,Flower Shop,Comic Shop,Concert Hall,Convenience Store,Costume Shop,Creperie,Cuban Restaurant,Deli / Bodega


observation: Cluster 1 are bowling alleys and  yoga studios.Cluster 2 is general business areas with shops, these are the most common type of neighborhoods. Cluster 3 are just parks. Cluster 4 is college rec centers. Cluster 5 are greek restaurants and yoga studios.