In [2]:
import requests
import pandas as pd

link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Download the page. A timeout keeps the cell from hanging forever, and
# raise_for_status() stops here on an HTTP error instead of letting an error
# page be parsed as if it were the postal-code table.
response = requests.get(link, timeout=30)
response.raise_for_status()
f = response.text

# utilize BeautifulSoup to scrape the web page
from bs4 import BeautifulSoup
soup = BeautifulSoup(f,'lxml')

In [3]:
# locate the postal-code table: the first <table> whose cellpadding attribute is "2"
My_table = soup.find('table', attrs={'cellpadding': '2'})

In [103]:
# Collect all postcodes and their corresponding boroughs and neighborhoods.
# For every <p> cell the code reads the first <b> as the postal code, the first
# <a> as the borough and any further <a> links as neighborhoods.
# If no neighborhood is linked, the borough name is used as the neighborhood.
postCode = []
borough = []
allNeighbor = {}   # row number -> list of neighborhood names
num = 0

for row in My_table.find_all('p'):
    cells1 = row.find_all('b')
    cells2 = row.find_all('a')

    # Skip cells without any link, and cells that have links but no bold
    # postal code (indexing cells1[0] on those would raise IndexError).
    if not cells1 or not cells2:
        continue

    postCode.append(cells1[0].text)
    borough.append(cells2[0].text)

    # Every link after the first is a neighborhood; fall back to the borough.
    neighborhood = [cell.text for cell in cells2[1:]] or [cells2[0].text]
    allNeighbor[num] = neighborhood
    num = num + 1

In [114]:
# create a dataframe showing all the information
df = pd.DataFrame({
    'PostalCode': postCode,
    'Borough': borough,
    # materialise the dict view into a plain list, one list of names per row
    'Neighborhood': [*allNeighbor.values()],
})

# A bare `df.shape` in the middle of a cell is silently discarded, so print it.
print(df.shape)  # 101 rows when this notebook was originally run
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,[Parkwoods]
1,M4A,North York,[Victoria Village]
2,M5A,Downtown Toronto,"[Regent Park, Harbourfront]"
3,M6A,North York,"[Lawrence Manor, Lawrence Heights]"
4,M7A,Queen's Park,[Ontario Provincial Government]


In [131]:
# find latitude and longitude for each postal code
csv_path = 'Geospatial_Coordinates.csv'
location = pd.read_csv(csv_path)

# Map each postal code to [column 1, column 2] of the CSV; the next cell reads
# element 0 as the latitude and element 1 as the longitude.
# Built with zip() so that no temporary named `list` shadows the builtin and
# no manual row counter is needed.
locationDic = {
    codes: [lat, lng]
    for codes, lat, lng in zip(location['Postal Code'],
                               location.iloc[:, 1],
                               location.iloc[:, 2])
}

In [132]:
# add longitude and latitude variables to the dataframe
Latitude = []
Longitude = []

for codes in df['PostalCode']:
    if codes in locationDic:
        Latitude.append(locationDic[codes][0])
        Longitude.append(locationDic[codes][1])
    else:
        # Postal code missing from the coordinates file: store 0 as a
        # placeholder. (This used to be `append[0]`, which subscripts the
        # method and raises TypeError instead of appending.)
        Latitude.append(0)
        Longitude.append(0)

df['Latitude'] = Latitude
df['Longitude'] = Longitude

In [133]:
# keep only the rows whose Latitude and Longitude are both different from 0
has_latitude = df['Latitude'] != 0
has_longitude = df['Longitude'] != 0
df = df[has_latitude & has_longitude]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,[Parkwoods],43.753259,-79.329656
1,M4A,North York,[Victoria Village],43.725882,-79.315572
2,M5A,Downtown Toronto,"[Regent Park, Harbourfront]",43.65426,-79.360636
3,M6A,North York,"[Lawrence Manor, Lawrence Heights]",43.718518,-79.464763
4,M7A,Queen's Park,[Ontario Provincial Government],43.662301,-79.389494


In [63]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [79]:
# find latitude and longitude of Toronto, Ontario
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude 
longitude = location.longitude
print(latitude, longitude)
# [latitude, longitude] - [43.653963, -79.387207]

# create a map using latitude and longitude values
map = folium.Map(location=[43.653963, -79.387207], zoom_start=10)
map

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

43.653963 -79.387207


In [80]:
# add one circle marker per postal code to the map
# Loop variables get their own names so the `borough` list built while scraping
# is not overwritten by the last borough string.
for lat, lng, borough_name, neighborhood_names in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    # Neighborhood holds a sequence of names; join them so the popup reads
    # "Regent Park, Harbourfront, Downtown Toronto" rather than a list repr.
    label = '{}, {}'.format(', '.join(neighborhood_names), borough_name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map)

map

In [83]:
import os
import warnings

# Foursquare API credentials are read from the environment so that no secret
# is stored in the notebook. The id/secret pair that was previously committed
# here should be treated as leaked and revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not CLIENT_ID or not CLIENT_SECRET:
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare requests will be rejected until they are exported.'
    )

# Sent as the `v` and `limit` query parameters by getNearbyVenues.
VERSION = '20180604'
LIMIT = 100

In [129]:
# create a function to get nearby venues
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query Foursquare's ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        One label and one coordinate pair per location; they are zipped together.
    radius : int, default 500
        Passed to the API as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but still has these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. rejected credentials).
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string; the timeout keeps a
        # stalled connection from blocking the whole loop.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()

        # A location with no recommendations may come back without groups.
        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category is kept, with a missing category
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor also works when `rows` is
    # empty, where assigning `.columns` afterwards raises a length mismatch.
    return pd.DataFrame(rows, columns=columns)

In [151]:
# Restrict the data to the Downtown Toronto borough and fetch its nearby venues.
is_downtown = df['Borough'] == 'Downtown Toronto'
toronto_data = df[is_downtown].reset_index(drop=True)

# turn each neighborhood list into a tuple before it is used as a label
toronto_data['Neighborhood'] = toronto_data['Neighborhood'].map(tuple)

venues = getNearbyVenues(
    toronto_data['Neighborhood'],
    toronto_data['Latitude'],
    toronto_data['Longitude'],
)

('Regent Park', 'Harbourfront')
('Garden District', 'Ryerson')
('St. James Town',)
('Downtown Toronto',)
('Bay Street',)
('Downtown Toronto',)
('Richmond', 'King')
('Harbourfront', 'Union Station', 'Toronto Islands')
('Toronto Dominion Centre', 'Design Exchange')
('Commerce Court', 'Victoria Hotel')
('University of Toronto',)
('Kensington Market', 'Chinatown', 'Grange Park')
('CN Tower', 'King and Spadina', 'Railway Lands', 'Harbourfront', 'South Niagara', 'Island airport')
('Rosedale',)
('Downtown Toronto',)
('St. James Town', 'Cabbagetown')
('First Canadian Place', 'Underground city')
('Church and Wellesley',)


In [136]:
# show the size of the venue table as (rows, columns), then preview its first rows
print(venues.shape)
venues.head()

(1274, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"(Regent Park, Harbourfront)",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"(Regent Park, Harbourfront)",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"(Regent Park, Harbourfront)",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"(Regent Park, Harbourfront)",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"(Regent Park, Harbourfront)",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [137]:
# number of distinct venue categories across all fetched venues
print('There are {} unique categories.'.format(len(venues['Venue Category'].unique())))

There are 209 uniques categories.


In [138]:
# check how many venues were returned for each neighborhood
# (count() tallies the non-null values of every column within each Neighborhood group)
venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"(Bay Street,)",83,83,83,83,83,83
"(CN Tower, King and Spadina, Railway Lands, Harbourfront, South Niagara, Island airport)",16,16,16,16,16,16
"(Church and Wellesley,)",87,87,87,87,87,87
"(Commerce Court, Victoria Hotel)",100,100,100,100,100,100
"(Downtown Toronto,)",169,169,169,169,169,169
"(First Canadian Place, Underground city)",100,100,100,100,100,100
"(Garden District, Ryerson)",100,100,100,100,100,100
"(Harbourfront, Union Station, Toronto Islands)",100,100,100,100,100,100
"(Kensington Market, Chinatown, Grange Park)",87,87,87,87,87,87
"(Regent Park, Harbourfront)",46,46,46,46,46,46


In [139]:
# one hot encoding: one indicator column per venue category
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category that is itself called "Neighborhood" would be silently
# overwritten by the key column added below, so move it out of the way first.
# rename() is a no-op when no such category exists.
onehot = onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# add neighborhood column back to dataframe
onehot['Neighborhood'] = venues['Neighborhood'] 

onehot.shape

(1274, 209)

In [140]:
# group rows by neighborhood and take the mean of every one-hot column, i.e. the
# frequency of occurrence of each category among that neighborhood's venues;
# reset_index() turns Neighborhood back into a regular (first) column
grouped = onehot.groupby('Neighborhood').mean().reset_index()
grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"(Bay Street,)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012048,0.0,...,0.0,0.0,0.012048,0.0,0.0,0.012048,0.0,0.0,0.0,0.012048
1,"(CN Tower, King and Spadina, Railway Lands, Ha...",0.0,0.0625,0.0625,0.0625,0.125,0.125,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"(Church and Wellesley,)",0.011494,0.0,0.0,0.0,0.0,0.0,0.0,0.011494,0.0,...,0.0,0.0,0.0,0.0,0.011494,0.0,0.011494,0.011494,0.0,0.011494
3,"(Commerce Court, Victoria Hotel)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0
4,"(Downtown Toronto,)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005917,0.005917,...,0.0,0.0,0.011834,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"(First Canadian Place, Underground city)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0
6,"(Garden District, Ryerson)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0
7,"(Harbourfront, Union Station, Toronto Islands)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0
8,"(Kensington Market, Chinatown, Grange Park)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.045977,0.0,0.057471,0.011494,0.0,0.0,0.0,0.0
9,"(Regent Park, Harbourfront)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739


In [143]:
# print each neighborhood along with the top 5 most common venues
num_top_venues = 5

# Iterate over the rows directly instead of re-selecting each one with
# `grouped['Neighborhood'] == hood`: comparing a column of tuples against a
# tuple is treated as an element-wise comparison and is fragile.
for _, hood_row in grouped.iterrows():
    hood = hood_row['Neighborhood']
    # Neighborhood is a tuple of names; separate them so the header does not
    # run them together ("CN TowerKing and Spadina...").
    title = hood if isinstance(hood, str) else ", ".join(hood)
    print("----" + title + "----")

    # everything except the Neighborhood key is a category frequency
    freq = hood_row.drop('Neighborhood').astype(float).round(2)
    temp = (freq.sort_values(ascending=False)
                .head(num_top_venues)
                .rename_axis('venue')
                .reset_index(name='freq'))
    print(temp)
    print('\n')

----Bay Street----
                 venue  freq
0          Coffee Shop  0.17
1   Italian Restaurant  0.05
2         Burger Joint  0.04
3       Ice Cream Shop  0.04
4  Japanese Restaurant  0.04


----CN TowerKing and SpadinaRailway LandsHarbourfrontSouth NiagaraIsland airport----
              venue  freq
0    Airport Lounge  0.12
1   Airport Service  0.12
2  Airport Terminal  0.12
3          Boutique  0.06
4       Coffee Shop  0.06


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.08
1  Japanese Restaurant  0.06
2              Gay Bar  0.05
3           Restaurant  0.03
4     Sushi Restaurant  0.03


----Commerce CourtVictoria Hotel----
         venue  freq
0  Coffee Shop  0.11
1   Restaurant  0.07
2         Café  0.07
3        Hotel  0.05
4          Gym  0.04


----Downtown Toronto----
                venue  freq
0         Coffee Shop  0.10
1                Café  0.05
2  Seafood Restaurant  0.04
3          Restaurant  0.04
4            Beer Bar  0.03

In [144]:
# create a function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped; the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues`` of
    them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [146]:
# create a new dataframe and display the top 10 venues for each neighborhood
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def _ordinal(rank):
    """Return ``rank`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    last_two, last_one = rank % 100, rank % 10
    if 1 <= last_one <= 3 and not 11 <= last_two <= 13:
        return '{}{}'.format(rank, indicators[last_one - 1])
    return '{}th'.format(rank)


# create columns according to number of top venues
# (explicit suffix logic replaces a bare `except:` that was used as control flow)
columns = ['Neighborhood']
for rank in range(1, num_top_venues + 1):
    columns.append('{} Most Common Venue'.format(_ordinal(rank)))

# create a new dataframe with one row per neighborhood of `grouped`
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = grouped['Neighborhood']

# fill the venue columns of each row with that neighborhood's top categories
for ind in range(grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"(Bay Street,)",Coffee Shop,Italian Restaurant,Sandwich Place,Japanese Restaurant,Ice Cream Shop,Burger Joint,Gym / Fitness Center,Middle Eastern Restaurant,Bubble Tea Shop,Café
1,"(CN Tower, King and Spadina, Railway Lands, Ha...",Airport Lounge,Airport Service,Airport Terminal,Plane,Harbor / Marina,Sculpture Garden,Boat or Ferry,Rental Car Location,Coffee Shop,Boutique
2,"(Church and Wellesley,)",Coffee Shop,Japanese Restaurant,Gay Bar,Restaurant,Sushi Restaurant,Hotel,Gastropub,Pub,Men's Store,Mediterranean Restaurant
3,"(Commerce Court, Victoria Hotel)",Coffee Shop,Café,Restaurant,Hotel,Gym,American Restaurant,Deli / Bodega,Seafood Restaurant,Japanese Restaurant,Italian Restaurant
4,"(Downtown Toronto,)",Coffee Shop,Café,Restaurant,Seafood Restaurant,Park,Beer Bar,Bakery,Cheese Shop,Japanese Restaurant,Cocktail Bar


In [153]:
# use k-means to cluster the neighborhood into 5 clusters
from sklearn.cluster import KMeans 

# set number of clusters
kclusters = 5

# Features are the category frequencies only. The axis is named with
# `columns=` because a positional axis argument (`drop('Neighborhood', 1)`)
# is no longer accepted by current pandas.
grouped_clustering = grouped.drop(columns='Neighborhood')

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 0, 4, 1, 1, 1, 4, 1, 4, 1], dtype=int32)

In [155]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

# add clustering labels as the first column; a label column left over from a
# previous run is dropped first so this cell can be re-run without
# "cannot insert Cluster Labels, already exists"
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the labelled top-venue table onto toronto_data (postal code, borough,
# latitude/longitude) using the Neighborhood value as the key
merged = toronto_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# Neighborhoods that are absent from the top-venue table get NaN labels, which
# would turn the whole column into floats; drop them and keep integer labels.
merged = merged.dropna(subset=['Cluster Labels']).astype({'Cluster Labels': int})

merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"(Regent Park, Harbourfront)",43.65426,-79.360636,1,Coffee Shop,Bakery,Pub,Park,Theater,Mexican Restaurant,Café,Beer Store,Breakfast Spot,Restaurant
1,M5B,Downtown Toronto,"(Garden District, Ryerson)",43.657162,-79.378937,4,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Japanese Restaurant,Cosmetics Shop,Café,Diner,Pizza Place,Electronics Store,Bookstore
2,M5C,Downtown Toronto,"(St. James Town,)",43.651494,-79.375418,4,Coffee Shop,Café,Restaurant,Italian Restaurant,Hotel,Diner,Breakfast Spot,Clothing Store,Bakery,Cosmetics Shop
3,M5E,Downtown Toronto,"(Downtown Toronto,)",43.644771,-79.373306,1,Coffee Shop,Café,Restaurant,Seafood Restaurant,Park,Beer Bar,Bakery,Cheese Shop,Japanese Restaurant,Cocktail Bar
4,M5G,Downtown Toronto,"(Bay Street,)",43.657952,-79.387383,1,Coffee Shop,Italian Restaurant,Sandwich Place,Japanese Restaurant,Ice Cream Shop,Burger Joint,Gym / Fitness Center,Middle Eastern Restaurant,Bubble Tea Shop,Café


In [158]:
# create map
import matplotlib.cm as cm
import matplotlib.colors as colors

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(merged['Latitude'], merged['Longitude'], merged['Neighborhood'], merged['Cluster Labels']):
    # rows without a cluster label cannot be coloured; skip them
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so they index `rainbow` directly;
    # `cluster-1` only worked because -1 wraps around to the last colour.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [166]:
#examine each cluster

# cluster 0: Borough (column 1) plus the ten most-common-venue columns (6 to 15)
merged.loc[merged['Cluster Labels'] == 0, merged.columns[[1, *range(6, 16)]]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Downtown Toronto,Airport Lounge,Airport Service,Airport Terminal,Plane,Harbor / Marina,Sculpture Garden,Boat or Ferry,Rental Car Location,Coffee Shop,Boutique


In [167]:
# cluster 1: Borough (column 1) plus the ten most-common-venue columns (6 to 15)
merged.loc[merged['Cluster Labels'] == 1, merged.columns[[1, *range(6, 16)]]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,Coffee Shop,Bakery,Pub,Park,Theater,Mexican Restaurant,Café,Beer Store,Breakfast Spot,Restaurant
3,Downtown Toronto,Coffee Shop,Café,Restaurant,Seafood Restaurant,Park,Beer Bar,Bakery,Cheese Shop,Japanese Restaurant,Cocktail Bar
4,Downtown Toronto,Coffee Shop,Italian Restaurant,Sandwich Place,Japanese Restaurant,Ice Cream Shop,Burger Joint,Gym / Fitness Center,Middle Eastern Restaurant,Bubble Tea Shop,Café
5,Downtown Toronto,Coffee Shop,Café,Restaurant,Seafood Restaurant,Park,Beer Bar,Bakery,Cheese Shop,Japanese Restaurant,Cocktail Bar
7,Downtown Toronto,Coffee Shop,Aquarium,Hotel,Italian Restaurant,Café,Restaurant,Sporting Goods Shop,Scenic Lookout,Brewery,Fried Chicken Joint
8,Downtown Toronto,Coffee Shop,Hotel,Café,Restaurant,Japanese Restaurant,Gastropub,Seafood Restaurant,American Restaurant,Bar,Italian Restaurant
9,Downtown Toronto,Coffee Shop,Café,Restaurant,Hotel,Gym,American Restaurant,Deli / Bodega,Seafood Restaurant,Japanese Restaurant,Italian Restaurant
14,Downtown Toronto,Coffee Shop,Café,Restaurant,Seafood Restaurant,Park,Beer Bar,Bakery,Cheese Shop,Japanese Restaurant,Cocktail Bar
16,Downtown Toronto,Coffee Shop,Café,Restaurant,Gym,Hotel,Seafood Restaurant,Asian Restaurant,Japanese Restaurant,American Restaurant,Steakhouse


In [168]:
# cluster 2: Borough (column 1) plus the ten most-common-venue columns (6 to 15)
merged.loc[merged['Cluster Labels'] == 2, merged.columns[[1, *range(6, 16)]]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Downtown Toronto,Park,Playground,Trail,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [169]:
# cluster 3: Borough (column 1) plus the ten most-common-venue columns (6 to 15)
merged.loc[merged['Cluster Labels'] == 3, merged.columns[[1, *range(6, 16)]]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,Café,Restaurant,Bakery,Bar,Bookstore,Japanese Restaurant,Italian Restaurant,Dessert Shop,Pub,Noodle House


In [170]:
# cluster 4: Borough (column 1) plus the ten most-common-venue columns (6 to 15)
merged.loc[merged['Cluster Labels'] == 4, merged.columns[[1, *range(6, 16)]]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Japanese Restaurant,Cosmetics Shop,Café,Diner,Pizza Place,Electronics Store,Bookstore
2,Downtown Toronto,Coffee Shop,Café,Restaurant,Italian Restaurant,Hotel,Diner,Breakfast Spot,Clothing Store,Bakery,Cosmetics Shop
6,Downtown Toronto,Coffee Shop,Restaurant,Café,Thai Restaurant,Steakhouse,Bar,Gastropub,Bookstore,Pizza Place,Breakfast Spot
11,Downtown Toronto,Bar,Vietnamese Restaurant,Café,Coffee Shop,Vegetarian / Vegan Restaurant,Bakery,Mexican Restaurant,Dumpling Restaurant,Comfort Food Restaurant,Cocktail Bar
15,Downtown Toronto,Coffee Shop,Bakery,Park,Restaurant,Italian Restaurant,Café,Pizza Place,Pharmacy,Pet Store,Pub
17,Downtown Toronto,Coffee Shop,Japanese Restaurant,Gay Bar,Restaurant,Sushi Restaurant,Hotel,Gastropub,Pub,Men's Store,Mediterranean Restaurant


In [None]:
# Cluster 0 is where the airport and harbor are located.
# Cluster 2 is good for a walk: it has parks, playgrounds, and trails.
# Clusters 1, 3, and 4 are very similar. They are the places where you can
# walk around to shop and eat, but cluster 3 has more coffee shops and bakeries.