# 1. Scrape the wikipedia page

In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [3]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source,'lxml')

### Find the Table

In [4]:
table = soup.find('table',{'class':'wikitable sortable'})

### Iteration: loop through the rows to get the data

In [4]:
PostalCode =[]
Borough = []
Neighbourhood =[]

In [5]:
for row in table.findAll("tr"):
    cells = row.findAll("td")
    if len(cells) == 3:
        PostalCode.append(cells[0].find(text=True))
        Borough.append(cells[1].find(text=True))
        Neighbourhood.append(cells[2].find(text=True))

In [6]:
df = pd.DataFrame(PostalCode, columns = ['PostalCode'])
df['Borough'] = Borough
df['Neighbourhood'] = Neighbourhood

# 2. Clean the table

### (1). Remove cells with a borough that is ''Not assigned''

In [8]:
condition = df.Borough == 'Not assigned'
df = df.drop(df[condition].index, axis = 0, inplace = False)

### (2). For cells with a borough but a ''Not assigned'' neighborhood, replace the neighborhood with the borough.

In [10]:
df['Neighbourhood'] = df['Neighbourhood'].str.strip()

In [11]:
import numpy as np
df['Neighbourhood'] = np.where(df['Neighbourhood'] =='Not assigned', df['Borough'], df['Neighbourhood'])

### (3). Combine Neighbourhood with the same postal code

In [13]:
df2 = pd.DataFrame(df.groupby(['PostalCode','Borough'], as_index = False).agg(', '.join))
df2.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [14]:
df2.shape

(103, 3)

# 3. Get latitudes and longitudes

In [15]:
geodata = pd.read_csv('https://cocl.us/Geospatial_data')
geodata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
df3 = pd.concat([df2, geodata], axis=1).drop('Postal Code',axis = 1)
df3.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [17]:
df3.shape 

(103, 5)

# 4. Visualize all neighbourhoods in Toronto

In [18]:
# !conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

In [19]:
address = 'Toronto'

geolocator = Nominatim(user_agent="trt_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [20]:
import folium

# Create map of Toronto using latitude and longitude values
map_trt = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add markers to map
for lat, lng, borough, neighborhood in zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_trt)  
    
map_trt

# 5. Focus on the neighborhoods in downtown Toronto

In [5]:
dt_trt = df3[df3['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
dt_trt.head()

NameError: name 'df3' is not defined

### Visualize neighbourhoods in downtown Toronto

In [22]:
# Get the geographical coordinates of Downtown Toronto.

address = 'Downtown Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto are 43.6541737, -79.3808116451341.


In [23]:
# Create map of Downtown Toronto using latitude and longitude values
map_dt_trt = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(dt_trt['Latitude'], dt_trt['Longitude'], dt_trt['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_dt_trt)  
    
map_dt_trt

### Use the Foursquare API to explore the neighborhoods and segment them.

In [24]:
CLIENT_ID = 'U10ZJS1TXTKPWOT5UBLAIVNARBH3AOVVEB3WWYAPBKERMBIG'
CLIENT_SECRET = '00RAH1VSAN5HGN5V1XX4ASG5MJR45NPHJRQ3R1LGMTQYK2AG'
VERSION = '20181110'

Explore the first neighborhood

In [25]:
neighborhood_latitude = dt_trt.loc[0, 'Latitude'] 
neighborhood_longitude = dt_trt.loc[0, 'Longitude'] 
neighborhood_name = dt_trt.loc[0, 'Neighbourhood']

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rosedale are 43.6795626, -79.37752940000001.


Get the top 100 venues that are in Rosedale within a radius of 500 meters.

In [26]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# Create the GET request URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

url 

'https://api.foursquare.com/v2/venues/explore?&client_id=U10ZJS1TXTKPWOT5UBLAIVNARBH3AOVVEB3WWYAPBKERMBIG&client_secret=00RAH1VSAN5HGN5V1XX4ASG5MJR45NPHJRQ3R1LGMTQYK2AG&v=20181110&ll=43.6795626,-79.37752940000001&radius=500&limit=100'

In [27]:
# Send the GET request 
results = requests.get(url).json()

In [28]:
# Extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Clean the json and structure it into a *pandas* dataframe.

In [29]:
venues = results['response']['groups'][0]['items']

from pandas.io.json import json_normalize
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Rosedale Park,Playground,43.682328,-79.378934
1,Whitney Park,Park,43.682036,-79.373788
2,Alex Murray Parkette,Park,43.6783,-79.382773
3,Milkman's Lane,Trail,43.676352,-79.373842


In [30]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


### Repeat the same process to all the neighborhoods in downtown Toronto.

In [31]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Run the above function on each neighborhood and create a new dataframe.

In [32]:
dt_venues = getNearbyVenues(names = dt_trt['Neighbourhood'],
                            latitudes = dt_trt['Latitude'],
                            longitudes = dt_trt['Longitude'])                    

Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie


In [33]:
dt_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Rosedale Park,43.682328,-79.378934,Playground
1,Rosedale,43.679563,-79.377529,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,43.679563,-79.377529,Alex Murray Parkette,43.6783,-79.382773,Park
3,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail
4,"Cabbagetown, St. James Town",43.667967,-79.367675,Cranberries,43.667843,-79.369407,Diner


In [34]:
print(dt_venues.shape)

(1277, 7)


#### Count how many venues were returned for each neighborhood

In [35]:
dt_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",13,13,13,13,13,13
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Bay Street,81,81,81,81,81,81
"Chinatown, Grange Park, Kensington Market",100,100,100,100,100,100
Christie,15,15,15,15,15,15
Church and Wellesley,87,87,87,87,87,87
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"Design Exchange, Toronto Dominion Centre",100,100,100,100,100,100


In [36]:
print('There are {} uniques categories of all the returned venues.'.format(len(dt_venues['Venue Category'].unique())))

There are 207 uniques categories of all the returned venues.


### Analyze each neighbourhood

In [37]:
dt_onehot = pd.get_dummies(dt_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
dt_onehot['Neighbourhood'] = dt_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [dt_onehot.columns[-1]] + list(dt_onehot.columns[:-1])
dt_onehot = dt_onehot[fixed_columns]

dt_onehot.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rosedale,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,"Cabbagetown, St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
dt_onehot.shape

(1277, 208)

#### Group rows by neighborhood, take the mean of the frequency of occurrence of each category

In [39]:
dt_grouped = dt_onehot.groupby('Neighbourhood').mean().reset_index()
dt_grouped.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.076923,0.076923,0.076923,0.153846,0.153846,0.153846,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012346,...,0.0,0.0,0.012346,0.0,0.0,0.012346,0.0,0.0,0.0,0.012346


In [40]:
dt_grouped.shape

(18, 208)

#### Print each neighborhood along with the top 5 most common venues

In [41]:
num_top_venues = 5

for hood in dt_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = dt_grouped[dt_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.06
1             Café  0.05
2       Steakhouse  0.04
3              Bar  0.04
4  Thai Restaurant  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2  Italian Restaurant  0.04
3              Bakery  0.04
4                 Pub  0.04


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.15
1  Airport Terminal  0.15
2    Airport Lounge  0.15
3     Boat or Ferry  0.08
4   Harbor / Marina  0.08


----Cabbagetown, St. James Town----
                venue  freq
0         Coffee Shop  0.09
1          Restaurant  0.07
2                 Pub  0.04
3              Bakery  0.04
4  Italian Restaurant  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.17
1                Café  0.05
2  Italian Restaurant  0.05
3      

#### Sort the venues in descending order

In [42]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Create the new dataframe and display the top 10 venues for each neighborhood.

In [43]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = dt_grouped['Neighbourhood']

for ind in np.arange(dt_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(dt_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Restaurant,Burger Joint,Hotel,Gym,Bakery
1,Berczy Park,Coffee Shop,Cocktail Bar,Restaurant,Bakery,Steakhouse,Seafood Restaurant,Farmers Market,Cheese Shop,Café,Pub
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Lounge,Airport Service,Airport Terminal,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Boat or Ferry,Harbor / Marina,Plane
3,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Park,Pub,Café,Italian Restaurant,Bakery,Pizza Place,Sandwich Place,Butcher
4,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Bar,Bubble Tea Shop,Burger Joint,Salad Place,Chinese Restaurant,Spa,Thai Restaurant


### Cluster neighbourhoods into 5 clusters

In [44]:
from sklearn.cluster import KMeans

In [45]:
kclusters = 5 # the number of clusters

dt_grouped_clustering = dt_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(dt_grouped_clustering)

kmeans.labels_[0:10] # cluster labels generated for each row in the dataframe

array([0, 0, 2, 3, 3, 0, 4, 0, 0, 0], dtype=int32)

#### Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [46]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dt_merged = dt_trt

# merge dt_grouped with dt_trt to add latitude/longitude for each neighborhood
dt_merged = dt_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), 
                           on ='Neighbourhood')

dt_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1,Park,Playground,Trail,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
1,M4X,Downtown Toronto,"Cabbagetown, St. James Town",43.667967,-79.367675,3,Coffee Shop,Restaurant,Park,Pub,Café,Italian Restaurant,Bakery,Pizza Place,Sandwich Place,Butcher
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Pub,Gastropub,Fast Food Restaurant,Mediterranean Restaurant,Men's Store
3,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,3,Coffee Shop,Pub,Park,Bakery,Café,Mexican Restaurant,Breakfast Spot,Theater,Yoga Studio,Farmers Market
4,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Burger Joint,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Tea Room,Theater


#### Visualize the resulting clusters

In [50]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [51]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dt_merged['Latitude'], dt_merged['Longitude'], dt_merged['Neighbourhood'], dt_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine each cluster

In [53]:
# Cluster 1
dt_merged.loc[dt_merged['Cluster Labels'] == 0, 
              dt_merged.columns[[1] + list(range(5, dt_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Coffee Shop,Japanese Restaurant,Gay Bar,Sushi Restaurant,Restaurant,Pub,Gastropub,Fast Food Restaurant,Mediterranean Restaurant,Men's Store
4,Downtown Toronto,0,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Burger Joint,Middle Eastern Restaurant,Japanese Restaurant,Italian Restaurant,Tea Room,Theater
5,Downtown Toronto,0,Coffee Shop,Restaurant,Hotel,Café,Breakfast Spot,Italian Restaurant,Cosmetics Shop,Cocktail Bar,Clothing Store,Park
6,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Restaurant,Bakery,Steakhouse,Seafood Restaurant,Farmers Market,Cheese Shop,Café,Pub
8,Downtown Toronto,0,Coffee Shop,Café,Bar,Steakhouse,Thai Restaurant,Restaurant,Burger Joint,Hotel,Gym,Bakery
10,Downtown Toronto,0,Coffee Shop,Hotel,Café,American Restaurant,Restaurant,Gastropub,Deli / Bodega,Lounge,Japanese Restaurant,Seafood Restaurant
11,Downtown Toronto,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Seafood Restaurant,Bakery,Steakhouse,Gym,Italian Restaurant
12,Downtown Toronto,0,Café,Coffee Shop,Bar,Japanese Restaurant,Bookstore,Restaurant,Bakery,Chinese Restaurant,Dessert Shop,Pub
13,Downtown Toronto,0,Bar,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Vietnamese Restaurant,Bakery,Coffee Shop,Mexican Restaurant,Caribbean Restaurant,Ice Cream Shop
15,Downtown Toronto,0,Coffee Shop,Restaurant,Café,Cocktail Bar,Seafood Restaurant,Hotel,Italian Restaurant,Pub,Fast Food Restaurant,Cosmetics Shop


In [54]:
# Cluster 2
dt_merged.loc[dt_merged['Cluster Labels'] == 1, 
              dt_merged.columns[[1] + list(range(5, dt_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1,Park,Playground,Trail,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


In [55]:
# Cluster 3
dt_merged.loc[dt_merged['Cluster Labels'] == 2, 
              dt_merged.columns[[1] + list(range(5, dt_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Downtown Toronto,2,Airport Lounge,Airport Service,Airport Terminal,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Boat or Ferry,Harbor / Marina,Plane


In [56]:
# Cluster 4
dt_merged.loc[dt_merged['Cluster Labels'] == 3, 
              dt_merged.columns[[1] + list(range(5, dt_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,3,Coffee Shop,Restaurant,Park,Pub,Café,Italian Restaurant,Bakery,Pizza Place,Sandwich Place,Butcher
3,Downtown Toronto,3,Coffee Shop,Pub,Park,Bakery,Café,Mexican Restaurant,Breakfast Spot,Theater,Yoga Studio,Farmers Market
7,Downtown Toronto,3,Coffee Shop,Italian Restaurant,Café,Bar,Bubble Tea Shop,Burger Joint,Salad Place,Chinese Restaurant,Spa,Thai Restaurant
9,Downtown Toronto,3,Coffee Shop,Hotel,Aquarium,Pizza Place,Italian Restaurant,Café,Scenic Lookout,Restaurant,Brewery,Music Venue


In [57]:
# Cluster 5
dt_merged.loc[dt_merged['Cluster Labels'] == 4, 
              dt_merged.columns[[1] + list(range(5, dt_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Downtown Toronto,4,Grocery Store,Café,Park,Restaurant,Baby Store,Diner,Nightclub,Italian Restaurant,Coffee Shop,Convenience Store
