# Scrape the table from the Wiki page with three columns

In [1]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
import lxml.html as lh
import pandas as pd
import numpy as np

In [3]:
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

In [4]:
#Store the contents of the website under doc
doc = lh.fromstring(website_url.content)
#Parse data that are stored between <tr>..</tr> of HTML
tr_elements = doc.xpath('//tr')

In [5]:
#Check the length of the first 12 rows
[len(T) for T in tr_elements[:12]]

[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]

In [6]:
tr_elements = doc.xpath('//tr')
#Create empty list
col=[]
#For each row, store each first element (header) and an empty list
for t in tr_elements[0]:
    name=t.text_content().rstrip()
    col.append((name,[]))

In [7]:
#Since out first row is the header, data is stored on the second row onwards
for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    #If row is not of size 10, the //tr data is not from our table 
    if len(T)!=3:
        break
    
    #i is the index of our column
    i=0
    
    #Iterate through each element of the row
    for t in T.iterchildren():
        data=t.text_content() 
        #Check if row is empty
        if i>0:
        #Convert any numerical value to integers
            try:
                data=str(data).rstrip()
            except:
                pass
        #Append the data to the empty list of the i'th column
        col[i][1].append(data)
        #Increment i for the next column
        i+=1

In [8]:
Dict={title:column for (title,column) in col}
df=pd.DataFrame(Dict)

In [9]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### First, remove cells with a Borough that is Not assigned

In [10]:
df = df[df.Borough != 'Not assigned']

### If a row has a Borough but a Not assigned neighbourhood, then the neighbourhood will be set the same as the borough

In [11]:
df.loc[df['Neighbourhood'] == 'Not assigned', ['Neighbourhood']] = df['Borough']

### Combine the Neighbourhoods with the same Postcode, separate with comma

In [12]:
data = df.groupby(['Postcode', 'Borough'], as_index = False).agg({'Neighbourhood': ', '.join})

### Print the number of rows of dataframe

In [13]:
data.shape

(103, 3)

# Segmenting and Clustering - Kmeans

In [14]:
!conda install -c conda-forge geopy --yes 

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.2

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    scikit-learn-0.20.1        |   py36h22eb022_0         5.7 MB
    liblapack-3.8.0            |      11_openblas          10 KB  conda-forge
    numpy-1.18.1               |   py36h95a1406_0         5.2 MB  conda-forge
    liblapacke-3.8.0           |      11_openblas          10 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    libopenblas-0.3.6          |       h5a2b251_2         7.7 MB
    scipy-1.4.1                |   py36h921218d_0        

In [15]:
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.2

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



In [16]:
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
print('Libraries imported.')

Libraries imported.


In [49]:
# @hidden_cell
CLIENT_ID = 'ZQ20W2PK1HYQ0VLCTNKJCTRCTUSMAUSOLCSQOJXSYKPED0A0' # your Foursquare ID
CLIENT_SECRET = 'H5XKZHW5L5IHMQ1NH0QTUFMIVIVFEEGDVYMCZFSTCH030NP2' # your Foursquare Secret
VERSION = '20200210'
LIMIT = 30

In [18]:
loc = data['Borough'] + ', ' + data['Postcode']
addr = []
for loca in loc:
    addr.append(str(loca))

In [19]:
geolocator = Nominatim(user_agent="foursquare_agent")

In [20]:
new = data
for i in range(1,len(addr)):
    if geolocator.geocode(addr[i-1]) == None:
        new.drop(i-1, inplace = True)

In [21]:
new.drop([0,1], inplace = True)

In [22]:
new.reset_index().drop('index', axis = 1)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1G,Scarborough,Woburn
1,M1W,Scarborough,L'Amoreaux West
2,M2J,North York,"Fairview, Henry Farm, Oriole"
3,M2M,North York,"Newtonbrook, Willowdale"
4,M2N,North York,Willowdale South
5,M3A,North York,Parkwoods
6,M3C,North York,"Flemingdon Park, Don Mills South"
7,M3H,North York,"Bathurst Manor, Downsview North, Wilson Heights"
8,M3J,North York,"Northwood Park, York University"
9,M4L,East Toronto,"The Beaches West, India Bazaar"


In [23]:
loca = new['Borough'] + ', ' + new['Postcode']
add = []
for loc in loca:
    add.append(str(loc))

In [24]:
lat = np.zeros(len(add))
lon = np.zeros(len(add))
index = 0
for i in add:
    location = geolocator.geocode(i)
    lat[index] = location.latitude
    lon[index] = location.longitude
    index += 1

In [25]:
lata = pd.Series(lat)
long = pd.Series(lon)
redata = new.reset_index().drop('index', axis = 1)
redata['Latitude'] = lata
redata['Longitude'] = long

In [26]:
redata

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1G,Scarborough,Woburn,43.762669,-79.230861
1,M1W,Scarborough,L'Amoreaux West,43.773077,-79.257774
2,M2J,North York,"Fairview, Henry Farm, Oriole",43.754326,-79.449117
3,M2M,North York,"Newtonbrook, Willowdale",43.785962,-79.416031
4,M2N,North York,Willowdale South,43.754326,-79.449117
5,M3A,North York,Parkwoods,43.754326,-79.449117
6,M3C,North York,"Flemingdon Park, Don Mills South",43.732822,-79.346961
7,M3H,North York,"Bathurst Manor, Downsview North, Wilson Heights",43.756199,-79.439802
8,M3J,North York,"Northwood Park, York University",43.754326,-79.449117
9,M4L,East Toronto,"The Beaches West, India Bazaar",43.67276,-79.304058


In [27]:
address = 'Toronto, TR'

location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6561136, -79.392321.


In [28]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(redata['Latitude'], redata['Longitude'], redata['Borough'], redata['Neighbourhood']):
    label = '{}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Refer to the map.png in the same folder.

## Explore Neighborhoods in Toronto

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    
    return(nearby_venues)

In [30]:
toronto_venues = getNearbyVenues(names=redata['Neighbourhood'],latitudes=redata['Latitude'],longitudes=redata['Longitude'])

Woburn
L'Amoreaux West
Fairview, Henry Farm, Oriole
Newtonbrook, Willowdale
Willowdale South
Parkwoods
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
The Beaches West, India Bazaar
Adelaide, King, Richmond
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
The Junction North, Runnymede
High Park, The Junction South
Runnymede, Swansea
Queen's Park
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
Kingsview Village, Martin Grove Gardens, Richview Gardens, St. Phillips
Northwest


In [31]:
print(toronto_venues.shape)
toronto_venues.head()

(346, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Woburn,43.762669,-79.230861,Staples Cedarbrae,43.759851,-79.228335,Paper / Office Supplies Store
1,Woburn,43.762669,-79.230861,TD Canada Trust,43.76152,-79.226557,Bank
2,Woburn,43.762669,-79.230861,A&W,43.761066,-79.22529,Fast Food Restaurant
3,Woburn,43.762669,-79.230861,Pho Dau Bo,43.761286,-79.22517,Vietnamese Restaurant
4,Woburn,43.762669,-79.230861,Tim Hortons,43.760028,-79.226545,Coffee Shop


In [32]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",30,30,30,30,30,30
"Bathurst Manor, Downsview North, Wilson Heights",21,21,21,21,21,21
"Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe",5,5,5,5,5,5
"Brockton, Exhibition Place, Parkdale Village",1,1,1,1,1,1
"Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park",20,20,20,20,20,20
"Fairview, Henry Farm, Oriole",4,4,4,4,4,4
"Flemingdon Park, Don Mills South",30,30,30,30,30,30
"High Park, The Junction South",26,26,26,26,26,26
"Kingsview Village, Martin Grove Gardens, Richview Gardens, St. Phillips",5,5,5,5,5,5
L'Amoreaux West,30,30,30,30,30,30


In [33]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 118 uniques categories.


## Analyze Each Neighborhood

In [34]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighbourhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bakery,Bank,Bar,...,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,Woburn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Woburn,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,Woburn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Woburn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,Woburn,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,American Restaurant,Arcade,Art Gallery,Arts & Crafts Store,Asian Restaurant,BBQ Joint,Bakery,Bank,Bar,...,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,...,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,...,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0
2,"Bloordale Gardens, Eringate, Markland Wood, Ol...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Cloverdale, Islington, Martin Grove, Princess ...",0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0
5,"Fairview, Henry Farm, Oriole",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Flemingdon Park, Don Mills South",0.066667,0.0,0.0,0.0,0.0,0.0,0.033333,0.033333,0.033333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333
7,"High Park, The Junction South",0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.076923,...,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Kingsview Village, Martin Grove Gardens, Richv...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,L'Amoreaux West,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.033333,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.033333,0.0


In [36]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                venue  freq
0  Chinese Restaurant  0.07
1      Clothing Store  0.07
2          Poke Place  0.03
3       Shopping Mall  0.03
4          Comic Shop  0.03


----Bathurst Manor, Downsview North, Wilson Heights----
            venue  freq
0     Coffee Shop  0.10
1        Pharmacy  0.05
2  Sandwich Place  0.05
3     Gas Station  0.05
4   Shopping Mall  0.05


----Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe----
         venue  freq
0   Playground   0.2
1     Pharmacy   0.2
2  Coffee Shop   0.2
3       Garden   0.2
4  Supermarket   0.2


----Brockton, Exhibition Place, Parkdale Village----
                       venue  freq
0                  Pet Store   1.0
1        American Restaurant   0.0
2  Middle Eastern Restaurant   0.0
3                Pizza Place   0.0
4                   Pharmacy   0.0


----Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park----
                  venue  freq
0           Coffee Shop

In [37]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [38]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Clothing Store,Chinese Restaurant,Comic Shop,Restaurant,Ramen Restaurant,Poke Place,Plaza,Office,Neighborhood,Cosmetics Shop
1,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Sushi Restaurant,Park,Deli / Bodega,Pizza Place,Middle Eastern Restaurant,Diner,Restaurant,Sandwich Place,Community Center
2,"Bloordale Gardens, Eringate, Markland Wood, Ol...",Playground,Garden,Coffee Shop,Supermarket,Pharmacy,Gourmet Shop,Gastropub,Cosmetics Shop,Gym,Cupcake Shop
3,"Brockton, Exhibition Place, Parkdale Village",Pet Store,Women's Store,Flower Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store,Falafel Restaurant
4,"Cloverdale, Islington, Martin Grove, Princess ...",Coffee Shop,Grocery Store,Pizza Place,Sporting Goods Shop,Food & Drink Shop,Fast Food Restaurant,Cupcake Shop,Cosmetics Shop,Pharmacy,Chinese Restaurant


## Cluster Neighborhoods

In [39]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 4, 3, 1, 4, 2, 4, 4, 3, 4], dtype=int32)

In [40]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [41]:
toronto_merged = redata

In [42]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1G,Scarborough,Woburn,43.762669,-79.230861,0,Taco Place,Vietnamese Restaurant,Coffee Shop,Paper / Office Supplies Store,Fast Food Restaurant,Bank,Women's Store,Filipino Restaurant,Falafel Restaurant,Electronics Store
1,M1W,Scarborough,L'Amoreaux West,43.773077,-79.257774,4,Clothing Store,Sandwich Place,Pharmacy,Gas Station,Wings Joint,Food Court,Fish & Chips Shop,Juice Bar,Gym,Mexican Restaurant
2,M2J,North York,"Fairview, Henry Farm, Oriole",43.754326,-79.449117,2,Mediterranean Restaurant,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store
3,M2M,North York,"Newtonbrook, Willowdale",43.785962,-79.416031,4,Korean Restaurant,Coffee Shop,Hot Dog Joint,Bus Line,Indian Restaurant,Intersection,Japanese Restaurant,Electronics Store,Park,Sandwich Place
4,M2N,North York,Willowdale South,43.754326,-79.449117,2,Mediterranean Restaurant,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store


In [43]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Refer to the clusters.png in the same folder

## Examine Clusters

### Cluster 1

In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0,Taco Place,Vietnamese Restaurant,Coffee Shop,Paper / Office Supplies Store,Fast Food Restaurant,Bank,Women's Store,Filipino Restaurant,Falafel Restaurant,Electronics Store
13,York,0,Pizza Place,Beer Store,Park,Coffee Shop,Asian Restaurant,Supermarket,Filipino Restaurant,Fast Food Restaurant,Falafel Restaurant,Electronics Store


### Cluster 2

In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,West Toronto,1,Pet Store,Women's Store,Flower Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store,Falafel Restaurant


### Cluster 3

In [46]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,North York,2,Mediterranean Restaurant,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store
4,North York,2,Mediterranean Restaurant,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store
5,North York,2,Mediterranean Restaurant,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store
8,North York,2,Mediterranean Restaurant,Pizza Place,Coffee Shop,Middle Eastern Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Electronics Store


### Cluster 4

In [47]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Etobicoke,3,Playground,Garden,Coffee Shop,Supermarket,Pharmacy,Gourmet Shop,Gastropub,Cosmetics Shop,Gym,Cupcake Shop
19,Etobicoke,3,Playground,Garden,Coffee Shop,Supermarket,Pharmacy,Gourmet Shop,Gastropub,Cosmetics Shop,Gym,Cupcake Shop


### Cluster 5

In [48]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,4,Clothing Store,Sandwich Place,Pharmacy,Gas Station,Wings Joint,Food Court,Fish & Chips Shop,Juice Bar,Gym,Mexican Restaurant
3,North York,4,Korean Restaurant,Coffee Shop,Hot Dog Joint,Bus Line,Indian Restaurant,Intersection,Japanese Restaurant,Electronics Store,Park,Sandwich Place
6,North York,4,American Restaurant,Restaurant,Pizza Place,Coffee Shop,Ice Cream Shop,Pet Store,Movie Theater,Middle Eastern Restaurant,Mexican Restaurant,Liquor Store
7,North York,4,Coffee Shop,Sushi Restaurant,Park,Deli / Bodega,Pizza Place,Middle Eastern Restaurant,Diner,Restaurant,Sandwich Place,Community Center
9,East Toronto,4,Coffee Shop,Pub,BBQ Joint,Bakery,Bus Stop,Portuguese Restaurant,Jewelry Store,Japanese Restaurant,Restaurant,Salon / Barbershop
10,Downtown Toronto,4,Clothing Store,Chinese Restaurant,Comic Shop,Restaurant,Ramen Restaurant,Poke Place,Plaza,Office,Neighborhood,Cosmetics Shop
11,West Toronto,4,Bar,Cocktail Bar,Taco Place,Italian Restaurant,American Restaurant,Food Court,New American Restaurant,Dessert Shop,Park,Chinese Restaurant
14,West Toronto,4,Café,Mexican Restaurant,Thai Restaurant,Bar,Grocery Store,Coffee Shop,Cajun / Creole Restaurant,Flea Market,Seafood Restaurant,Fried Chicken Joint
15,West Toronto,4,Coffee Shop,Bakery,Café,Pizza Place,Pet Store,Shoe Store,Pharmacy,Gourmet Shop,Park,Liquor Store
16,Downtown Toronto,4,Coffee Shop,Pub,Park,Mediterranean Restaurant,Restaurant,Café,Poutine Place,Pizza Place,Mexican Restaurant,Fast Food Restaurant
