# Segmenting and Clustering of Neighborhoods in Toronto
Assignment for Coursera course "IBM Applied Data Science Capstone" week 3

### Import required packages

In [1]:
import pandas as pd
import numpy as np
import urllib3
from bs4 import BeautifulSoup
import geocoder
import folium
import json
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# 1. Read input data from website and preprocessing data 

## 1.1 Read data from website

### GET request with urllib3

In [2]:
# Instanciate PoolManager object
http = urllib3.PoolManager()

# Specify the url for request
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Sent GET request to website and store the returned data (html format) to the variable 'page'
page = http.request('GET', url)

### Extract required data from HTML

In [3]:
# Parse the html using BeautifulSoup and return output to the variable 'page_content' 
page_content = BeautifulSoup(page.data, 'html.parser')

# Check if the content of the website has been fetched correctly and search for the tag with which the required
# information is displayed
print(page_content.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"f0c2f8b7-fef6-4a14-8984-aea9145b817c","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":979555370,"wgRevisionId":979555370,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Communicati

### The required information is displayed in the table tag with the class name '__wikitable sortable__'

In [4]:
# Extract the table (<table class="wikitable sortable">) and check the table content

table = page_content.find('table', attrs={'class': 'wikitable sortable'})
print(table.prettify())

<table class="wikitable sortable">
 <tbody>
  <tr>
   <th>
    Postal Code
   </th>
   <th>
    Borough
   </th>
   <th>
    Neighbourhood
   </th>
  </tr>
  <tr>
   <td>
    M1A
   </td>
   <td>
    Not assigned
   </td>
   <td>
    Not assigned
   </td>
  </tr>
  <tr>
   <td>
    M2A
   </td>
   <td>
    Not assigned
   </td>
   <td>
    Not assigned
   </td>
  </tr>
  <tr>
   <td>
    M3A
   </td>
   <td>
    North York
   </td>
   <td>
    Parkwoods
   </td>
  </tr>
  <tr>
   <td>
    M4A
   </td>
   <td>
    North York
   </td>
   <td>
    Victoria Village
   </td>
  </tr>
  <tr>
   <td>
    M5A
   </td>
   <td>
    Downtown Toronto
   </td>
   <td>
    Regent Park, Harbourfront
   </td>
  </tr>
  <tr>
   <td>
    M6A
   </td>
   <td>
    North York
   </td>
   <td>
    Lawrence Manor, Lawrence Heights
   </td>
  </tr>
  <tr>
   <td>
    M7A
   </td>
   <td>
    Downtown Toronto
   </td>
   <td>
    Queen's Park, Ontario Provincial Government
   </td>
  </tr>
  <tr>
   <td>
    M8

In [5]:
# Define header
header = ['Postal Code', 'Borough', 'Neighborhood']

# Extract content of the table and store the result in a Pandas DataFrame
table_rows = table.find_all('tr')

content = []
for tr in table_rows:
    td = tr.find_all('td')
    row_content = [tr.text.strip() for tr in td if tr.text.strip()]
    if row_content:
        content.append(row_content)

# Store result in a Pandas DataFrame
df = pd.DataFrame(content, columns=header)
print('df shape:',df.shape)
df.head(10)

df shape: (180, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


__Result:__ Dataframe of raw input data set with 180 rows

## 1.2 Preprocessing of DataFrame 

### 1.2.1 Create DataFrame only containing rows with assigned 'Borough'

In [6]:
df = df[df['Borough'].ne('Not assigned')]
df.reset_index(drop=True, inplace=True)

print('df shape:',df.shape)
df.head(10)

df shape: (103, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


__Result:__ Number of rows in the DataFrame reduced from 180 to 103.<br><br>
DataFrame with 103 rows and 3 columns

### 1.2.2 Merge rows with same Postal Code

In [None]:
# Sort DataFrame by column 'Postal Code' and reset the index
df = df.sort_values(by='Postal Code')
df.reset_index(drop=True, inplace=True)

# Set first entry in the summarized list 
list_sum = [[df.iloc[0][0],df.iloc[0][1],df.iloc[0][2]]]

# Iterate through the rows of the (sorted) Dataframe and fill the summarized list (WITHOUT Postel Code duplicates)
# Add a new row to list_sum if the Postal Code in consecutive rows is NOT identical else add Borough and Neighborhood
# to the columns of the already existing row for the Postal code in the summarized list 

for i in range(1,df.shape[0]):
    if df.iloc[i][0] != list_sum[-1][0]:
        list_sum.append([df.iloc[i][0],df.iloc[i][1],df.iloc[i][2]])
    else:
        list_sum[-1][1] = list_sum[-1][1] + ', ' + df.iloc[i][1]
        list_sum[-1][2] = list_sum[-1][2] + ', ' + df.iloc[i][2]

# Convert summarized list to Pandas DataFrame
df = pd.DataFrame(list_sum, columns=header)
print('df shape:',df.shape)
df.head(10)

__Result:__ There are no rows with the same postal code in the DataFrame. Therefore no row needed to be modified.<br><br>
DataFrame with 103 rows and 3 columns

### 1.2.3 Replace entries 'Not assigned' in cells of column 'Neighborhood' with corresponding 'Borough'

In [7]:
corr_count = 0

for idx in df.index:
    if df['Neighborhood'][idx].strip() == 'Not assigned':
        df['Neighborhood'][idx] = df['Borough'][idx]
        corr_count = corr_count + 1

print(corr_count, ' rows with column \'Neighborhood\' entry == \'Not assigned\' found and updated')
print('')
print('df shape:',df.shape)
df.head(10)

0  rows with column 'Neighborhood' entry == 'Not assigned' found and updated

df shape: (103, 3)


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


__Result:__ All rows in the input data set with a borough assigned also had a neighborhood assigned. Therefore no row needed to be modified.<br><br>
DataFrame with 103 rows and 3 columns

# 2. Add geographical coordinates for each Postal code / Neighborhood 

## 2.1 Geographical coordinates using Geocoder and ArcGIS API (instead of Google API)
As the Google API doesn't work reliably the __ArcGIS API__ is used to obtain the geographical coordinates corresponding to the __Postal Code__ / __Neighborhood__ in each row of the DataFrame. The query used for the call via ArcGIS API is of the format:<br><br>
g = geocoder.arcgis('{__Postal Code__}, {__Borough__}, Toronto, Canada')<br><br>

If the query was successful the laltitude (__g.lat__) and longitude (__g.lng__) are stored along with the Postal Code, the Borough and the Neighborhood in a list. Occationally the query leads to no result due to server time out. To address this problem the request is re-sent again until a result (g not __None__) for the coordinates has been received. To avoid an infinite loop of re-sending the query for a particular address maximal 5 queries are sent before the result for the coordinates is set to be 'Not found'.<br><br>
The final list is stored in a Pandas Dataframe.

In [9]:
# Initialize the list for the results
geo_coord_list = []

# Maximum number of trials for sending a query for a particular address 
max_trial = 5

# Total amount of queries where no coordinates have been found for the given address
address_not_found = 0

# Loop through the rows in the input DataFrame 
for row in range(0, len(df)):
    
    # Set inital variables for each new row
    success = False
    trial = 1
    geo_coord_list.append([df['Postal Code'][row],df['Borough'][row],df['Neighborhood'][row],'Not found','Not found'])
    
    # Query API until successful result is obtained or the maximum amount of trails is reached
    while (success == False) and (trial <= max_trial):
        
        query = '{}, {}, Toronto, Canada'.format(df['Postal Code'][row], df['Borough'][row])
        g = geocoder.arcgis(query)
        
        # Query successfull, coordinates g.lat and g.lng stored in the current row of the geo_coord_list --> exit while loop    
        if g.lat and g.lng:
            geo_coord_list[row][3] = g.lat
            geo_coord_list[row][4] = g.lng
            success = True
            
        # Query failed (g = None) --> do while loop again
        else:
            trial = trial + 1
            if trial > max_trial:
                address_not_found = address_not_found + 1


# Print summary and coverting geo_coord_list to Pandas DataFrame 
print(len(df)-address_not_found,'/',len(df),'Geographical coordinates found for the given input data set.')
print('')

df_toronto_neighborhood_list = pd.DataFrame(geo_coord_list,
                                            columns=['Postal Code', 'Borough', 'Neighborhood', 'Latitude', 'Longitude'])

print('df_toronto_neighborhood_list shape: ',df_toronto_neighborhood_list.shape)
df_toronto_neighborhood_list.head(10)

103 / 103 Geographical coordinates found for the given input data set.

df_toronto_neighborhood_list shape:  (103, 5)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1747
3,M1G,Scarborough,Woburn,43.76812,-79.21761
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892
5,M1J,Scarborough,Scarborough Village,43.74446,-79.23117
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.72582,-79.26461
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.71289,-79.28506
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7236,-79.23496
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6951,-79.26466


__Result:__ For each postal code the corresponding geographical coordinates (latitude and longitude) have been obtained.<br><br>
DataFrame with 103 rows and 5 columns. Columns 'Latitude' and 'Longitude' have been added.

## 2.2 Geographical coordinates using input file (alternative solution)
Merging the DataFrame containing __Postal Code__, __Borough__ and __Neighborhood__ with the DataFrame containing __Postal Code__, __Latitude__ and __Longitude__ according to the corresponding __Postal Code__<br><br>
__Note:__ The following steps have been carried out to demonstrate the concept. The obtained DataFrame is NOT used for the subsequent analysis.

In [10]:
# Read data from provided input file
df_coord = pd.read_csv('../data/Geospatial_Coordinates.csv')
df_coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
# Merge two DataFrames
df_merged = pd.merge(df,df_coord,how='left', on='Postal Code')
print('df_merged shape :', df_merged.shape)
df_merged

df_merged shape : (103, 5)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


__Result:__ DataFrame with 103 rows and 5 columns. Columns 'Latitude' and 'Longitude' have been added.

### 2.3 Display the results in a map of Toronto
__Note:__ Only the results obtained in section '2.1 Geographical coordinates using Geocoder and ArcGIS API (instead of Google API)' are being displayed

In [12]:
# Obtain geographical coordinates of Toronto
g = geocoder.arcgis('Toronto, Canada')
lat_Toronto = g.lat
lon_Toronto = g.lng
print('The geograpical coordinates of Toronto are {}, {}.'.format(lat_Toronto, lon_Toronto))

The geograpical coordinates of Toronto are 43.648690000000045, -79.38543999999996.


In [13]:
# Create a map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[lat_Toronto, lon_Toronto],
                         tiles='Stamen Terrain',
                         zoom_start=10)

tooltip_label = 'Click for details!'

# Add markers for 'Postal code' with corresponding 'Borough' to the map
for postalcode, borough, lat, lng in zip(df_toronto_neighborhood_list['Postal Code'],
                                         df_toronto_neighborhood_list['Borough'],
                                         df_toronto_neighborhood_list['Latitude'],
                                         df_toronto_neighborhood_list['Longitude']):
    
    label = '<b>{}</b>, {}'.format(borough, postalcode)
    popup_label = folium.Popup(label, parse_html=False)
    
    folium.CircleMarker([lat, lng],
                        radius=4,
                        popup=label,
                        color='green',
                        fill=True,
                        fill_color='green',
                        fill_opacity=1.0,
                        parse_html=False
                       ).add_to(map_toronto)
    

# Display resulting map  
map_toronto

# 3. Exploring and clustering neighborhoods of Toronto

## 3.1 Clustering of neighborhoods of Toronto by boroughs
__Note:__ Section 3.1 is not part of the assignment. Only for demostration.

### 3.1.1 Geographical coordinates by average of geographical coordinates of Postal Codes belonging to a Borough using ArcGIS API

In [14]:
# Extract columns 'Borough','Latitude' and 'Longitude' from the full list df_toronto_neighborhood_list including all the
# geographical coordinates corresponding to the 'Postal Code'.
# By grouping this list by 'Borough' and calculating the mean of 'Latitude' and 'Longitude' the geographical coordinates 
# of the center (centroid) of each Borough is calculated as the average of the 'Latitude' and 'Longitude' for each
# 'Postal Code' belonging to this Borough.

df_borough_center = df_toronto_neighborhood_list[['Borough','Latitude','Longitude']].groupby('Borough').mean()
df_borough_center.reset_index(inplace=True)
print('df_borough_center shape: ',df_borough_center.shape)
df_borough_center.head(10)

df_borough_center shape:  (10, 3)


Unnamed: 0,Borough,Latitude,Longitude
0,Central Toronto,43.609727,-79.492842
1,Downtown Toronto,43.655587,-79.384282
2,East Toronto,43.667926,-79.337134
3,East York,43.699086,-79.333092
4,Etobicoke,43.660261,-79.53972
5,Mississauga,43.58726,-79.64494
6,North York,43.749968,-79.430223
7,Scarborough,43.767364,-79.248238
8,West Toronto,43.65171,-79.445265
9,York,43.69133,-79.47311


### 3.1.2 Unique colour coding for individual boroughs

In [15]:
# Initialize list for unique colour coding for each borough
rgb_colours = []
boroughs_colour_code = []

# Loop through the rows of the borough_list and creating an unique random RGB colour code for each borough
#for idx in range(0, len(borough_list)):

# Loop through the rows of the df_borough_center and creating an unique random RGB colour code for each borough
for idx in df_borough_center.index:

    # Create a random RGB colour code
    red = np.random.default_rng().integers(low=0, high=255, endpoint=True)
    green = np.random.default_rng().integers(low=0, high=255, endpoint=True)
    blue = np.random.default_rng().integers(low=0, high=255, endpoint=True)
    rgb = 'rgb({},{},{})'.format(str(red),str(green),str(blue))
    
    # Create new random RGB colour code if this colour code already exist in the rgb_colours list  
    while rgb in rgb_colours:
        red = np.random.default_rng().integers(low=0, high=255, endpoint=True)
        green = np.random.default_rng().integers(low=0, high=255, endpoint=True)
        blue = np.random.default_rng().integers(low=0, high=255, endpoint=True)
        rgb = 'rgb({},{},{})'.format(str(red),str(green),str(blue))
    
    # Append new UNIQUE RGB colour code to the list
    rgb_colours.append(rgb)
    boroughs_colour_code.append([df_borough_center['Borough'][idx],rgb])

# Convert the list of 'Borough' with corresponding 'Borough RBG code' to a Pandas Dataframe (df_boroughs_colour_code)
df_boroughs_colour_code = pd.DataFrame(boroughs_colour_code, columns=['Borough', 'Borough RGB code'])
print('df_boroughs_colour_code shape: ',df_boroughs_colour_code.shape)
df_boroughs_colour_code.head(10)

df_boroughs_colour_code shape:  (10, 2)


Unnamed: 0,Borough,Borough RGB code
0,Central Toronto,"rgb(16,107,42)"
1,Downtown Toronto,"rgb(127,45,100)"
2,East Toronto,"rgb(70,146,129)"
3,East York,"rgb(209,65,158)"
4,Etobicoke,"rgb(226,75,68)"
5,Mississauga,"rgb(78,83,34)"
6,North York,"rgb(31,35,20)"
7,Scarborough,"rgb(161,13,242)"
8,West Toronto,"rgb(42,171,108)"
9,York,"rgb(200,202,222)"


### 3.1.3 Add column 'Borough RGB code' to DataFrame containing the geographical coordinates of all Postal codes 
(see __2.1 Geographical coordinates using Geocoder and ArcGIS API__)

In [16]:
# Merge Dataframe df_toronto_neighborhood_list and df_boroughs_colour_code
df_toronto_neighborhood_list_colour = pd.merge(df_toronto_neighborhood_list,df_boroughs_colour_code,how='left', on='Borough')
df_toronto_neighborhood_list_colour.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Borough RGB code
0,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662,"rgb(161,13,242)"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875,"rgb(161,13,242)"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1747,"rgb(161,13,242)"
3,M1G,Scarborough,Woburn,43.76812,-79.21761,"rgb(161,13,242)"
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892,"rgb(161,13,242)"
5,M1J,Scarborough,Scarborough Village,43.74446,-79.23117,"rgb(161,13,242)"
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.72582,-79.26461,"rgb(161,13,242)"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.71289,-79.28506,"rgb(161,13,242)"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7236,-79.23496,"rgb(161,13,242)"
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6951,-79.26466,"rgb(161,13,242)"


### 3.1.4 Add column 'Borough RGB code' to DataFrame obtained by the average of geographical coordinates of Postal Codes belonging to a Borough

In [17]:
# Merge Dataframe df_borough_center and df_boroughs_colour_code
df_borough_center_colour = pd.merge(df_borough_center,df_boroughs_colour_code,how='left', on='Borough')
df_borough_center_colour.head(10)

Unnamed: 0,Borough,Latitude,Longitude,Borough RGB code
0,Central Toronto,43.609727,-79.492842,"rgb(16,107,42)"
1,Downtown Toronto,43.655587,-79.384282,"rgb(127,45,100)"
2,East Toronto,43.667926,-79.337134,"rgb(70,146,129)"
3,East York,43.699086,-79.333092,"rgb(209,65,158)"
4,Etobicoke,43.660261,-79.53972,"rgb(226,75,68)"
5,Mississauga,43.58726,-79.64494,"rgb(78,83,34)"
6,North York,43.749968,-79.430223,"rgb(31,35,20)"
7,Scarborough,43.767364,-79.248238,"rgb(161,13,242)"
8,West Toronto,43.65171,-79.445265,"rgb(42,171,108)"
9,York,43.69133,-79.47311,"rgb(200,202,222)"


### 3.1.5 Display neigborhoods of Toronto clustered by boroughs and their center

In [18]:
# Create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[lat_Toronto, lon_Toronto],
                         tiles='Stamen Terrain',
                         zoom_start=11)

tooltip_label = 'Click for details!'

# Add markers for 'Postal code' with corresponding 'Borough' to the map
for bor, lat, lng, bor_col, p_code, neighbor in zip(df_toronto_neighborhood_list_colour['Borough'],
                                                    df_toronto_neighborhood_list_colour['Latitude'],
                                                    df_toronto_neighborhood_list_colour['Longitude'],
                                                    df_toronto_neighborhood_list_colour['Borough RGB code'],
                                                    df_toronto_neighborhood_list_colour['Postal Code'],
                                                    df_toronto_neighborhood_list_colour['Neighborhood']):
    
    label = '<b>{} {}</b>, {}'.format(p_code, bor, neighbor)
    popup_label = folium.Popup(label, parse_html=True)
    
    folium.CircleMarker([lat, lng],
                        radius=6,
                        popup=label,
                        color=None,#'green',
                        fill=True,
                        fill_color=bor_col,
                        fill_opacity=1.0,
                        parse_html=False).add_to(map_toronto)

# Add markers for center of 'Borough'
for bor, lat, lng, bor_col in zip(df_borough_center_colour['Borough'],
                                  df_borough_center_colour['Latitude'],
                                  df_borough_center_colour['Longitude'],
                                  df_borough_center_colour['Borough RGB code']):
    
    label = '<b>{}</b>, center'.format(bor)
    popup_label = folium.Popup(label, parse_html=True)
    
    folium.Marker([lat,lng],
                  popup=popup_label,
                  tooltip=tooltip_label,
                  icon=folium.Icon(color='blue',
                                   prefix='fa',
                                   icon='circle',
                                   icon_color=bor_col),
                 ).add_to(map_toronto)

# Display complete map
map_toronto

## 3.2 Exploring and clustering of neighborhoods of Toronto by venues

### 3.2.1 Define credentials for FOURSQUARE API

In [19]:
CLIENT_ID = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' # Foursquare ID
CLIENT_SECRET = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX' # Foursquare Secret

VERSION = '20180605' # Foursquare API version

__Note:__ The personal Credentials used for this API have been blanked out for security reasons.

### 3.2.3 Function to search for venues in a surounding area of a location with given geographical coordinates using FOURSQUARE API
__Note:__ Default values for __radius__ = 500 [m] and __LIMIT__ = 100 (top 100 venues)

In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
               
        # GET request using urllib3
        get_request = urllib3.PoolManager().request('GET', url)
        response = json.loads(get_request.data.decode('utf-8'))
        
        http_status_code = response['meta']['code']        
        found_venues = response['response']['totalResults']        
        results = response['response']['groups'][0]['items']
        
        if http_status_code == 200:
            if found_venues > 0:
                # return only relevant information for each nearby venue
                venues_list.append([(name,
                                     lat,
                                     lng,
                                     v['venue']['name'],
                                     v['venue']['location']['lat'],
                                     v['venue']['location']['lng'],
                                     v['venue']['categories'][0]['name']) for v in results])
                
                print('Request successful!',found_venues,'venues found in area:', name)
            else:
                print('Request successful! But NO venues found within radius of',radius,'m in area:',name)  
        else:
            print('Request failed! No information avaialable in area:',name)

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood',
                             'Neighborhood Latitude',
                             'Neighborhood Longitude',
                             'Venue',
                             'Venue Latitude',
                             'Venue Longitude',
                             'Venue Category']
    
    return(nearby_venues)

### 3.2.4 Search venues for each neighborhood of Toronto

In [21]:
toronto_venues = getNearbyVenues(names=df_toronto_neighborhood_list['Neighborhood'],
                                 latitudes=df_toronto_neighborhood_list['Latitude'],
                                 longitudes=df_toronto_neighborhood_list['Longitude'],
                                 radius=500,
                                 LIMIT=100
                                )

print('')
print('toronto_venues shape: ',toronto_venues.shape)
print('')
toronto_venues.head(10)

Request successful! 3 venues found in area: Malvern, Rouge
Request successful! 2 venues found in area: Rouge Hill, Port Union, Highland Creek
Request successful! 3 venues found in area: Guildwood, Morningside, West Hill
Request successful! 4 venues found in area: Woburn
Request successful! 1 venues found in area: Cedarbrae
Request successful! 5 venues found in area: Scarborough Village
Request successful! 3 venues found in area: Kennedy Park, Ionview, East Birchmount Park
Request successful! 10 venues found in area: Golden Mile, Clairlea, Oakridge
Request successful! 8 venues found in area: Cliffside, Cliffcrest, Scarborough Village West
Request successful! 4 venues found in area: Birch Cliff, Cliffside West
Request successful! 2 venues found in area: Dorset Park, Wexford Heights, Scarborough Town Centre
Request successful! 1 venues found in area: Wexford, Maryvale
Request successful! 16 venues found in area: Agincourt
Request successful! 16 venues found in area: Clarks Corners, Tam O'

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.81139,-79.19662,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Malvern, Rouge",43.81139,-79.19662,Wood Bison Paddock,43.811732,-79.200708,Zoo Exhibit
2,"Malvern, Rouge",43.81139,-79.19662,Shyne Display Group,43.80904,-79.199596,Furniture / Home Store
3,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875,SEBS Engineering Inc. (Sustainable Energy and ...,43.782371,-79.15682,Construction & Landscaping
4,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875,Royal Canadian Legion,43.782533,-79.163085,Bar
5,"Guildwood, Morningside, West Hill",43.76575,-79.1747,Homestead Roofing Repair,43.76514,-79.178663,Construction & Landscaping
6,"Guildwood, Morningside, West Hill",43.76575,-79.1747,Heron Park Community Centre,43.768867,-79.176958,Gym / Fitness Center
7,"Guildwood, Morningside, West Hill",43.76575,-79.1747,Heron Park,43.769327,-79.177201,Park
8,Woburn,43.76812,-79.21761,Starbucks,43.770037,-79.221156,Coffee Shop
9,Woburn,43.76812,-79.21761,cheapOseo,43.766042,-79.218539,Business Service


### 3.2.5 Check how many venues have been returned for each neighborhood

In [22]:
toronto_venues.groupby('Neighborhood').count()['Venue Category']

Neighborhood
Agincourt                            16
Alderwood, Long Branch                4
Bayview Village                       4
Bedford Park, Lawrence Manor East    19
Berczy Park                          60
                                     ..
Willowdale, Willowdale West           5
Woburn                                4
Woodbine Heights                     18
York Mills West                       3
York Mills, Silver Hills              1
Name: Venue Category, Length: 97, dtype: int64

### 3.2.6 Check how many unique categories can be curated from all the returned venues

In [23]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 266 uniques categories.


### 3.2.7 Analyze each neighborhood

In [24]:
# One hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Add Neighborhood column back to DataFrame
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# Move Neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print('toronto_onehot shape:', toronto_onehot.shape)
toronto_onehot.head(10)

toronto_onehot shape: (2324, 266)


Unnamed: 0,Zoo Exhibit,Accessories Store,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### 3.2.8 Group rows by 'Neighborhood' and take the mean of the frequency of occurrence of each category

In [25]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

print('toronto_grouped shape:', toronto_grouped.shape)
toronto_grouped

toronto_grouped shape: (97, 266)


Unnamed: 0,Neighborhood,Zoo Exhibit,Accessories Store,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,...,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.000000
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000000
2,Bayview Village,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000000
3,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000000
4,Berczy Park,0.0,0.0,0.0,0.0,0.016667,0.0,0.016667,0.0,0.000000,...,0.016667,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.016667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000000
93,Woburn,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000000
94,Woodbine Heights,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.055556,...,0.000000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000000
95,York Mills West,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000000


### 3.2.9 Print each neighborhood along with the top 5 most common venues

In [26]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                 venue  freq
0   Chinese Restaurant  0.12
1         Skating Rink  0.06
2        Shopping Mall  0.06
3  Shanghai Restaurant  0.06
4     Sushi Restaurant  0.06


----Alderwood, Long Branch----
                             venue  freq
0                Convenience Store  0.25
1                              Pub  0.25
2            Performing Arts Venue  0.25
3                              Gym  0.25
4  Molecular Gastronomy Restaurant  0.00


----Bayview Village----
                        venue  freq
0  Construction & Landscaping  0.25
1          Golf Driving Range  0.25
2                       Trail  0.25
3                        Park  0.25
4                 Zoo Exhibit  0.00


----Bedford Park, Lawrence Manor East----
                venue  freq
0      Sandwich Place  0.11
1         Coffee Shop  0.11
2  Italian Restaurant  0.11
3           Juice Bar  0.05
4        Liquor Store  0.05


----Berczy Park----
                venue  freq
0         Coffee Shop  0.

                 venue  freq
0          Music Venue   0.2
1             Bus Line   0.2
2          Pizza Place   0.2
3       Clothing Store   0.2
4  Arts & Crafts Store   0.2


----Lawrence Manor, Lawrence Heights----
                    venue  freq
0          Clothing Store  0.22
1  Furniture / Home Store  0.06
2           Women's Store  0.04
3              Food Court  0.04
4     American Restaurant  0.04


----Lawrence Park----
               venue  freq
0               Park  0.38
1  Convenience Store  0.25
2      Grocery Store  0.12
3               Bank  0.12
4     Breakfast Spot  0.12


----Leaside----
                    venue  freq
0             Coffee Shop  0.09
1     Sporting Goods Shop  0.09
2                    Bank  0.06
3  Furniture / Home Store  0.06
4            Burger Joint  0.06


----Little Portugal, Trinity----
                   venue  freq
0           Cocktail Bar  0.07
1            Coffee Shop  0.07
2                    Bar  0.07
3             Restaurant  0.04
4  Vi

4                  Café  0.04


----Willowdale, Willowdale West----
           venue  freq
0    Coffee Shop   0.2
1           Park   0.2
2    Pizza Place   0.2
3        Butcher   0.2
4  Grocery Store   0.2


----Woburn----
                        venue  freq
0  Construction & Landscaping  0.25
1                 Coffee Shop  0.25
2                        Park  0.25
3            Business Service  0.25
4         Moroccan Restaurant  0.00


----Woodbine Heights----
            venue  freq
0        Bus Line  0.11
1   Grocery Store  0.11
2        Pharmacy  0.11
3       Pet Store  0.06
4  Breakfast Spot  0.06


----York Mills West----
               venue  freq
0  Convenience Store  0.33
1               Park  0.33
2          Speakeasy  0.33
3              Plaza  0.00
4         Playground  0.00


----York Mills, Silver Hills----
          venue  freq
0          Park   1.0
1   Zoo Exhibit   0.0
2        Museum   0.0
3  Noodle House   0.0
4     Nightclub   0.0




### 3.2.10 Function to sort the venues in descending order

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### 3.2.11 Create new DataFrame and display the top 10 venues for each neighborhood

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(10)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Discount Store,Department Store,Skating Rink,Shopping Mall,Shanghai Restaurant,Supermarket,Sushi Restaurant,Bakery,Badminton Court
1,"Alderwood, Long Branch",Performing Arts Venue,Convenience Store,Pub,Gym,Yoga Studio,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
2,Bayview Village,Golf Driving Range,Construction & Landscaping,Trail,Park,Yoga Studio,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
3,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Café,Restaurant,Juice Bar,Liquor Store,Thai Restaurant,Sports Club,Pub
4,Berczy Park,Coffee Shop,Cheese Shop,Restaurant,Cocktail Bar,Seafood Restaurant,Bakery,Farmers Market,Breakfast Spot,Beer Bar,Shopping Mall
5,"Birch Cliff, Cliffside West",General Entertainment,Skating Rink,College Stadium,Café,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant
6,"Brockton, Parkdale Village, Exhibition Place",Bar,Coffee Shop,Café,Restaurant,Sandwich Place,Gift Shop,Nightclub,French Restaurant,Furniture / Home Store,Bakery
7,"Business reply mail Processing Centre, South C...",Coffee Shop,Hotel,Restaurant,Café,Italian Restaurant,Bar,Asian Restaurant,Seafood Restaurant,Theater,Sandwich Place
8,"CN Tower, King and Spadina, Railway Lands, Har...",Italian Restaurant,Café,Coffee Shop,French Restaurant,Bar,Park,Restaurant,Lounge,Sandwich Place,Speakeasy
9,Caledonia-Fairbanks,Mexican Restaurant,Gym,Café,Beer Store,Park,Sporting Goods Shop,Bakery,Farmers Market,Fast Food Restaurant,Farm


## 3.3 Cluster neighborhoods of Toronto

### 3.3.1 Run _k_-means to cluster the neighborhoods into 5 clusters

In [29]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:20] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1])

### 3.3.2 Creating a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [30]:
# Add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Merge df_toronto_neighborhood_list with neighborhoods_venues_sorted to add latitude/longitude for each neighborhood
toronto_merged = pd.merge(df_toronto_neighborhood_list,neighborhoods_venues_sorted,how='left', on='Neighborhood')

# Remove rows with no results found (NaN) and reset column 'Cluster Labels' back to integer
toronto_merged.dropna(inplace=True)
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype(int)

toronto_merged.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.81139,-79.19662,0,Zoo Exhibit,Fast Food Restaurant,Furniture / Home Store,Dog Run,Field,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.78574,-79.15875,3,Construction & Landscaping,Bar,Donut Shop,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.76575,-79.1747,1,Construction & Landscaping,Gym / Fitness Center,Park,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space
3,M1G,Scarborough,Woburn,43.76812,-79.21761,0,Construction & Landscaping,Park,Business Service,Coffee Shop,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space
4,M1H,Scarborough,Cedarbrae,43.76944,-79.23892,4,Trail,Yoga Studio,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space,Dog Run
5,M1J,Scarborough,Scarborough Village,43.74446,-79.23117,0,Park,Spa,Grocery Store,Restaurant,Indian Restaurant,Yoga Studio,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.72582,-79.26461,0,Hobby Shop,Department Store,Coffee Shop,Event Space,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Falafel Restaurant
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.71289,-79.28506,0,Bakery,Bus Line,Intersection,Bus Station,Metro Station,Soccer Field,Coffee Shop,Fast Food Restaurant,Farmers Market,Field
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.7236,-79.23496,0,Ice Cream Shop,Coffee Shop,Pharmacy,Discount Store,Sandwich Place,Pizza Place,Hardware Store,Farm,Falafel Restaurant,Event Space
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6951,-79.26466,0,General Entertainment,Skating Rink,College Stadium,Café,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant


### 3.3.3 Display results

In [31]:
# create map
map_clusters = folium.Map(location=[lat_Toronto, lon_Toronto],
                          tiles='Stamen Terrain',
                          zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map
markers_colors = []
for lat, lon, neighbor, cluster in zip(toronto_merged['Latitude'],
                                       toronto_merged['Longitude'],
                                       toronto_merged['Neighborhood'],
                                       toronto_merged['Cluster Labels']):
   
    label = '<b>{}</b>, Cluster {}'.format(neighbor,cluster)
    popup_label = folium.Popup(label, parse_html=True)
    
    folium.CircleMarker([lat, lon],
                        radius=5,
                        popup=label,
                        color=rainbow[cluster-1],
                        fill=True,
                        fill_color=rainbow[cluster-1],
                        fill_opacity=1.0).add_to(map_clusters)
       
map_clusters

### 3.3.4 Examine clusters

#### Cluster 1

In [32]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,0,Zoo Exhibit,Fast Food Restaurant,Furniture / Home Store,Dog Run,Field,Farmers Market,Farm,Falafel Restaurant,Event Space,Ethiopian Restaurant
3,Scarborough,0,Construction & Landscaping,Park,Business Service,Coffee Shop,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Event Space
5,Scarborough,0,Park,Spa,Grocery Store,Restaurant,Indian Restaurant,Yoga Studio,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
6,Scarborough,0,Hobby Shop,Department Store,Coffee Shop,Event Space,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant,Falafel Restaurant
7,Scarborough,0,Bakery,Bus Line,Intersection,Bus Station,Metro Station,Soccer Field,Coffee Shop,Fast Food Restaurant,Farmers Market,Field
...,...,...,...,...,...,...,...,...,...,...,...,...
98,York,0,Diner,Pizza Place,Fried Chicken Joint,Pharmacy,Grocery Store,Gift Shop,Park,Yoga Studio,Elementary School,Dumpling Restaurant
99,Etobicoke,0,Sandwich Place,Pizza Place,Chinese Restaurant,Coffee Shop,Elementary School,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
100,Etobicoke,0,Arts & Crafts Store,Pizza Place,Music Venue,Bus Line,Clothing Store,Ethiopian Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room
101,Etobicoke,0,Grocery Store,Fast Food Restaurant,Park,Caribbean Restaurant,Discount Store,Beer Store,Liquor Store,Japanese Restaurant,Fried Chicken Joint,Coffee Shop


#### Cluster 2

In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,1,Construction & Landscaping,Gym / Fitness Center,Park,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space
17,North York,1,Residential Building (Apartment / Condo),Park,Yoga Studio,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Distribution Center
20,North York,1,Park,Yoga Studio,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space,Dog Run
23,North York,1,Park,Speakeasy,Convenience Store,Yoga Studio,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Ethiopian Restaurant
25,North York,1,Food & Drink Shop,Park,Yoga Studio,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space
34,North York,1,Pharmacy,Grocery Store,Park,Yoga Studio,Escape Room,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
40,East York,1,Intersection,Convenience Store,Park,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Yoga Studio
44,Central Toronto,1,Park,Convenience Store,Grocery Store,Bank,Breakfast Spot,Farmers Market,Farm,Falafel Restaurant,Fast Food Restaurant,Dumpling Restaurant
45,Central Toronto,1,Park,Convenience Store,Grocery Store,Bank,Breakfast Spot,Farmers Market,Farm,Falafel Restaurant,Fast Food Restaurant,Dumpling Restaurant
46,Central Toronto,1,Park,Convenience Store,Grocery Store,Bank,Breakfast Spot,Farmers Market,Farm,Falafel Restaurant,Fast Food Restaurant,Dumpling Restaurant


#### Cluster 3

In [34]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Scarborough,2,Auto Garage,Yoga Studio,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space,Dog Run


#### Cluster 4

In [35]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,3,Construction & Landscaping,Bar,Donut Shop,Fish & Chips Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
96,North York,3,Construction & Landscaping,Furniture / Home Store,Yoga Studio,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space


#### Cluster 5

In [36]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Scarborough,4,Trail,Yoga Studio,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Escape Room,Event Space,Dog Run
37,East Toronto,4,Health Food Store,Pub,Trail,Yoga Studio,Escape Room,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
