## Importing Relevant Libraries for WebScraping and Setting up the DataFrame

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

### I use BeautifulSoup to scrape the table from Wikipedia

In [2]:
# Download the Wikipedia page listing the Canadian postal codes starting with "M".
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# fails loudly on an HTTP error instead of silently parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

# Locate the postal-code table; report a clear error if the page layout changed
# rather than an AttributeError on None.
postcode_table = soup.find('table', class_="wikitable sortable")
if postcode_table is None or postcode_table.tbody is None:
    raise ValueError("Could not find the 'wikitable sortable' table body on the page")

# Plain text of the table body; it is parsed into rows in the next cell.
table = postcode_table.tbody.text

### The scraped table information is in str format. Here I convert it to a Pandas Dataframe

In [3]:
# Rows in the scraped text are separated by blank lines and cells by single newlines.
table_list = [line.split('\n') for line in table.split('\n\n')]

df = pd.DataFrame(table_list, columns=table_list[0])

# Drop the first row (the table header) and the last row (an empty trailing row)
# by position instead of the hard-coded label 289, so the cell keeps working if
# the number of rows on the page changes. Reassigning avoids inplace mutation.
df = df.drop(index=[df.index[0], df.index[-1]])

df.head()

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood
1,,M1A,Not assigned,Not assigned
2,,M2A,Not assigned,Not assigned
3,,M3A,North York,Parkwoods
4,,M4A,North York,Victoria Village
5,,M5A,Downtown Toronto,Harbourfront


### Table rows with unassigned Boroughs are dropped

In [4]:
# Flag rows whose Borough is the "Not assigned" placeholder, keep the rest,
# and renumber the remaining rows from 0.
na = df['Borough'].eq("Not assigned")
df = df.loc[~na].reset_index(drop=True)

In [5]:
# (rows, columns) of df after the "Not assigned" boroughs were removed.
df.shape

(211, 4)

In [6]:
# Preview the first 10 remaining rows.
df.head(10)

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood
0,,M3A,North York,Parkwoods
1,,M4A,North York,Victoria Village
2,,M5A,Downtown Toronto,Harbourfront
3,,M5A,Downtown Toronto,Regent Park
4,,M6A,North York,Lawrence Heights
5,,M6A,North York,Lawrence Manor
6,,M7A,Queen's Park,Not assigned
7,,M9A,Etobicoke,Islington Avenue
8,,M1B,Scarborough,Rouge
9,,M1B,Scarborough,Malvern


### I replace unassigned Neighborhoods with the name of the Borough

In [7]:
# Where the Neighbourhood is "Not assigned", fall back to the Borough name.
# The assignment goes through a single .loc call: the chained form
# df['Neighbourhood'][mask] = ... may write to a temporary copy and triggers
# pandas' SettingWithCopyWarning.
unassigned = df['Neighbourhood'] == "Not assigned"
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']

In [8]:
# Preview the first 10 rows after the replacement.
df.head(10)

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood
0,,M3A,North York,Parkwoods
1,,M4A,North York,Victoria Village
2,,M5A,Downtown Toronto,Harbourfront
3,,M5A,Downtown Toronto,Regent Park
4,,M6A,North York,Lawrence Heights
5,,M6A,North York,Lawrence Manor
6,,M7A,Queen's Park,Queen's Park
7,,M9A,Etobicoke,Islington Avenue
8,,M1B,Scarborough,Rouge
9,,M1B,Scarborough,Malvern


### Here, I combine the Neighbourhoods that share the same Postcode and Borough into a single row

In [9]:
# One row per (Postcode, Borough) pair, in first-seen order. The string values
# of every remaining column are joined with ', '.
df_grouped = (
    df.groupby(['Postcode', 'Borough'], as_index=False, sort=False)
      .agg(', '.join)
)

In [10]:
# Preview the grouped frame.
df_grouped.head()

Unnamed: 0,Postcode,Borough,Unnamed: 3,Neighbourhood
0,M3A,North York,,Parkwoods
1,M4A,North York,,Victoria Village
2,M5A,Downtown Toronto,",","Harbourfront, Regent Park"
3,M6A,North York,",","Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,,Queen's Park


In [11]:
# Remove the empty-named column left over from the scrape. Reassigning (instead
# of inplace=True) together with errors='ignore' lets this cell be re-run
# without a KeyError once the column is already gone.
df_grouped = df_grouped.drop(columns='', errors='ignore')

In [12]:
# Preview the grouped frame after the empty-named column was dropped.
df_grouped.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [13]:
# (rows, columns) of the grouped frame.
df_grouped.shape

(103, 3)

### Read in the CSV file with the Geospatial Coordinates of Toronto

In [14]:
# Load the coordinates table; the relative path is resolved against the
# current working directory of the kernel.
df_coordinates = pd.read_csv('Geospatial_Coordinates.csv')

In [15]:
# Preview the coordinates frame as loaded.
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
# Give the key column the same name as in df_grouped so both frames can be
# merged on 'Postcode'.
df_coordinates = df_coordinates.rename(columns={'Postal Code': 'Postcode'})
df_coordinates.head(2)

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497


### Merge the Neighborhood df with the Geospatial df

In [17]:
# Left join: keep every grouped postcode row and attach its coordinates.
df_toronto = pd.merge(df_grouped, df_coordinates, on='Postcode', how='left')

### Checking for any null values

In [18]:
# Number of rows with a missing Latitude after the left merge
# (0 means every row has a non-null Latitude).
pd.isnull(df_toronto['Latitude']).sum()

0

In [19]:
# Show 10 randomly chosen rows; no random_state is given, so the selection
# differs from run to run.
df_toronto.sample(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
75,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325
53,M3M,North York,Downsview Central,43.728496,-79.495697
49,M6L,North York,"Downsview, North Park, Upwood Park",43.713756,-79.490074
71,M1R,Scarborough,"Maryvale, Wexford",43.750072,-79.295849
93,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
0,M3A,North York,Parkwoods,43.753259,-79.329656
95,M1X,Scarborough,Upper Rouge,43.836125,-79.205636
22,M1G,Scarborough,Woburn,43.770992,-79.216917
62,M5N,Central Toronto,Roselawn,43.711695,-79.416936
13,M3C,North York,"Flemingdon Park, Don Mills South",43.7259,-79.340923


## Importing Necessary Libraries for Displaying Maps and Clustering

In [20]:
from sklearn.cluster import KMeans
import folium

from geopy.geocoders import Nominatim

import json
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

#### Instantiating the geolocator object to retrieve Toronto's coordinates

In [21]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError(f"Could not geocode address: {address!r}")

# Map centre used by the folium maps further down.
latitude, longitude = location.latitude, location.longitude

#### Visualizing a map of Toronto's Boroughs and Neighbourhoods

In [22]:
# Base map centred on the geocoded latitude/longitude.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of df_toronto, with a "neighbourhood,borough" popup.
for lat, lng, neigh, bor in zip(df_toronto['Latitude'], df_toronto['Longitude'],
                                df_toronto['Neighbourhood'], df_toronto['Borough']):
    # parse_html=True makes folium escape the label text inside the popup.
    popup = folium.Popup(f'{neigh},{bor}', parse_html=True)

    # parse_html is an option of Popup, not of CircleMarker, so it is no longer
    # passed to the marker; `fill` now receives a real boolean.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

In [23]:
# Render the map as the cell output.
map_toronto

#### I have chosen to explore Downtown Toronto because it is Toronto's main business and entertainment district

In [24]:
# Display the rows of df_toronto whose Borough is "Downtown Toronto"
# (original index labels are kept).
df_toronto[df_toronto['Borough'] == "Downtown Toronto"]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
36,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.640816,-79.381752
42,M5K,Downtown Toronto,"Design Exchange, Toronto Dominion Centre",43.647177,-79.381576
48,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817


In [25]:
# New dataframe holding only the Downtown Toronto borough, re-indexed from 0.
is_downtown = df_toronto['Borough'] == 'Downtown Toronto'
DT_data = df_toronto.loc[is_downtown].reset_index(drop=True)
DT_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


#### Instantiating the geolocator object and extracting the coordinates of Downtown Toronto


In [26]:
address = 'Downtown Toronto, TO'

geolocator = Nominatim(user_agent="DT_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError(f"Could not geocode address: {address!r}")

# NOTE: this overwrites the latitude/longitude set by the earlier Toronto
# geocoding cell; cells that run after this one use the Downtown Toronto centre.
latitude, longitude = location.latitude, location.longitude

In [27]:
import os

# Foursquare API credentials. They are read from the environment so that real
# keys never have to be saved in the notebook; the 'xxxxx' placeholders remain
# the fallback when the variables are not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'xxxxx')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'xxxxx')

# Sent as the `v` query parameter of every Foursquare request.
VERSION = '20190412'

### Defining a function to loop through the neighbourhoods in DT and extract their associated venues from foursquare API

In [28]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit = 100):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences, iterated together with ``zip``; one API request is
        made per (name, latitude, longitude) triple.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.
    limit : int, default 100
        Value sent as the ``limit`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but still has these columns) when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string; the timeout prevents
        # a stalled connection from hanging the notebook.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=30,
        )
        # Surface HTTP errors here rather than as a KeyError on the payload.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # Keep only the relevant information for each nearby venue.
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue with an empty category list gets None instead of
                # raising IndexError.
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor also works for an empty result,
    # where assigning .columns afterwards would raise a length-mismatch error.
    return pd.DataFrame(rows, columns=columns)

#### Running the function on the DT data subset

In [29]:
# Collect the venues around every neighbourhood in DT_data, using the default
# radius and limit of getNearbyVenues.
downtown_venues = getNearbyVenues(
    DT_data['Neighbourhood'],
    DT_data['Latitude'],
    DT_data['Longitude'],
)

Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Christie
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
First Canadian Place, Underground city
Church and Wellesley


In [30]:
# Print the (rows, columns) shape of the venues table, then display 10 randomly
# sampled rows. No random_state is given, so the sample differs between runs.

print(downtown_venues.shape)
downtown_venues.sample(10)

(1294, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
10,"Harbourfront, Regent Park",43.65426,-79.360636,SOMA chocolatemaker,43.650622,-79.358127,Chocolate Shop
23,"Harbourfront, Regent Park",43.65426,-79.360636,Brick Street Bakery,43.650574,-79.359539,Bakery
264,Berczy Park,43.644771,-79.373306,Bier Markt Esplanade,43.647236,-79.373892,Belgian Restaurant
980,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,Loaded Pierogi,43.647965,-79.373427,Comfort Food Restaurant
1001,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,Aveda Institute Toronto,43.650096,-79.37363,Cosmetics Shop
759,"Commerce Court, Victoria Hotel",43.648198,-79.379817,Garrison Bespoke,43.648102,-79.376334,Tailor Shop
715,"Commerce Court, Victoria Hotel",43.648198,-79.379817,King Taps,43.648476,-79.382058,Gastropub
1016,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,Brookfield Place,43.646791,-79.378769,Shopping Mall
960,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail
1002,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,Equinox Bay Street,43.6481,-79.379989,Gym


#### Converting the 'Venue Category' variable from a categorical variable to one-hot (0/1 indicator) columns to enable clustering

In [31]:
# One-hot encode the venue categories: one indicator column per category,
# named after the category itself (empty prefix and separator).
downtown_venues_onehot = pd.get_dummies(
    downtown_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# Append the neighbourhood of each venue as the last column ...
downtown_venues_onehot['Neighbourhood'] = downtown_venues['Neighbourhood']

# ... then rotate that last column to the front.
column_order = list(downtown_venues_onehot.columns)
downtown_venues_onehot = downtown_venues_onehot[column_order[-1:] + column_order[:-1]]

downtown_venues_onehot.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
# Report how many distinct values the 'Venue Category' column contains.
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 203 uniques categories.


#### Aggregating the df based on neighbourhoods, so there's one row per neighborhood

In [33]:
# Average the indicator columns per neighbourhood: each value becomes the share
# of that neighbourhood's venues falling in the category.
downtown_grouped = downtown_venues_onehot.groupby('Neighbourhood', as_index=False).mean()
downtown_grouped

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.066667,0.066667,0.066667,0.133333,0.2,0.133333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,...,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.011628,0.0,0.011628
5,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.0,0.06,0.0,0.04,0.01,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.011236,0.011236,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,...,0.0,0.0,0.0,0.0,0.0,0.011236,0.011236,0.0,0.011236,0.022472
8,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
9,"Design Exchange, Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0


#### For each neighbourhood, I'd like to see the 5 most common venue categories

In [34]:
num_top_venues = 5

# For every neighbourhood, print its most frequent venue categories.
for neighbourhood in downtown_grouped['Neighbourhood']:
    print("----"+neighbourhood+"----")

    # Transpose the neighbourhood's single row into a two-column table.
    freq_table = (
        downtown_grouped[downtown_grouped['Neighbourhood'] == neighbourhood]
        .T
        .reset_index()
    )
    freq_table.columns = ['venue', 'freq']

    # Skip the first entry (the 'Neighbourhood' label itself), convert the
    # frequencies to float and round them to two decimals.
    freq_table = (
        freq_table.iloc[1:]
        .assign(freq=lambda t: t['freq'].astype(float))
        .round({'freq': 2})
    )

    top_venues = (
        freq_table.sort_values('freq', ascending=False)
        .reset_index(drop=True)
        .head(num_top_venues)
    )
    print(top_venues)
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0          Coffee Shop  0.06
1                 Café  0.04
2           Steakhouse  0.04
3  American Restaurant  0.04
4      Thai Restaurant  0.04


----Berczy Park----
            venue  freq
0     Coffee Shop  0.09
1    Cocktail Bar  0.05
2  Farmers Market  0.04
3      Steakhouse  0.04
4             Pub  0.04


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.20
1  Airport Terminal  0.13
2    Airport Lounge  0.13
3     Boat or Ferry  0.07
4  Sculpture Garden  0.07


----Cabbagetown, St. James Town----
                venue  freq
0         Coffee Shop  0.10
1                Park  0.06
2                Café  0.06
3          Restaurant  0.06
4  Italian Restaurant  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.15
1  Italian Restaurant  0.05
2                 Bar  0.03
3      

### Creating a new df with the 10 most common venues for each neighborhood

In [35]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in ``row``, skipping its first entry.

    Parameters
    ----------
    row : pandas.Series
        Its first element is ignored; the remaining elements are ranked by value.
    num_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Index labels of the remaining elements, ordered by descending value and
        truncated to ``num_top_venues`` entries.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [36]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Build the column names: '1st', '2nd', '3rd', then '4th', '5th', ...
# The suffix is chosen with an explicit bounds check instead of a bare
# `except:`, which would also have hidden unrelated errors.
columns = ['Neighbourhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Create the result frame with one row per neighbourhood of downtown_grouped.
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = downtown_grouped['Neighbourhood']

# Fill every row with that neighbourhood's most common venue categories.
for ind in range(downtown_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :],
                                                                           num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Steakhouse,Thai Restaurant,American Restaurant,Café,Restaurant,Salad Place,Hotel,Burger Joint,Bar
1,Berczy Park,Coffee Shop,Cocktail Bar,Café,Farmers Market,Steakhouse,Cheese Shop,Pub,Restaurant,Bakery,Seafood Restaurant
2,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Plane,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Harbor / Marina
3,"Cabbagetown, St. James Town",Coffee Shop,Restaurant,Café,Park,Pub,Italian Restaurant,Bakery,Pizza Place,Chinese Restaurant,Breakfast Spot
4,Central Bay Street,Coffee Shop,Italian Restaurant,Burger Joint,Café,Ice Cream Shop,Bar,Bubble Tea Shop,Indian Restaurant,Spa,Japanese Restaurant


## Running the clustering algorithm: K-means

In [37]:
# set number of clusters
kclusters = 5

# Feature matrix: every venue-category column, without the neighbourhood label.
# `columns=` replaces the positional axis argument of drop('Neighbourhood', 1),
# which newer pandas versions no longer accept.
downtown_grouped_clustering = downtown_grouped.drop(columns='Neighbourhood')

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for the 1st 10 rows in the dataframe
kmeans.labels_[0:10]

array([0, 0, 4, 0, 0, 3, 2, 0, 0, 0])

#### Adding the cluster labels generated and merging the dataframes to get the entire picture

In [38]:
# Add the clustering labels as the first column. DataFrame.insert raises if the
# column already exists, so on a re-run of this cell the existing column is
# overwritten instead.
if 'Cluster Labels' in neighbourhoods_venues_sorted.columns:
    neighbourhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach the cluster label and the top venues to DT_data (postcode, borough and
# coordinates of each neighbourhood), matching rows on 'Neighbourhood'.
# join() returns a new frame, so DT_data itself is left unchanged.
downtown_merged = DT_data.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'),
                               on='Neighbourhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,0,Coffee Shop,Pub,Bakery,Park,Café,Restaurant,Breakfast Spot,Mexican Restaurant,Theater,French Restaurant
1,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Café,Middle Eastern Restaurant,Cosmetics Shop,Japanese Restaurant,Restaurant,Bar,Diner,Pizza Place
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Hotel,Café,Restaurant,Cosmetics Shop,Breakfast Spot,Gastropub,Bakery,Cocktail Bar,Italian Restaurant
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Café,Farmers Market,Steakhouse,Cheese Shop,Pub,Restaurant,Bakery,Seafood Restaurant
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Burger Joint,Café,Ice Cream Shop,Bar,Bubble Tea Shop,Indian Restaurant,Spa,Japanese Restaurant


### Visualizing Downtown Toronto with the clustered venues in differing colours

In [39]:
# create map centred on the current latitude/longitude
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters: one evenly spaced rainbow colour per
# cluster. The unused helper arrays of the earlier version only served to
# supply this count, which is simply kclusters.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'],
                                  downtown_merged['Neighbourhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # The colour index is offset by one, so cluster 0 wraps around to the last colour.
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# Reviewing the constituents of the respective clusters

In [40]:
# Rows assigned to cluster 0, showing column 1 (Borough) plus every column
# from position 5 onward.
cluster_columns = [downtown_merged.columns[1], *downtown_merged.columns[5:]]
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Coffee Shop,Pub,Bakery,Park,Café,Restaurant,Breakfast Spot,Mexican Restaurant,Theater,French Restaurant
1,Downtown Toronto,0,Coffee Shop,Clothing Store,Café,Middle Eastern Restaurant,Cosmetics Shop,Japanese Restaurant,Restaurant,Bar,Diner,Pizza Place
2,Downtown Toronto,0,Coffee Shop,Hotel,Café,Restaurant,Cosmetics Shop,Breakfast Spot,Gastropub,Bakery,Cocktail Bar,Italian Restaurant
3,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Café,Farmers Market,Steakhouse,Cheese Shop,Pub,Restaurant,Bakery,Seafood Restaurant
4,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Burger Joint,Café,Ice Cream Shop,Bar,Bubble Tea Shop,Indian Restaurant,Spa,Japanese Restaurant
6,Downtown Toronto,0,Coffee Shop,Steakhouse,Thai Restaurant,American Restaurant,Café,Restaurant,Salad Place,Hotel,Burger Joint,Bar
7,Downtown Toronto,0,Coffee Shop,Aquarium,Hotel,Italian Restaurant,Café,Pizza Place,Bakery,Scenic Lookout,Fried Chicken Joint,Brewery
8,Downtown Toronto,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Gastropub,Italian Restaurant,Bakery,Deli / Bodega,Steakhouse
9,Downtown Toronto,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Steakhouse,Deli / Bodega,Italian Restaurant,Seafood Restaurant,Bakery
14,Downtown Toronto,0,Coffee Shop,Restaurant,Café,Seafood Restaurant,Pub,Hotel,Cocktail Bar,Fast Food Restaurant,Creperie,Beer Bar


In [41]:
# Rows assigned to cluster 1, showing column 1 (Borough) plus every column
# from position 5 onward.
cluster_columns = [downtown_merged.columns[1], *downtown_merged.columns[5:]]
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, cluster_columns]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Downtown Toronto,1,Park,Playground,Trail,Department Store,Ethiopian Restaurant,Electronics Store,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [42]:

# Rows assigned to cluster 2, showing column 1 (Borough) plus every column
# from position 5 onward.
cluster_columns = [downtown_merged.columns[1], *downtown_merged.columns[5:]]
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, cluster_columns]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Downtown Toronto,2,Grocery Store,Café,Park,Italian Restaurant,Nightclub,Convenience Store,Restaurant,Diner,Baby Store,Athletics & Sports


In [43]:

# Rows assigned to cluster 3, showing column 1 (Borough) plus every column
# from position 5 onward.
cluster_columns = [downtown_merged.columns[1], *downtown_merged.columns[5:]]
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, cluster_columns]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,3,Café,Restaurant,Bar,Japanese Restaurant,Bakery,Bookstore,Chinese Restaurant,Italian Restaurant,Dessert Shop,Beer Store
11,Downtown Toronto,3,Café,Vegetarian / Vegan Restaurant,Bar,Coffee Shop,Mexican Restaurant,Vietnamese Restaurant,Bakery,Dumpling Restaurant,Chinese Restaurant,Farmers Market


In [44]:
# Rows assigned to cluster 4, showing column 1 (Borough) plus every column
# from position 5 onward.
cluster_columns = [downtown_merged.columns[1], *downtown_merged.columns[5:]]
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, cluster_columns]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Downtown Toronto,4,Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Plane,Sculpture Garden,Airport,Airport Food Court,Airport Gate,Harbor / Marina


##### Reviewing the results of the clustering shows that the neighbourhoods were clustered mainly on their most common venue type: there is a clear distinction between the clusters, namely coffee shops, parks, grocery stores, cafés and airport services. The clusters also appear to be well separated geographically, as observed on the map. The coffee-shop cluster is concentrated in the financial district of downtown, and coffee shops are the most common venue category in the majority of the neighbourhoods.
