# Week 3 Assignment: Segmenting and Clustering Neighborhoods in Toronto

Please note that I did everything in a single notebook, which is why it is split into three parts.

## Setting up import and variables

In [1]:
# Import standard libraries
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import pgeocode
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
import foursquare_id #my ids, hidden somewhere else :)

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M"; the table on it is read below
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

## Part 1: Web scraping and table cleaning

In [3]:
# pandas parses every HTML table found at the URL; the postal-code table is the first one
wiki_tables = pd.read_html(url)
df_wiki = wiki_tables[0]
# Discard the postcodes that have no borough assigned
has_borough = df_wiki['Borough'] != 'Not assigned'
df_wiki = df_wiki[has_borough]
df_wiki.head(3)


Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Group by Postcode and Borough, join the Neighbourhood with a comma:

In [4]:
# Collect the distinct neighbourhoods of each (Postcode, Borough) pair into a list
df_wiki = (
    df_wiki[['Postcode', 'Borough', 'Neighbourhood']]
    .drop_duplicates()
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .apply(list)
    .reset_index()
)
# Turn each list into one comma-separated string
df_wiki['Neighbourhood'] = df_wiki.apply(
    lambda record: ','.join(str(name) for name in record['Neighbourhood']),
    axis=1,
)
df_wiki.head(3)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"


### If Neighbourhood is not assigned, assign the Borough:

In [5]:
# Where the neighbourhood is "Not assigned", fall back to the borough name.
# A boolean mask with .loc is used instead of a row loop so the update does not
# depend on the index being 0..n-1 or on Borough sitting at column position 1.
not_assigned = df_wiki['Neighbourhood'] == 'Not assigned'
df_wiki.loc[not_assigned, 'Neighbourhood'] = df_wiki.loc[not_assigned, 'Borough']
df_wiki.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
# Report the size of the cleaned table (number of rows, number of columns)
print("Rows: {}\nColumns: {}".format(df_wiki.shape[0],df_wiki.shape[1]))

Rows: 103
Columns: 3


## Part 2: Retrieving coordinates and adding to dataframe

In [7]:
# The provided function to get coordinates was very slow; this one is much faster.
# One pgeocode.Nominatim instance is kept per country so that repeated lookups
# reuse it instead of constructing a new one for every postal code.
_NOMINATIM_BY_COUNTRY = {}

def get_coordinate(postal_code, country):
    """
    Return the coordinates of the given postal code.

    Parameters:
    postal_code : Searched postal code
    country : Country code of the postal code (e.g. 'CA')

    Return:
    latitude, longitude (tuple) : Coordinates reported by pgeocode for the
        postal code; both are NaN when the lookup finds no match.
    """
    nomi = _NOMINATIM_BY_COUNTRY.get(country)
    if nomi is None:
        # first request for this country: build the geocoder once and remember it
        nomi = pgeocode.Nominatim(country)
        _NOMINATIM_BY_COUNTRY[country] = nomi
    res = nomi.query_postal_code(postal_code)
    return res.latitude, res.longitude

### Search for all coordinates and add to dataframe:

In [8]:
# Look up every postcode (first column of df_wiki) as a Canadian postal code
coordinate_pairs = [get_coordinate(postcode, 'CA') for postcode in df_wiki.iloc[:, 0]]
# Split the (latitude, longitude) pairs into the two new columns
list_latitudes = [pair[0] for pair in coordinate_pairs]
list_longitudes = [pair[1] for pair in coordinate_pairs]
df_wiki["Latitude"] = list_latitudes
df_wiki["Longitude"] = list_longitudes

### Check if there is any issue:

In [9]:
# Rows for which the lookup produced no longitude (NaN)
df_wiki[df_wiki["Longitude"].isna()]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
86,M7R,Mississauga,Canada Post Gateway Processing Centre,,


So we have a missing coordinate.
After a quick search on Google, it seems the correct Postcode is L5K 2L8: [Google search](https://www.google.com/search?sxsrf=ACYBGNQo4hWVDJ8mlIUjBYbFSgAxTQAAVA%3A1579146445743&ei=zdwfXvSALcz0aMWWlMgO&q=canada+mississauga++Canada+Post+Gateway+Processing+Centre+postal+code&oq=canada+mississauga++Canada+Post+Gateway+Processing+Centre+postal+code&gs_l=psy-ab.3...571233.572000..572575...0.3..0.109.211.2j1......0....1j2..gws-wiz.......0i71j35i39i19j0i8i67j0i8i30.B1IFQr4fxlE&ved=0ahUKEwi0m8n3mofnAhVMOhoKHUULBekQ4dUDCAs&uact=5)

Let's get coordinates and edit the dataframe.


In [10]:
# M7R (Canada Post Gateway Processing Centre) returned no coordinates, so look
# up the facility's full postal code instead.
latitude, longitude = get_coordinate('L5K 2L8', 'CA')
# Select the row by its postcode rather than by the index label 86, which only
# holds for one particular version of the scraped table.
is_gateway = df_wiki['Postcode'] == 'M7R'
df_wiki.loc[is_gateway, 'Latitude'] = latitude
df_wiki.loc[is_gateway, 'Longitude'] = longitude
# Should now display no rows
df_wiki[df_wiki["Longitude"].isna()]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude


### No issue anymore, print the dataframe:

In [11]:
# Display the completed table: postcode, borough, neighbourhoods, latitude, longitude
df_wiki

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.8113,-79.1930
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.7878,-79.1564
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7678,-79.1866
3,M1G,Scarborough,Woburn,43.7712,-79.2144
4,M1H,Scarborough,Cedarbrae,43.7686,-79.2389
...,...,...,...,...,...
98,M9N,York,Weston,43.7068,-79.5170
99,M9P,Etobicoke,Westmount,43.6949,-79.5323
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.6898,-79.5582
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",43.7432,-79.5876


## Part 3: Cluster 

### First, let's see what we've got

In [12]:
# Base map of Toronto, built from fixed centre coordinates
map_toronto = folium.Map(location=[43.656, -79.391], zoom_start=10)

# One circle marker per row of df_wiki, with a "<neighbourhoods>, <borough>" popup
marker_rows = zip(df_wiki['Latitude'], df_wiki['Longitude'], df_wiki['Borough'], df_wiki['Neighbourhood'])
for lat, lng, borough, neighborhood in marker_rows:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

### We will focus on downtown 

In [13]:
# Keep only the Downtown Toronto postcodes and renumber the rows from 0
is_downtown = df_wiki['Borough'] == 'Downtown Toronto'
downtown_data = df_wiki.loc[is_downtown].reset_index(drop=True)
downtown_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.6827,-79.373
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.6684,-79.3689
2,M4Y,Downtown Toronto,Church and Wellesley,43.6656,-79.383
3,M5A,Downtown Toronto,Harbourfront,43.6555,-79.3626
4,M5B,Downtown Toronto,"Ryerson,Garden District",43.6572,-79.3783


### Let's plot this

In [14]:
# Closer-zoom map of Toronto for the downtown postcodes
map_toronto = folium.Map(location=[43.656, -79.391], zoom_start=13)

# One circle marker per downtown row, with the neighbourhood names as popup
downtown_rows = zip(downtown_data['Latitude'], downtown_data['Longitude'], downtown_data['Neighbourhood'])
for lat, lng, hood_names in downtown_rows:
    popup = folium.Popup(hood_names, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

Define Foursquare Credentials and Version

In [15]:
# Credentials are read from the separately kept foursquare_id module so they are not stored in the notebook
CLIENT_ID = foursquare_id.CLIENT_ID # your Foursquare ID
CLIENT_SECRET = foursquare_id.CLIENT_SECRET # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

###  Let's get the top 100 venues that are in each Neighbourhood

In [16]:
# function that extracts the category of the venue
def get_category_type(row):
    """
    Return the name of the first category of a venue record.

    Parameters:
    row : Mapping-like record holding the category list under 'categories',
        or under 'venue.categories' when 'categories' is absent

    Return:
    Name of the first category, or None when the category list is empty
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a real
        # problem and is left to propagate instead of being swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

def _first_category_name(venue):
    """Return the name of a venue's first category, or None when it has none."""
    categories = venue.get('categories') or []
    return categories[0]['name'] if categories else None


def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    """
    Query the Foursquare "explore" endpoint around each location and return
    all venues found as one dataframe.

    Parameters:
    names : Iterable of location names (used to tag each venue row)
    latitudes : Iterable of latitudes, aligned with names
    longitudes : Iterable of longitudes, aligned with names
    radius : Value sent as the 'radius' query parameter
    LIMIT : Value sent as the 'limit' query parameter

    Return:
    DataFrame with one row per venue and the columns Neighborhood,
    Neighborhood Latitude, Neighborhood Longitude, Venue, Venue Latitude,
    Venue Longitude, Venue Category. The frame is empty (but keeps these
    columns) when no venue is returned. 'Venue Category' is None for a venue
    without categories.

    Raises:
    requests.HTTPError : when the API answers with an error status
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print("Processing {}...".format(name))

        # let requests build and encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; fail loudly on HTTP errors and never hang forever
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()['response']['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                _first_category_name(venue)))

    # passing the column names here keeps the schema even when no venue was found
    return pd.DataFrame(venue_rows, columns=columns)

In [17]:
# Fetch the venues around each downtown neighbourhood's coordinates (default radius and limit)

downtown_venues = getNearbyVenues(names=downtown_data['Neighbourhood'],
                                   latitudes=downtown_data['Latitude'],
                                   longitudes=downtown_data['Longitude']
                                  )

Processing Rosedale...
Processing Cabbagetown,St. James Town...
Processing Church and Wellesley...
Processing Harbourfront...
Processing Ryerson,Garden District...
Processing St. James Town...
Processing Berczy Park...
Processing Central Bay Street...
Processing Adelaide,King,Richmond...
Processing Harbourfront East,Toronto Islands,Union Station...
Processing Design Exchange,Toronto Dominion Centre...
Processing Commerce Court,Victoria Hotel...
Processing Harbord,University of Toronto...
Processing Chinatown,Grange Park,Kensington Market...
Processing CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara...
Processing Stn A PO Boxes 25 The Esplanade...
Processing First Canadian Place,Underground city...
Processing Christie...
Processing Queen's Park...


In [18]:
# Size of the venue table (rows, columns), followed by its first rows
print(downtown_venues.shape)
downtown_venues.head()

(1263, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.6827,-79.373,Summerhill Market,43.686265,-79.375458,Grocery Store
1,Rosedale,43.6827,-79.373,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,43.6827,-79.373,Scoops Convenience Boutique,43.686148,-79.375828,Candy Store
3,Rosedale,43.6827,-79.373,Rosedale Park,43.682328,-79.378934,Playground
4,"Cabbagetown,St. James Town",43.6684,-79.3689,Cranberries,43.667843,-79.369407,Diner


### Let's check how many venues were returned for each neighborhood


In [19]:
# Count of non-null values per column for each neighbourhood, i.e. venues returned per neighbourhood
downtown_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Berczy Park,91,91,91,91,91,91
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",57,57,57,57,57,57
"Cabbagetown,St. James Town",41,41,41,41,41,41
Central Bay Street,98,98,98,98,98,98
"Chinatown,Grange Park,Kensington Market",85,85,85,85,85,85
Christie,12,12,12,12,12,12
Church and Wellesley,84,84,84,84,84,84
"Commerce Court,Victoria Hotel",100,100,100,100,100,100
"Design Exchange,Toronto Dominion Centre",100,100,100,100,100,100


In [20]:
# Number of distinct venue categories across all downtown venues
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 187 uniques categories.


### Analyze Each Neighborhood

In [21]:
# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

downtown_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,Bagel Shop,...,Thrift / Vintage Store,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Next, let's group rows by neighborhood, taking the mean of the frequency of occurrence of each category


In [22]:
# Mean of each indicator column per neighbourhood = share of that neighbourhood's venues in each category
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,...,Thrift / Vintage Store,Toy / Game Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store
0,"Adelaide,King,Richmond",0.0,0.0,0.03,0.01,0.0,0.03,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01
1,Berczy Park,0.0,0.0,0.0,0.021978,0.0,0.0,0.0,0.010989,0.0,...,0.0,0.0,0.0,0.010989,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.017544,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.010204,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.010204,0.0,0.0,0.010204,0.010204,0.0,0.010204,0.0,0.0
5,"Chinatown,Grange Park,Kensington Market",0.0,0.0,0.0,0.023529,0.011765,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.058824,0.0,0.047059,0.0,0.011765,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.083333,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.011905,0.011905,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.011905,0.0
8,"Commerce Court,Victoria Hotel",0.0,0.0,0.03,0.01,0.0,0.03,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01
9,"Design Exchange,Toronto Dominion Centre",0.0,0.0,0.02,0.01,0.0,0.02,0.0,0.0,0.0,...,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0


### Let's print each neighborhood along with the top 5 most common venues

In [23]:
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    # Turn this neighbourhood's row into a two-column (venue, freq) table
    hood_mask = downtown_grouped['Neighborhood'] == hood
    freq_table = downtown_grouped[hood_mask].T.reset_index()
    freq_table.columns = ['venue','freq']
    # The first entry holds the neighbourhood name itself, not a frequency
    freq_table = freq_table.iloc[1:]
    freq_table['freq'] = freq_table['freq'].astype(float)
    freq_table = freq_table.round({'freq': 2})
    # Most frequent categories first, renumbered from 0
    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
         venue  freq
0         Café  0.06
1  Coffee Shop  0.06
2        Hotel  0.04
3   Steakhouse  0.04
4   Restaurant  0.04


----Berczy Park----
          venue  freq
0   Coffee Shop  0.11
1          Café  0.05
2         Hotel  0.05
3        Bakery  0.04
4  Cocktail Bar  0.03


----CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara----
                venue  freq
0         Coffee Shop  0.09
1  Italian Restaurant  0.07
2                 Bar  0.05
3                Café  0.05
4                Park  0.04


----Cabbagetown,St. James Town----
         venue  freq
0  Coffee Shop  0.07
1         Café  0.05
2   Restaurant  0.05
3       Market  0.05
4       Bakery  0.05


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.19
1  Italian Restaurant  0.03
2      Clothing Store  0.03
3                Café  0.03
4              Bakery  0.03


----Chinatown,Grange Park,Kensington Market----


In [24]:
# Function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    """
    Return the labels of the highest-valued entries of a row.

    Parameters:
    row : Series whose first entry is skipped and whose remaining entries are frequencies
    num_top_venues : Number of labels to return

    Return:
    Array with the index labels of the largest entries, in descending order of value
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.head(num_top_venues).index.values

### Create the new dataframe and display the top 10 venues for each neighborhood

In [25]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column headers: 'Neighborhood', then "1st", "2nd", "3rd", "4th", ... Most Common Venue
columns = ['Neighborhood']
for ind in range(num_top_venues):
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind+1))

# Empty frame with those headers, one row per neighbourhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

# Fill every row with that neighbourhood's most frequent venue categories
for row_position in range(len(downtown_grouped)):
    grouped_row = downtown_grouped.iloc[row_position, :]
    neighborhoods_venues_sorted.iloc[row_position, 1:] = return_most_common_venues(grouped_row, num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Café,Coffee Shop,Hotel,Restaurant,Steakhouse,Gym,Bar,Japanese Restaurant,Bakery,Burger Joint
1,Berczy Park,Coffee Shop,Hotel,Café,Bakery,Seafood Restaurant,Restaurant,Beer Bar,Cocktail Bar,Japanese Restaurant,Italian Restaurant
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Coffee Shop,Italian Restaurant,Café,Bar,Gym / Fitness Center,Restaurant,Pub,Park,Bakery,Men's Store
3,"Cabbagetown,St. James Town",Coffee Shop,Café,Restaurant,Italian Restaurant,Pizza Place,Bakery,Market,Pet Store,Butcher,Caribbean Restaurant
4,Central Bay Street,Coffee Shop,Bakery,Italian Restaurant,Café,Clothing Store,Tea Room,Juice Bar,Sandwich Place,Breakfast Spot,Bubble Tea Shop


### Cluster Neighborhoods

### Run k-means to cluster the neighborhood into 5 clusters.

In [26]:
# set number of clusters
kclusters = 5

# Keep only the numeric features. The keyword form is used because pandas 2.0
# no longer accepts the axis as a second positional argument of drop().
downtown_grouped_clustering = downtown_grouped.drop(columns='Neighborhood')

# run k-means clustering (fixed random_state for repeatable labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 0, 4, 2, 1, 2, 2, 2])

### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [27]:
# add clustering labels as the first column; remove a previous copy first so
# that re-running this cell does not fail on an already existing column
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and top venues onto downtown_data (which carries the
# latitude/longitude), matching on the neighbourhood name
downtown_merged = downtown_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

downtown_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.6827,-79.373,1,Park,Grocery Store,Playground,Candy Store,Diner,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.6684,-79.3689,2,Coffee Shop,Café,Restaurant,Italian Restaurant,Pizza Place,Bakery,Market,Pet Store,Butcher,Caribbean Restaurant
2,M4Y,Downtown Toronto,Church and Wellesley,43.6656,-79.383,2,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Burger Joint,Restaurant,Café,Hotel,Gastropub,Bubble Tea Shop
3,M5A,Downtown Toronto,Harbourfront,43.6555,-79.3626,0,Coffee Shop,Breakfast Spot,Restaurant,Spa,Thai Restaurant,Electronics Store,Mexican Restaurant,Italian Restaurant,Food Truck,Beer Store
4,M5B,Downtown Toronto,"Ryerson,Garden District",43.6572,-79.3783,2,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Japanese Restaurant,Plaza,Diner,Sporting Goods Shop,Burger Joint


In [28]:
# create map
map_clusters = folium.Map(location=[43.656, -79.391], zoom_start=13)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighbourhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 is kept from the original colouring: label 0 wraps around to the
    # last colour, so every cluster still gets its own colour
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

 ## Clusters examination

#### Cluster 1: Coffee Shop/Breakfast Spot

In [29]:
# Rows with cluster label 0: column 1 (Borough) plus every column from position 5 on (cluster label, top venues)
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Downtown Toronto,0,Coffee Shop,Breakfast Spot,Restaurant,Spa,Thai Restaurant,Electronics Store,Mexican Restaurant,Italian Restaurant,Food Truck,Beer Store


#### Cluster 2: Park

In [30]:
# Rows with cluster label 1: column 1 (Borough) plus every column from position 5 on (cluster label, top venues)
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,1,Park,Grocery Store,Playground,Candy Store,Diner,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


#### Cluster 3: Coffee Shop/Café

In [31]:
# Rows with cluster label 2: column 1 (Borough) plus every column from position 5 on (cluster label, top venues)
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Italian Restaurant,Pizza Place,Bakery,Market,Pet Store,Butcher,Caribbean Restaurant
2,Downtown Toronto,2,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Burger Joint,Restaurant,Café,Hotel,Gastropub,Bubble Tea Shop
4,Downtown Toronto,2,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Japanese Restaurant,Plaza,Diner,Sporting Goods Shop,Burger Joint
5,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Seafood Restaurant,Bakery,Hotel,Cocktail Bar,Breakfast Spot,Cosmetics Shop,Diner
6,Downtown Toronto,2,Coffee Shop,Hotel,Café,Bakery,Seafood Restaurant,Restaurant,Beer Bar,Cocktail Bar,Japanese Restaurant,Italian Restaurant
7,Downtown Toronto,2,Coffee Shop,Bakery,Italian Restaurant,Café,Clothing Store,Tea Room,Juice Bar,Sandwich Place,Breakfast Spot,Bubble Tea Shop
8,Downtown Toronto,2,Café,Coffee Shop,Hotel,Restaurant,Steakhouse,Gym,Bar,Japanese Restaurant,Bakery,Burger Joint
10,Downtown Toronto,2,Coffee Shop,Hotel,Café,Seafood Restaurant,Steakhouse,Restaurant,Bar,Italian Restaurant,Gastropub,Beer Bar
11,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Gym,Hotel,Steakhouse,Burger Joint,Gastropub,Bar,Asian Restaurant
12,Downtown Toronto,2,Café,Bookstore,Italian Restaurant,Japanese Restaurant,Bakery,Restaurant,Pub,Sandwich Place,Chinese Restaurant,Yoga Studio


#### Cluster 4: Grocery Store

In [32]:
# Rows with cluster label 3: column 1 (Borough) plus every column from position 5 on (cluster label, top venues)
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Downtown Toronto,3,Grocery Store,Café,Park,Athletics & Sports,Candy Store,Baby Store,Coffee Shop,Playground,Doner Restaurant,Empanada Restaurant


#### Cluster 5: Harbor / Marina 	

In [33]:
# Rows with cluster label 4: column 1 (Borough) plus every column from position 5 on (cluster label, top venues)
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Downtown Toronto,4,Harbor / Marina,Café,Music Venue,Park,Gourmet Shop,Diner,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Greek Restaurant


In [34]:
# Human-readable name for each k-means label, shown in the popups of the final map.
# NOTE: these names were assigned by hand from the cluster examination; re-check
# them whenever the clustering is re-run on different data.
dict_clust = {0:'Coffee Shop/Breakfast Spot',1:'Park',2:'Coffee Shop/Café',3:'Grocery',4:'Harbor'}
downtown_merged['Cluster names']= downtown_merged['Cluster Labels'].map(dict_clust)

# Bonus: Final map with cluster names

In [35]:
# create map
map_clusters = folium.Map(location=[43.656, -79.391], zoom_start=13)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map, labelling each one with its cluster name
for lat, lon, poi, cluster, cluster_name in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighbourhood'], downtown_merged['Cluster Labels'], downtown_merged['Cluster names']):
    label = folium.Popup(str(poi) + ' Cluster ' + cluster_name, parse_html=True)
    # cluster-1 is kept from the original colouring: label 0 wraps around to the
    # last colour, so every cluster still gets its own colour
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters