# Segmenting and Clustering Neighborhoods in Toronto

### Importing libraries for Segmenting and Clustering

In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
import numpy as np
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import requests
import json

### Reading table from last notebook

In [2]:
df = pd.read_csv('Toronto_ll.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Removing 'Unnamed: 0' column

In [3]:
df = df.drop('Unnamed: 0', axis=1)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Use geopy library to get the latitude and longitude values of New York City.

In [4]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_course")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [5]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [6]:
scarborough_data = df[df['Borough'] == 'Scarborough'].reset_index(drop=True)
scarborough_data.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [7]:
address = 'Scarborough, Toronto Ontario'

geolocator = Nominatim(user_agent="toronto_course")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [8]:
# create map of Scarborough using latitude and longitude values
map_scarborough = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(scarborough_data['Latitude'], scarborough_data['Longitude'], scarborough_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_scarborough)  
    
map_scarborough

### Define Foursquare Credentials and Version

In [9]:
CLIENT_ID = 'TL2XEQBNMUHYC4KWR2Q2HVYLTIIP4BMSTXG5QKRR3V0ACZPH' 
CLIENT_SECRET = 'TPMNE0ECMDR0JYJWBCPA0RW0Q1HGQQZ22JN1OI0MUIRR3GDJ' 
VERSION = '20180605' 

### Exploring Rouge, Malvern

In [10]:
scarborough_data.loc[0, 'Neighbourhood']

'Rouge,Malvern'

In [11]:
neighborhood_latitude = scarborough_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = scarborough_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = scarborough_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge,Malvern are 43.8066863, -79.19435340000003.


In [12]:
LIMIT = 100 
radius = 500 

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url 

'https://api.foursquare.com/v2/venues/explore?&client_id=TL2XEQBNMUHYC4KWR2Q2HVYLTIIP4BMSTXG5QKRR3V0ACZPH&client_secret=TPMNE0ECMDR0JYJWBCPA0RW0Q1HGQQZ22JN1OI0MUIRR3GDJ&v=20180605&ll=43.8066863,-79.19435340000003&radius=500&limit=100'

In [13]:
results = requests.get(url).json()

In [14]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [15]:
# clean the json and structure it into a pandas dataframe.
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Wendy's,Fast Food Restaurant,43.807448,-79.199056


In [16]:
# Explore Neighborhoods
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)           

In [18]:
# get scarborough venues
scarborough_venues = getNearbyVenues(names=scarborough_data['Neighbourhood'],
                                   latitudes=scarborough_data['Latitude'],
                                   longitudes=scarborough_data['Longitude']
                                  )

print(scarborough_venues.shape)
scarborough_venues.head(10)

Rouge,Malvern
Highland Creek,Rouge Hill,Port Union
Guildwood,Morningside,West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park,Ionview,Kennedy Park
Clairlea,Golden Mile,Oakridge
Cliffcrest,Cliffside,Scarborough Village West
Birch Cliff,Cliffside West
Dorset Park,Scarborough Town Centre,Wexford Heights
Maryvale,Wexford
Agincourt
Clarks Corners,Sullivan,Tam O'Shanter
Agincourt North,L'Amoreaux East,Milliken,Steeles East
L'Amoreaux West
Upper Rouge
(98, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,RIGHT WAY TO GOLF,43.785177,-79.161108,Golf Course
2,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
4,"Guildwood,Morningside,West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
5,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Marina Spa,43.766,-79.191,Spa
6,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
7,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location
8,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Woburn Medical Centre,43.766631,-79.192286,Medical Center
9,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Lawrence Ave E & Kingston Rd,43.767704,-79.18949,Intersection


In [19]:
scarborough_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",2,2,2,2,2,2
"Birch Cliff,Cliffside West",4,4,4,4,4,4
Cedarbrae,9,9,9,9,9,9
"Clairlea,Golden Mile,Oakridge",9,9,9,9,9,9
"Clarks Corners,Sullivan,Tam O'Shanter",14,14,14,14,14,14
"Cliffcrest,Cliffside,Scarborough Village West",2,2,2,2,2,2
"Dorset Park,Scarborough Town Centre,Wexford Heights",8,8,8,8,8,8
"East Birchmount Park,Ionview,Kennedy Park",6,6,6,6,6,6
"Guildwood,Morningside,West Hill",7,7,7,7,7,7


In [20]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 61 uniques categories.


In [21]:
# analyze each neighborhood

# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighbourhood'] = scarborough_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot = scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Brewery,...,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Soccer Field,Spa,Supermarket,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant
0,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood,Morningside,West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
scarborough_onehot.shape

(98, 62)

In [23]:
scarborough_grouped = scarborough_onehot.groupby('Neighbourhood').mean().reset_index()
scarborough_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Brewery,...,Rental Car Location,Sandwich Place,Shopping Mall,Skating Rink,Soccer Field,Spa,Supermarket,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,...,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Birch Cliff,Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.0,0.111111,0.0,0.111111,0.111111,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0
4,"Clairlea,Golden Mile,Oakridge",0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0


In [24]:
# print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in scarborough_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = scarborough_grouped[scarborough_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0             Clothing Store   0.2
1  Latin American Restaurant   0.2
2             Breakfast Spot   0.2
3               Skating Rink   0.2
4                     Lounge   0.2


----Agincourt North,L'Amoreaux East,Milliken,Steeles East----
               venue  freq
0               Park   0.5
1         Playground   0.5
2       Noodle House   0.0
3  Indian Restaurant   0.0
4       Intersection   0.0


----Birch Cliff,Cliffside West----
                   venue  freq
0  General Entertainment  0.25
1                   Café  0.25
2           Skating Rink  0.25
3        College Stadium  0.25
4         Medical Center  0.00


----Cedarbrae----
                venue  freq
0    Hakka Restaurant  0.11
1  Athletics & Sports  0.11
2              Bakery  0.11
3                Bank  0.11
4     Thai Restaurant  0.11


----Clairlea,Golden Mile,Oakridge----
            venue  freq
0          Bakery  0.22
1        Bus Line  0.22
2     Bus Station  0.11

In [25]:
# sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [26]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = scarborough_grouped['Neighbourhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Skating Rink,Latin American Restaurant,Clothing Store,Breakfast Spot,Vietnamese Restaurant,Discount Store,College Stadium,Convenience Store,Department Store
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Playground,Park,Vietnamese Restaurant,Chinese Restaurant,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
2,"Birch Cliff,Cliffside West",General Entertainment,Skating Rink,College Stadium,Café,Vietnamese Restaurant,Clothing Store,Grocery Store,Golf Course,Gas Station,Fried Chicken Joint
3,Cedarbrae,Hakka Restaurant,Bakery,Lounge,Caribbean Restaurant,Fried Chicken Joint,Bank,Gas Station,Athletics & Sports,Thai Restaurant,Golf Course
4,"Clairlea,Golden Mile,Oakridge",Bakery,Bus Line,Metro Station,Ice Cream Shop,Soccer Field,Intersection,Bus Station,Discount Store,College Stadium,Convenience Store


### Run k-means to cluster the neighborhood into 5 clusters.

In [32]:
# set number of clusters
kclusters = 5

scarborough_grouped_clustering = scarborough_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 0, 0, 0, 0, 3, 0, 0, 0])

In [34]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged = scarborough_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')


In [36]:
scarborough_merged.tail(15)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,0.0,Rental Car Location,Medical Center,Spa,Electronics Store,Intersection,Mexican Restaurant,Breakfast Spot,Department Store,Coffee Shop,College Stadium
3,M1G,Scarborough,Woburn,43.770992,-79.216917,4.0,Coffee Shop,Korean Restaurant,Convenience Store,Vietnamese Restaurant,Clothing Store,Grocery Store,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Hakka Restaurant,Bakery,Lounge,Caribbean Restaurant,Fried Chicken Joint,Bank,Gas Station,Athletics & Sports,Thai Restaurant,Golf Course
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2.0,Playground,Convenience Store,Vietnamese Restaurant,Chinese Restaurant,Grocery Store,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029,0.0,Hobby Shop,Discount Store,Department Store,Bus Station,Convenience Store,Coffee Shop,Vietnamese Restaurant,Clothing Store,Grocery Store,Golf Course
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577,0.0,Bakery,Bus Line,Metro Station,Ice Cream Shop,Soccer Field,Intersection,Bus Station,Discount Store,College Stadium,Convenience Store
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476,3.0,Motel,American Restaurant,Vietnamese Restaurant,Grocery Store,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848,0.0,General Entertainment,Skating Rink,College Stadium,Café,Vietnamese Restaurant,Clothing Store,Grocery Store,Golf Course,Gas Station,Fried Chicken Joint
10,M1P,Scarborough,"Dorset Park,Scarborough Town Centre,Wexford He...",43.75741,-79.273304,0.0,Indian Restaurant,Vietnamese Restaurant,Brewery,Thrift / Vintage Store,Light Rail Station,Pet Store,Chinese Restaurant,Bar,Convenience Store,American Restaurant
11,M1R,Scarborough,"Maryvale,Wexford",43.750071,-79.295849,0.0,Middle Eastern Restaurant,Accessories Store,Auto Garage,Bakery,Shopping Mall,Sandwich Place,Breakfast Spot,Electronics Store,College Stadium,Convenience Store


In [40]:
# remove 'NaN' values
scarborough_merged = scarborough_merged.dropna()
# convert 'Cluster Lables' column values to integers                                                                                
scarborough_merged['Cluster Labels'] = scarborough_merged['Cluster Labels'].astype(int)  

In [41]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighbourhood'], scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examining the Clusters

#### Cluster 1

In [43]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 0, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0,History Museum,Golf Course,Bar,Clothing Store,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant,Electronics Store
2,Scarborough,0,Rental Car Location,Medical Center,Spa,Electronics Store,Intersection,Mexican Restaurant,Breakfast Spot,Department Store,Coffee Shop,College Stadium
4,Scarborough,0,Hakka Restaurant,Bakery,Lounge,Caribbean Restaurant,Fried Chicken Joint,Bank,Gas Station,Athletics & Sports,Thai Restaurant,Golf Course
6,Scarborough,0,Hobby Shop,Discount Store,Department Store,Bus Station,Convenience Store,Coffee Shop,Vietnamese Restaurant,Clothing Store,Grocery Store,Golf Course
7,Scarborough,0,Bakery,Bus Line,Metro Station,Ice Cream Shop,Soccer Field,Intersection,Bus Station,Discount Store,College Stadium,Convenience Store
9,Scarborough,0,General Entertainment,Skating Rink,College Stadium,Café,Vietnamese Restaurant,Clothing Store,Grocery Store,Golf Course,Gas Station,Fried Chicken Joint
10,Scarborough,0,Indian Restaurant,Vietnamese Restaurant,Brewery,Thrift / Vintage Store,Light Rail Station,Pet Store,Chinese Restaurant,Bar,Convenience Store,American Restaurant
11,Scarborough,0,Middle Eastern Restaurant,Accessories Store,Auto Garage,Bakery,Shopping Mall,Sandwich Place,Breakfast Spot,Electronics Store,College Stadium,Convenience Store
12,Scarborough,0,Lounge,Skating Rink,Latin American Restaurant,Clothing Store,Breakfast Spot,Vietnamese Restaurant,Discount Store,College Stadium,Convenience Store,Department Store
13,Scarborough,0,Pizza Place,Pharmacy,Noodle House,Thai Restaurant,Gas Station,Fried Chicken Joint,Fast Food Restaurant,Shopping Mall,Bank,Chinese Restaurant


#### Cluster 2

In [44]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 1, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,1,Fast Food Restaurant,Vietnamese Restaurant,Hakka Restaurant,Grocery Store,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint,Electronics Store,Discount Store


#### Cluster 3

In [45]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 2, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,2,Playground,Convenience Store,Vietnamese Restaurant,Chinese Restaurant,Grocery Store,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant
14,Scarborough,2,Playground,Park,Vietnamese Restaurant,Chinese Restaurant,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant,Electronics Store


#### Cluster 4

In [46]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 3, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Scarborough,3,Motel,American Restaurant,Vietnamese Restaurant,Grocery Store,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant,Electronics Store


#### Cluster 5

In [47]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 4, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,4,Coffee Shop,Korean Restaurant,Convenience Store,Vietnamese Restaurant,Clothing Store,Grocery Store,Golf Course,General Entertainment,Gas Station,Fried Chicken Joint
