# IBM DataScience - Capstone - wk3 Part 3 - Sven De Smit
# Segmenting and Clustering Neighborhoods in Toronto

In [1]:
#!conda install -c conda-forge folium=0.5.0 --yes

In [2]:
import pandas as pd

import requests

import folium

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  

## Read neighborhood dataset from the file created by the previous exercise 

In [3]:
# Load the postal-code table (with coordinates) written by the previous exercise.
POSTAL_CODES_CSV = 'toronto_postal_codes_with_location.csv'
df_postal_loc = pd.read_csv(POSTAL_CODES_CSV)

# Quick sanity check: (rows, columns) followed by the first few records.
print(df_postal_loc.shape)
df_postal_loc.head()

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Show neighborhoods on a map

### Find geo location of the city to make sure that the map is centered correctly 

In [4]:
from geopy.geocoders import Nominatim

address = 'Canada, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() yields None when the address cannot be resolved; fail with a clear
# message here instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

# These coordinates are used to centre the maps drawn further down.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


### Show the neighborhoods on a city map

In [5]:
# Centre the map on the geocoded city coordinates, shifted 0.05 degrees of
# latitude to the north.
map_toronto = folium.Map(location=[latitude + 0.05, longitude], zoom_start=11)

# Add one circle marker per postal-code row, labelled "<neighbourhood>, <borough>".
for lat, lng, borough, neighborhood in zip(df_postal_loc['Latitude'],
                                           df_postal_loc['Longitude'],
                                           df_postal_loc['Borough'],
                                           df_postal_loc['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html belongs to the Popup (it escapes the label text); it is not a
    # CircleMarker option, so it is no longer passed to the marker below.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color='red',
        fill=True,
        fill_color='orange',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto

## Create venue dataset with Foursquare API's

### Read Foursquare credentials from CSV file

In [6]:
# credentials.txt is parsed as a header-less CSV: column 1 of row 0 holds the
# client id and column 1 of row 1 holds the client secret.
df_credentials = pd.read_csv('credentials.txt', header=None)
CLIENT_ID = df_credentials.loc[0, 1]
CLIENT_SECRET = df_credentials.loc[1, 1]

### Get nearby venues for all rows in the neighborhood dataset

In [7]:
VERSION = '20180605' # Foursquare API version


def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=None):
    """Query the Foursquare "explore" endpoint once per location and collect the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighbourhood name and coordinates; they are consumed in lock-step.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.
    limit : int, optional
        Value sent as the ``limit`` query parameter. When omitted, the
        notebook-level ``LIMIT`` is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    if limit is None:
        # Resolved at call time, so LIMIT may be defined after this function.
        limit = LIMIT

    explore_url = 'https://api.foursquare.com/v2/venues/explore'
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }

        # A timeout keeps one stalled request from hanging the whole loop, and
        # raise_for_status() reports HTTP errors instead of a KeyError below.
        response = requests.get(explore_url, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # A venue may come without categories; record None rather than
            # failing with an IndexError.
            categories = venue.get('categories') or []
            category_name = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category_name))

    # Passing the columns to the constructor also works for an empty result,
    # where assigning `.columns` afterwards would raise a length mismatch.
    return pd.DataFrame(venue_rows, columns=columns)

In [8]:
# Value passed to the API as the `limit` parameter of each request.
LIMIT = 100

# One API call per postal-code row; the result is one long table of venues.
toronto_venues = getNearbyVenues(
    names=df_postal_loc['Neighbourhood'],
    latitudes=df_postal_loc['Latitude'],
    longitudes=df_postal_loc['Longitude'],
)


In [9]:
# Total number of venue rows collected over all neighbourhoods.
venue_total = len(toronto_venues)
print('There are {} venues in this dataset.'.format(venue_total))
toronto_venues.head()

There are 2244 venues in this dataset.


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant


### Get an idea of how many venues we have per neighborhood

In [10]:
# Count the venues found for each neighbourhood and list them as "<count> <name>".
venues_per_neighborhood = toronto_venues.groupby('Neighborhood')['Venue'].count()
for neighborhood_name, venue_count in venues_per_neighborhood.items():
    print(venue_count, '\t', neighborhood_name)

100 	 Adelaide, King, Richmond
5 	 Agincourt
2 	 Agincourt North, L'Amoreaux East, Milliken, Steeles East
9 	 Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown
9 	 Alderwood, Long Branch
22 	 Bathurst Manor, Downsview North, Wilson Heights
4 	 Bayview Village
22 	 Bedford Park, Lawrence Manor East
57 	 Berczy Park
5 	 Birch Cliff, Cliffside West
9 	 Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
21 	 Brockton, Exhibition Place, Parkdale Village
19 	 Business Reply Mail Processing Centre 969 Eastern
2 	 CFB Toronto, Downsview East
14 	 CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
45 	 Cabbagetown, St. James Town
5 	 Caledonia-Fairbanks
11 	 Canada Post Gateway Processing Centre
7 	 Cedarbrae
84 	 Central Bay Street
100 	 Chinatown, Grange Park, Kensington Market
15 	 Christie
84 	 Church and Wellesley
9 	 Clairlea, Golden Mile, Oakridge
10 	 Clarks Cor

### Get an overview of the venue categories

In [11]:
# Distinct venue categories, in order of first appearance.
unique_categories = toronto_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))
unique_categories

There are 280 uniques categories.


array(['Fast Food Restaurant', 'Bar', 'Pizza Place', 'Electronics Store',
       'Mexican Restaurant', 'Rental Car Location', 'Medical Center',
       'Intersection', 'Breakfast Spot', 'Coffee Shop',
       'Korean Restaurant', 'Hakka Restaurant', 'Caribbean Restaurant',
       'Thai Restaurant', 'Athletics & Sports', 'Bank', 'Bakery',
       'Fried Chicken Joint', 'Playground', 'Spa', 'Convenience Store',
       'Department Store', 'Discount Store', 'Chinese Restaurant',
       'Bus Station', 'Bus Line', 'Metro Station', 'Soccer Field',
       'Motel', 'Movie Theater', 'American Restaurant', 'Café',
       'General Entertainment', 'Farm', 'Skating Rink', 'College Stadium',
       'Indian Restaurant', 'Vietnamese Restaurant', 'Pet Store',
       'Latin American Restaurant', 'Sandwich Place',
       'Middle Eastern Restaurant', 'Shopping Mall', 'Auto Garage',
       'Lounge', 'Italian Restaurant', 'Noodle House', 'Pharmacy', 'Park',
       'Grocery Store', 'Thrift / Vintage Store', 'Nai

### Create a one-hot encoding matrix, with venue categories as columns and one row per venue in the venue dataframe

In [12]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
print(toronto_onehot.shape)
toronto_onehot.head()

(2244, 280)


Unnamed: 0,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


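### Remove 'Neighborhood' column in the onehot dataset
The one-hot columns are generated only from the `Venue Category` values, so a 'Neighborhood' column here means that some venues carry the category "Neighborhood". We remove it because it would clash with the neighborhood-name column that is added to this dataset in the next step.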

In [13]:
#toronto_onehot['Neighborhood']

In [14]:
# Drop the indicator column for the venue category named 'Neighborhood'.
# errors='ignore' and the non in-place form keep this cell re-runnable: it no
# longer raises a KeyError when the column is absent or was already removed.
toronto_onehot = toronto_onehot.drop(columns=['Neighborhood'], errors='ignore')

In [15]:
#toronto_onehot['Neighborhood']

In [16]:
### Add Neighborhood column to the onehot dataset as a first column

In [17]:
# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
### For each Neighborhood, calculate the mean frequency of each venue category (its share of the neighborhood's venues, as a fraction between 0 and 1)

In [19]:
# The mean of the 0/1 indicators per neighbourhood is the share of each category
# among that neighbourhood's venues.
toronto_grouped = (
    toronto_onehot
    .groupby('Neighborhood')
    .mean()
    .reset_index()  # Neighborhood from index => column
)
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### For each neighborhood, show the top 5 venue categories

In [20]:
num_top_venues = 5

# Print, for every neighbourhood, its most frequent venue categories.
for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")

    # Turn the single matching row into a two-column (venue, freq) table.
    hood_mask = toronto_grouped['Neighborhood'] == hood
    freq_table = toronto_grouped[hood_mask].T.reset_index()
    freq_table.columns = ['venue','freq']

    # The first row holds the neighbourhood name itself, not a category.
    freq_table = (
        freq_table.iloc[1:]
        .assign(freq=lambda table: table['freq'].astype(float))
        .round({'freq': 2})
    )

    ranked = freq_table.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.08
1             Café  0.05
2              Bar  0.04
3       Steakhouse  0.04
4  Thai Restaurant  0.04


----Agincourt----
                venue  freq
0      Breakfast Spot   0.2
1  Chinese Restaurant   0.2
2              Lounge   0.2
3        Skating Rink   0.2
4      Sandwich Place   0.2


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                 venue  freq
0           Playground   0.5
1                 Park   0.5
2    Accessories Store   0.0
3        Metro Station   0.0
4  Monument / Landmark   0.0


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                  venue  freq
0         Grocery Store  0.22
1           Pizza Place  0.11
2           Coffee Shop  0.11
3        Sandwich Place  0.11
4  Fast Food Restaurant  0.11


----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.22
1    Sk

                venue  freq
0      Discount Store  0.33
1    Department Store  0.17
2         Coffee Shop  0.17
3  Chinese Restaurant  0.17
4         Bus Station  0.17


----East Toronto----
               venue  freq
0               Park   0.4
1        Pizza Place   0.2
2        Coffee Shop   0.2
3  Convenience Store   0.2
4  Martial Arts Dojo   0.0


----Emery, Humberlea----
                             venue  freq
0                   Baseball Field   1.0
1                Accessories Store   0.0
2               Mexican Restaurant   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0


----Fairview, Henry Farm, Oriole----
                  venue  freq
0        Clothing Store  0.14
1           Coffee Shop  0.08
2  Fast Food Restaurant  0.08
3   Japanese Restaurant  0.03
4            Shoe Store  0.03


----First Canadian Place, Underground city----
         venue  freq
0  Coffee Shop  0.09
1         Café  0.07
2   Steakhouse  0.04
3        Hotel  0.04
4

                             venue  freq
0             Fast Food Restaurant   1.0
1                Accessories Store   0.0
2                    Metro Station   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0


----Runnymede, Swansea----
              venue  freq
0       Coffee Shop  0.08
1              Café  0.08
2       Pizza Place  0.06
3             Diner  0.06
4  Sushi Restaurant  0.06


----Ryerson, Garden District----
                       venue  freq
0                Coffee Shop  0.10
1             Clothing Store  0.06
2             Cosmetics Shop  0.04
3                       Café  0.03
4  Middle Eastern Restaurant  0.03


----Scarborough Village----
               venue  freq
0  Convenience Store  0.33
1                Spa  0.33
2         Playground  0.33
3  Accessories Store  0.00
4      Metro Station  0.00


----Silver Hills, York Mills----
                             venue  freq
0                        Cafeteria   1.0
1              

### Create a dataframe containing the 10 most common venue categories for each neighborhood

In [21]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of `row`, skipping its first element.

    The first element of `row` is left out (it is not a category value); the
    remaining values are sorted in descending order and the index labels of the
    first `num_top_venues` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [22]:
num_top_venues = 10


def _ordinal(position):
    """Return `position` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    if 10 <= position % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
    return '{}{}'.format(position, suffix)


# One column per rank; the suffix is computed explicitly rather than by
# catching the IndexError of a three-element lookup list with a bare `except`.
columns = ['Neighborhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# Collect the ranked categories of every neighbourhood first and build the
# frame in one go instead of filling it row by row.
top_venues = [
    return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
    for ind in range(toronto_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(
    top_venues, columns=columns[1:], index=toronto_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', toronto_grouped['Neighborhood'])

print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted.head()

(101, 11)


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Steakhouse,Bar,Thai Restaurant,Gym,Breakfast Spot,Hotel,Asian Restaurant,Restaurant
1,Agincourt,Lounge,Breakfast Spot,Skating Rink,Chinese Restaurant,Sandwich Place,Eastern European Restaurant,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Park,Playground,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Fast Food Restaurant,Pizza Place,Sandwich Place,Coffee Shop,Beer Store,Pharmacy,Fried Chicken Joint,Empanada Restaurant,Electronics Store
4,"Alderwood, Long Branch",Pizza Place,Pool,Skating Rink,Gym,Pharmacy,Coffee Shop,Pub,Sandwich Place,Yoga Studio,Diner


### Create neighborhood clusters based on the venue category frequencies of each neighborhood

In [23]:
from sklearn.cluster import KMeans

kclusters = 5

# Feature matrix for clustering: the category frequencies without the name
# column. The `columns=` keyword replaces the positional axis argument
# (`drop('Neighborhood', 1)`), which pandas 2.0 no longer accepts.
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering (fixed random_state for reproducible labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# one cluster label per row of toronto_grouped
print(len(kmeans.labels_))
print(kmeans.labels_)

101
[1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 2 2 1 1 1 1 2
 1 2 4 3 1 1 1 0 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 1 1 2]


### Add the cluster labels to the dataset with the top 10 venue categories per neighborhood, and merge that dataset with the location dataset for the Toronto neighborhoods

In [24]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = df_postal_loc.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

print(toronto_merged.shape)
toronto_merged.head()

(103, 16)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,3.0,Fast Food Restaurant,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Field
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,1.0,Bar,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Discount Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Breakfast Spot,Electronics Store,Rental Car Location,Intersection,Mexican Restaurant,Medical Center,Pizza Place,Empanada Restaurant,Ethiopian Restaurant,Eastern European Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1.0,Coffee Shop,Korean Restaurant,Yoga Studio,Electronics Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Hakka Restaurant,Thai Restaurant,Fried Chicken Joint,Bank,Bakery,Athletics & Sports,Caribbean Restaurant,Cuban Restaurant,Costume Shop,Farmers Market


### Remove rows where the cluster label could not be calculated 

In [25]:
# Compare the sizes of the two inputs of the join above.
print(df_postal_loc.shape)
print(neighborhoods_venues_sorted.shape)

# Rows with at least one missing value after the join.
has_missing_value = toronto_merged.isna().any(axis=1)
rows_missing_cluster = toronto_merged[has_missing_value]
rows_missing_cluster

(103, 5)
(101, 12)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,M1X,Scarborough,Upper Rouge,43.836125,-79.205636,,,,,,,,,,,
93,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,,,,,,,,,,,


In [26]:
# Keep only the rows without any missing value.
toronto_merged = toronto_merged.dropna()
print(toronto_merged.shape)

(101, 16)


In [27]:
# Verify that no row with a missing value is left (an empty table is expected).
still_missing = toronto_merged.isna().any(axis=1)
rows_still_missing = toronto_merged[still_missing]
rows_still_missing

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


### Generate a map of Toronto with the neighborhoods and how they cluster together (colour indicated)

In [28]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude + 0.05, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'],
                                  toronto_merged['Longitude'],
                                  toronto_merged['Neighbourhood'],
                                  toronto_merged['Cluster Labels']):
    # The labels are floats after the join (it introduced NaN rows); convert
    # before building the popup so it reads "Cluster 1" instead of "Cluster 1.0".
    cluster = int(cluster)
    label = folium.Popup('{} Cluster {}'.format(poi, cluster), parse_html=True)
    # Index the palette by the label itself; `cluster - 1` sent cluster 0 to
    # the last colour through negative indexing.
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Conclusion

I currently live in the Humber Summit neighborhood and I want to move to the Guildwood, Morningside, West Hill neighborhood.

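The map shows that both neighborhoods are in the same cluster and are thus fairly similar in their mix of venues. Note, however, that cluster 1 contains the large majority of the neighborhoods (see the label array printed after the clustering step), so sharing this cluster is only weak evidence of similarity.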

Below we also see that both are neighborhoods with plenty of restaurants.

In [29]:
# Show the rows of the two neighbourhoods being compared (selected by postal code).
compared_postal_codes = ['M9L', 'M1E']
toronto_merged[toronto_merged['PostalCode'].isin(compared_postal_codes)]

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Breakfast Spot,Electronics Store,Rental Car Location,Intersection,Mexican Restaurant,Medical Center,Pizza Place,Empanada Restaurant,Ethiopian Restaurant,Eastern European Restaurant
96,M9L,North York,Humber Summit,43.756303,-79.565963,1.0,Pizza Place,Empanada Restaurant,Yoga Studio,Eastern European Restaurant,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
