## Clustering the neighborhoods in Toronto

#### 1. build the coordinate dataframe

In [1]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url).text
soup = BeautifulSoup(source, 'lxml')
table = soup.find('table')

Postalcode = []
Borough = []
Neighbourhood = []

for tr_cell in table.find_all('tr'):
    for td_cell in tr_cell.find_all('td'):
        if td_cell.span.text != 'Not assigned':
            code = td_cell.b.text
            locat = td_cell.span.text
            if '(' in locat:
                bor = locat[:locat.find('(')]
                hood = locat[locat.find('(')+1:locat.find(')')].replace('/',',')
            else:
                bor = locat[locat.find('/')+2:]
                hood = locat[:locat.find('/')-1]
            Postalcode.append(code)
            Borough.append(bor)
            Neighbourhood.append(hood)
            
err1 = 'MississaugaCanada Post Gateway Processing Centre'
Borough[Borough.index(err1)] = 'Mississauga Canada Post Gateway Processing Centre'
err2 = 'East TorontoBusiness reply mail Processing Centre969 Eastern'
Borough[Borough.index(err2)] = 'East Toronto Business reply mail Processing Centre969 Eastern'

coordinate = pd.read_csv('Geospatial_Coordinates.csv')
coordinate.set_index('Postal Code', inplace = True)
latitude = []
longitude = []
for i in range(coordinate.shape[0]):
    latitude.append(coordinate.loc[Postalcode[i],'Latitude'])
    longitude.append(coordinate.loc[Postalcode[i],'Longitude'])
coordinate_dict = {'Postalcode': Postalcode, 
                   'Borough': Borough, 
                   'Neighbourhood': Neighbourhood, 
                   'Latitude': latitude, 
                   'Longitude': longitude}
df_coordinate = pd.DataFrame.from_dict(coordinate_dict)
df_coordinate

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Ontario Provincial Government,Queen's Park,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto Business reply mail Processing Ce...,Enclave of M4L,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.636258,-79.498509


#### 2. create a map of Toronto

In [3]:
import folium

In [15]:
toronto_latitude = 43.6532; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10)
# add markers to map
for lat, lng, borough, neighborhood in zip(df_coordinate['Latitude'], 
                                           df_coordinate['Longitude'], 
                                           df_coordinate['Borough'], 
                                           df_coordinate['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
map_toronto

In [16]:
CLIENT_ID = 'ADWDOUGGD32EEVAXCTWCNCCA52GVGFU003G1QXRIGOPUACYV'
CLIENT_SECRET = 'CQYLABIZUIXLUGURGP0G2HYTRGAEDL5Y422A5ULVZYTQVFFK'
VERSION = '20180605'

In [17]:
northyork_data = df_coordinate[df_coordinate['Borough'] == 'North York'].reset_index(drop=True)
northyork_data

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073
5,M3C,North York,Don Mills,43.7259,-79.340923
6,M2H,North York,Hillcrest Village,43.803762,-79.363452
7,M3H,North York,"Bathurst Manor , Wilson Heights , Downsview North",43.754328,-79.442259
8,M2J,North York,"Fairview , Henry Farm , Oriole",43.778517,-79.346556
9,M3J,North York,"Northwood Park , York University",43.76798,-79.487262


#### 3. create a map of North York and its neighbours

In [25]:
map_ny = folium.Map(location = [43.76, -79.45], zoom_start = 11.5)
for lat, lng, label in zip(northyork_data['Latitude'], 
                           northyork_data['Longitude'], 
                           northyork_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_ny)  
map_ny

#### 4. get the top 100 venues in the neighborhood Parkwoods from North York

In [27]:
neighborhood_latitude = northyork_data.loc[0, 'Latitude']
neighborhood_longitude = northyork_data.loc[0, 'Longitude']

neighborhood_name = northyork_data.loc[0, 'Neighbourhood']

print('Latitude and longitude values of "{}" are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of "Parkwoods" are 43.7532586, -79.3296565.


In [28]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, 
                                                                                                                           CLIENT_SECRET, 
                                                                                                                           43.76, 
                                                                                                                           -79.45,
                                                                                                                           VERSION,
                                                                                                                           radius,
                                                                                                                           LIMIT)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e761d539388d7001b2d3a5d'},
 'response': {'headerLocation': 'Bathurst Manor',
  'headerFullLocation': 'Bathurst Manor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 5,
  'suggestedBounds': {'ne': {'lat': 43.7645000045, 'lng': -79.4437810392564},
   'sw': {'lat': 43.755499995499996, 'lng': -79.45621896074361}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '57d493a2498ec7f2c7f1c607',
       'name': 'Deist Beats Digital Media Services',
       'location': {'address': '92 Combe',
        'lat': 43.759635876342664,
        'lng': -79.45092558860779,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.759635876342664,
          'lng': -79.45092558860779}],
        'distance': 84,
        'posta

In [29]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [32]:
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']  
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Deist Beats Digital Media Services,Photography Studio,43.759636,-79.450926
1,Calvin Klein,Men's Store,43.759257,-79.4504
2,Wilmington Park Tennis Courts,Tennis Court,43.760482,-79.45528
3,Irving W. Chapley Community Centre & Park,Park,43.761247,-79.455509
4,Teh Club House,Nightclub,43.755571,-79.449518


In [33]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

5 venues were returned by Foursquare.


In [34]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### 5. get venues for each neighborhood in North York

In [35]:
northyork_venues = getNearbyVenues(names=northyork_data['Neighbourhood'],
                                   latitudes=northyork_data['Latitude'],
                                   longitudes=northyork_data['Longitude'])

Parkwoods
Victoria Village
Lawrence Manor , Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor , Wilson Heights , Downsview North
Fairview , Henry Farm , Oriole
Northwood Park , York University
Bayview Village
Downsview
York Mills , Silver Hills
Downsview
North Park , Maple Leaf Park , Upwood Park
Humber Summit
Willowdale , Newtonbrook
Downsview
Bedford Park , Lawrence Manor East
Humberlea , Emery
Willowdale
Downsview
York Mills West
Willowdale


In [88]:
northyork_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
...,...,...,...,...,...,...,...
238,Willowdale,43.782736,-79.442259,Dollarama,43.784670,-79.446670,Discount Store
239,Willowdale,43.782736,-79.442259,Tim Hortons,43.780940,-79.444231,Coffee Shop
240,Willowdale,43.782736,-79.442259,Price Chopper,43.783237,-79.446339,Grocery Store
241,Willowdale,43.782736,-79.442259,Hartman's,43.784312,-79.446213,Butcher


In [89]:
northyork_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor , Wilson Heights , Downsview North",21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
"Bedford Park , Lawrence Manor East",26,26,26,26,26,26
Don Mills,26,26,26,26,26,26
Downsview,14,14,14,14,14,14
"Fairview , Henry Farm , Oriole",65,65,65,65,65,65
Glencairn,5,5,5,5,5,5
Hillcrest Village,4,4,4,4,4,4
Humber Summit,2,2,2,2,2,2
"Humberlea , Emery",2,2,2,2,2,2


In [82]:
print('There are {} uniques categories.'.format(len(northyork_venues['Venue Category'].unique())))

There are 101 uniques categories.


In [93]:
# one hot encoding
ny_onehot = pd.get_dummies(northyork_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
ny_onehot['Latitude'] = northyork_venues['Neighborhood Latitude']
ny_onehot['Neighborhood'] = northyork_venues['Neighborhood'] 


# move neighborhood column to the first column
fixed_columns = [ny_onehot.columns[-1]] + list(ny_onehot.columns[:-1])
ny_onehot = ny_onehot[fixed_columns]

ny_onehot

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store,Latitude
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.753259
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.753259
2,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.725882
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.725882
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.725882
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,Willowdale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.782736
239,Willowdale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.782736
240,Willowdale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.782736
241,Willowdale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,43.782736


In [84]:
ny_onehot.shape

(243, 102)

In [94]:
ny_grouped = ny_onehot.groupby('Latitude').mean().reset_index()
ny_grouped

Unnamed: 0,Latitude,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,43.709577,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,43.713756,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,43.718518,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429
3,43.724766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,43.725882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,43.7259,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,...,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,43.728496,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,43.733283,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0
8,43.737473,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,43.739015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### 6. get top 10 venues per neighborhood

In [95]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [99]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Latitude']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Latitude'] = ny_grouped['Latitude']

for ind in np.arange(ny_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Latitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43.709577,Pizza Place,Park,Japanese Restaurant,Pub,Women's Store,Cupcake Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall
1,43.713756,Park,Construction & Landscaping,Bakery,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Cosmetics Shop
2,43.718518,Clothing Store,Furniture / Home Store,Women's Store,Shoe Store,Boutique,Coffee Shop,Event Space,Vietnamese Restaurant,Gift Shop,Miscellaneous Shop
3,43.724766,Fabric Shop,Baseball Field,Women's Store,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,43.725882,Coffee Shop,Portuguese Restaurant,Hockey Arena,Intersection,Women's Store,Department Store,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store
5,43.7259,Coffee Shop,Gym,Asian Restaurant,Restaurant,Beer Store,Chinese Restaurant,Japanese Restaurant,Italian Restaurant,Concert Hall,Dim Sum Restaurant
6,43.728496,Food Truck,Business Service,Baseball Field,Women's Store,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store
7,43.733283,Italian Restaurant,Restaurant,Coffee Shop,Sandwich Place,Juice Bar,Liquor Store,Indian Restaurant,Café,Butcher,Comfort Food Restaurant
8,43.737473,Park,Airport,Dim Sum Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop
9,43.739015,Grocery Store,Park,Bank,Shopping Mall,Women's Store,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping


#### 7. run k-means to cluster the neighborhoods into 5 clusters

In [100]:
ny_grouped

Unnamed: 0,Latitude,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,...,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,43.709577,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,43.713756,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,43.718518,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429
3,43.724766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,43.725882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,43.7259,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,...,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,43.728496,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,43.733283,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0
8,43.737473,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,43.739015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [117]:
from sklearn.cluster import KMeans
kclusters = 3
ny_grouped_clustering = ny_grouped.drop('Latitude', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)
kmeans.labels_

array([1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1,
       1], dtype=int32)

In [105]:
ny_data

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073
5,M3C,North York,Don Mills,43.7259,-79.340923
6,M2H,North York,Hillcrest Village,43.803762,-79.363452
7,M3H,North York,"Bathurst Manor , Wilson Heights , Downsview North",43.754328,-79.442259
8,M2J,North York,"Fairview , Henry Farm , Oriole",43.778517,-79.346556
9,M3J,North York,"Northwood Park , York University",43.76798,-79.487262


#### 8. visualize the clusters in the map

In [118]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location = [43.76, -79.45], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ny_data['Latitude'], 
                                  ny_data['Longitude'], 
                                  ny_data['Neighbourhood'],
                                  kmeans.labels_):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters