# Segmenting and Clustering Neighborhoods in the city of Toronto, Canada

### Downloading all the basic libraries that will be needed

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    ca-certificates-2019.3.9   |       hecc5488_0         146 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will

### Since the neighborhood data for Toronto is not readily available online, it has to extracted from Wikipedia

### We'll use the BeautifulSoup Package in Python to scrape the required data from Wikipedia.

In [2]:
import requests
from bs4 import BeautifulSoup

In [3]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source, 'lxml')
table = soup.find('table', class_='wikitable sortable').tbody
rows = table.find_all('tr')
cols = [cell.text.replace('\n','') for cell in rows[0].find_all('th')]

df = pd.DataFrame(columns = cols)

for i in range(1,len(rows)):
    tds = rows[i].find_all('td')
    values = [tds[0].text,tds[1].text,tds[2].text.replace('\n','') ]
    
    df = df.append(pd.Series(values, index=cols), ignore_index=True)
df.head()    

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


### Once the table is imported, rows with Borough values 'Not Assigned' are dropped.

In [4]:
df = df[df['Borough'] != 'Not assigned'].reset_index(drop=True)



In [5]:
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


### Rows containing the same Postcodes are grouped together 

In [6]:
df = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(", ".join)


### Since this process converted the dataframe into a series, it is converted back to a dataframe

In [7]:
pdf = df.to_frame()

In [8]:
pdf.reset_index(inplace=True)

In [9]:
pdf.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Neighbourhoods with 'Not assigned' values are given the same values as its corresponding Borough

In [10]:
df = pdf.replace('Not assigned',np.NaN)

In [11]:
df['Neighbourhood'] = df['Neighbourhood'].fillna(df['Borough'])

In [231]:
df.head(100)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [12]:
df.shape

(103, 3)

### Loading the csv file for Latitude and Longitude

In [13]:
df_ll = pd.read_csv('https://cocl.us/Geospatial_data')

### Chaning the name of the common column to 'Postcode' so that joing the two dataframes will be simplified

In [14]:
df_ll.columns = ['Postcode','Latitude','Longitude']

### The two dataframes are then merged

In [15]:
df_cd = pd.merge(df, df_ll, how='inner', on = 'Postcode')

In [16]:
df_cd.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


### Using geopy to get latitude and longitude of Toronto

In [17]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### Creating the map of Toronto with the neighbourhoods superimposed on top

In [18]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_cd['Latitude'], df_cd['Longitude'], df_cd['Borough'], df_cd['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Let's segment and cluster only the neighbourhoods in East York

In [19]:
east_york = df_cd[df_cd['Borough'] == 'East York'].reset_index(drop=True)
east_york.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
1,M4C,East York,Woodbine Heights,43.695344,-79.318389
2,M4G,East York,Leaside,43.70906,-79.363452
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372
4,M4J,East York,East Toronto,43.685347,-79.338106


### Obtaining coordinates of East York

In [20]:
address = 'East York, Toronto, ON, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of East York are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of East York are 43.6913391, -79.3278212.


### Let's visualize East York neighbourhoods

In [21]:
map_east_york = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(east_york['Latitude'], east_york['Longitude'], east_york['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_east_york)  
    
map_east_york

### Define FourSquared credentials

In [26]:
CLIENT_ID = 'YGLRVRU3XAIHB0ZTK42TTBGI2WD2GPVKE3TFZZKNSR2LWKLN' # your Foursquare ID
CLIENT_SECRET = 'ZGXCCUSPGOY2ZVP4MQWO5ACYCSVLHNWEFJWSS5BH1JLTGLM2' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
limit = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YGLRVRU3XAIHB0ZTK42TTBGI2WD2GPVKE3TFZZKNSR2LWKLN
CLIENT_SECRET:ZGXCCUSPGOY2ZVP4MQWO5ACYCSVLHNWEFJWSS5BH1JLTGLM2


## Explore the neighbourhoods in East York

In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            limit)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Getting nearby venues for each neighbourhood

In [28]:
east_york_venues = getNearbyVenues(names=east_york['Neighbourhood'],
                                   latitudes=east_york['Latitude'],
                                   longitudes=east_york['Longitude']
                                  )

Woodbine Gardens, Parkview Hill
Woodbine Heights
Leaside
Thorncliffe Park
East Toronto


### The size of the resulting DataFrame

In [29]:
print(east_york_venues.shape)

(77, 7)


### Checking how many venues per neighbourhood were returned

In [30]:
east_york_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
East Toronto,3,3,3,3,3,3
Leaside,35,35,35,35,35,35
Thorncliffe Park,18,18,18,18,18,18
"Woodbine Gardens, Parkview Hill",13,13,13,13,13,13
Woodbine Heights,8,8,8,8,8,8


## Analyze each Neighbourhood

In [32]:
# one hot encoding
east_york_onehot = pd.get_dummies(east_york_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
east_york_onehot['Neighborhood'] = east_york_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [east_york_onehot.columns[-1]] + list(east_york_onehot.columns[:-1])
east_york_onehot = east_york_onehot[fixed_columns]

east_york_onehot.head()

Unnamed: 0,Neighborhood,Athletics & Sports,Bagel Shop,Bank,Beer Store,Bike Shop,Breakfast Spot,Brewery,Burger Joint,Bus Stop,Café,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Curling Ice,Dessert Shop,Discount Store,Electronics Store,Fast Food Restaurant,Fish & Chips Shop,Food & Drink Shop,Furniture / Home Store,Gastropub,Grocery Store,Gym,Gym / Fitness Center,Housing Development,Indian Restaurant,Intersection,Liquor Store,Mexican Restaurant,Park,Pet Store,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Shopping Mall,Skating Rink,Smoothie Shop,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Warehouse Store,Yoga Studio
0,"Woodbine Gardens, Parkview Hill",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Woodbine Gardens, Parkview Hill",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Woodbine Gardens, Parkview Hill",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,"Woodbine Gardens, Parkview Hill",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Woodbine Gardens, Parkview Hill",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


### Grouping rows by taking mean of frequency of occurence of each category

In [33]:
east_york_grouped = east_york_onehot.groupby('Neighborhood').mean().reset_index()
east_york_grouped

Unnamed: 0,Neighborhood,Athletics & Sports,Bagel Shop,Bank,Beer Store,Bike Shop,Breakfast Spot,Brewery,Burger Joint,Bus Stop,Café,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Curling Ice,Dessert Shop,Discount Store,Electronics Store,Fast Food Restaurant,Fish & Chips Shop,Food & Drink Shop,Furniture / Home Store,Gastropub,Grocery Store,Gym,Gym / Fitness Center,Housing Development,Indian Restaurant,Intersection,Liquor Store,Mexican Restaurant,Park,Pet Store,Pharmacy,Pizza Place,Restaurant,Sandwich Place,Shopping Mall,Skating Rink,Smoothie Shop,Sporting Goods Shop,Sports Bar,Supermarket,Sushi Restaurant,Warehouse Store,Yoga Studio
0,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Leaside,0.0,0.028571,0.028571,0.028571,0.028571,0.028571,0.028571,0.057143,0.0,0.0,0.028571,0.085714,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.028571,0.028571,0.057143,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.028571,0.0,0.0,0.028571,0.028571,0.028571,0.0,0.028571,0.085714,0.028571,0.028571,0.057143,0.0,0.0
2,Thorncliffe Park,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.055556,0.0,0.055556,0.111111,0.055556,0.055556,0.0,0.055556,0.0,0.055556,0.055556,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.055556,0.055556
3,"Woodbine Gardens, Parkview Hill",0.076923,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.076923,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Woodbine Heights,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Printing each neighbourhood and its top 5 venues

In [34]:
num_top_venues = 5

for hood in east_york_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = east_york_grouped[east_york_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----East Toronto----
                 venue  freq
0    Convenience Store  0.33
1                 Park  0.33
2          Coffee Shop  0.33
3           Restaurant  0.00
4  Housing Development  0.00


----Leaside----
                 venue  freq
0          Coffee Shop  0.09
1  Sporting Goods Shop  0.09
2        Grocery Store  0.06
3         Burger Joint  0.06
4     Sushi Restaurant  0.06


----Thorncliffe Park----
               venue  freq
0  Indian Restaurant  0.11
1      Grocery Store  0.06
2     Discount Store  0.06
3    Warehouse Store  0.06
4        Supermarket  0.06


----Woodbine Gardens, Parkview Hill----
                  venue  freq
0  Fast Food Restaurant  0.15
1           Pizza Place  0.15
2    Athletics & Sports  0.08
3                  Café  0.08
4          Intersection  0.08


----Woodbine Heights----
                venue  freq
0  Athletics & Sports  0.12
1          Beer Store  0.12
2                Park  0.12
3            Bus Stop  0.12
4        Skating Rink  0.12




### Convert the above into a pandas dataframe

In [50]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = east_york_grouped['Neighborhood']

for ind in np.arange(east_york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(east_york_grouped.iloc[ind, :], num_top_venues)
neighborhoods_venues_sorted.columns = ['Neighbourhood','1st Most Common Venue','2nd Most Common Venue','3rd Most Common Venue','4th Most Common Venue','5th Most Common Venue','6th Most Common Venue','7th Most Common Venue','8th Most Common Venue','9th Most Common Venue','10th Most Common Venue']
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,Coffee Shop,Park,Convenience Store,Yoga Studio,Fish & Chips Shop,Fast Food Restaurant,Electronics Store,Discount Store,Dessert Shop,Curling Ice
1,Leaside,Sporting Goods Shop,Coffee Shop,Sushi Restaurant,Furniture / Home Store,Grocery Store,Burger Joint,Liquor Store,Mexican Restaurant,Food & Drink Shop,Electronics Store
2,Thorncliffe Park,Indian Restaurant,Yoga Studio,Pharmacy,Coffee Shop,Warehouse Store,Grocery Store,Gym,Housing Development,Intersection,Liquor Store
3,"Woodbine Gardens, Parkview Hill",Pizza Place,Fast Food Restaurant,Gastropub,Bank,Breakfast Spot,Café,Gym / Fitness Center,Intersection,Pet Store,Pharmacy
4,Woodbine Heights,Athletics & Sports,Skating Rink,Curling Ice,Cosmetics Shop,Park,Bus Stop,Pharmacy,Beer Store,Breakfast Spot,Convenience Store


## Clustering the Neighbourhoods

In [51]:
# set number of clusters
kclusters = 5

east_york_grouped_clustering = east_york_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(east_york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 1, 3, 4, 0], dtype=int32)

### Creating new dataframe that contains the cluster as well as the top venues

In [55]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

east_york_merged = east_york


east_york_merged = east_york_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

east_york_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937,4,Pizza Place,Fast Food Restaurant,Gastropub,Bank,Breakfast Spot,Café,Gym / Fitness Center,Intersection,Pet Store,Pharmacy
1,M4C,East York,Woodbine Heights,43.695344,-79.318389,0,Athletics & Sports,Skating Rink,Curling Ice,Cosmetics Shop,Park,Bus Stop,Pharmacy,Beer Store,Breakfast Spot,Convenience Store
2,M4G,East York,Leaside,43.70906,-79.363452,1,Sporting Goods Shop,Coffee Shop,Sushi Restaurant,Furniture / Home Store,Grocery Store,Burger Joint,Liquor Store,Mexican Restaurant,Food & Drink Shop,Electronics Store
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372,3,Indian Restaurant,Yoga Studio,Pharmacy,Coffee Shop,Warehouse Store,Grocery Store,Gym,Housing Development,Intersection,Liquor Store
4,M4J,East York,East Toronto,43.685347,-79.338106,2,Coffee Shop,Park,Convenience Store,Yoga Studio,Fish & Chips Shop,Fast Food Restaurant,Electronics Store,Discount Store,Dessert Shop,Curling Ice


## Finally let's visualize the clusters!

In [56]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(east_york_merged['Latitude'], east_york_merged['Longitude'], east_york_merged['Neighbourhood'], east_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Thank You!