In [1]:
## import the needed Library
import pandas as pd
import numpy as np
import urllib
from bs4 import BeautifulSoup
import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
print('package installed')

usage: conda-script.py [-h] [-V] command ...
conda-script.py: error: unrecognized arguments: # uncomment this line if you haven't completed the Foursquare API lab


package installed


usage: conda-script.py [-h] [-V] command ...
conda-script.py: error: unrecognized arguments: # uncomment this line if you haven't completed the Foursquare API lab


## 1.Scrap the Wikipedia Data

In [4]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html_data = requests.get(url).text
soup = BeautifulSoup(html_data, 'html5lib')

## 1.1. Getting Data in reasonable shape and remove 'Not Assigned'

In [105]:
table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

# print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

dataframe = df

dataframe.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


## 1.2 Finding shape of Data

In [107]:
dataframe.shape

(103, 3)

## Get Geocode Cordinates for the Data

In [8]:
!wget -q -O 'canada_geo.csv' http://cocl.us/Geospatial_data
print('Data downloaded!')

Data downloaded!


'wget' is not recognized as an internal or external command,
operable program or batch file.


In [112]:
geocode = pd.read_csv('https://cocl.us/Geospatial_data')
geocode['Latitude'] = geocode['Latitude'].astype('object')
geocode['Longitude'] = geocode['Longitude'].astype('object')
geocode.rename(columns={'Postal Code':'PostalCode'}, inplace = True)
geocode

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [114]:
#dataframe

data = pd.merge(dataframe,geocode,on='PostalCode')
data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
100,M7Y,East Toronto Business,Enclave of M4L,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


## 3. Explore and cluster the neighborhoods in Toronto

## 3.1 Create a map of the city with cordinates on top

In [115]:
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto-explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [117]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add markers to map
for lat, lng, label in zip(data['Latitude'], data['Longitude'], data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto) 
    
map_toronto

## 3.2 Selecting only boroughs that contain the word Toronto

In [118]:
df_toronto_borough = data[data['Borough'].str.contains("Toronto")].reset_index(drop=True)
df_toronto_borough.shape

(39, 5)

## 3.3 Create a map of Toronto with all the additional boroughs on top

In [119]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto_borough['Latitude'], df_toronto_borough['Longitude'], df_toronto_borough['Borough'], df_toronto_borough['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## 3.4 Use Foursquare API to explore the Neighborhoods

In [120]:
CLIENT_ID = 'T2FIUZHPXCZUKLYKFU3QQCPNVA1TBNBS5OFUCYVNPO00XYCG' # your Foursquare ID
CLIENT_SECRET = 'IARY5F15DL0L4EZN0MSTGU4UNVHDJWT1SKGMZ15MITGFWW1P' # your Foursquare Secret
VERSION = '20180604' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: T2FIUZHPXCZUKLYKFU3QQCPNVA1TBNBS5OFUCYVNPO00XYCG
CLIENT_SECRET:IARY5F15DL0L4EZN0MSTGU4UNVHDJWT1SKGMZ15MITGFWW1P


In [121]:
df_toronto_borough

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
8,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259
9,M4J,East York/East Toronto,The Danforth East,43.685347,-79.338106


## Get all the venues by borough and neighborhood

In [122]:
def getNearbyVenues(boroughs, neighborhoods, latitudes, longitudes, radius=5000):
    
    venues_list=[]
    for borough, neighborhood, lat, lng in zip(boroughs, neighborhoods, latitudes, longitudes):
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            borough,
            neighborhood, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = [
                  'Borough',
                  'Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [124]:
borough_neighborhood_venues = getNearbyVenues(boroughs=df_toronto_borough['Borough'],
                                             neighborhoods=df_toronto_borough['Neighborhood'],
                                             latitudes=df_toronto_borough['Latitude'],
                                             longitudes=df_toronto_borough['Longitude'])

borough_neighborhood_venues

Unnamed: 0,Borough,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,The Distillery Historic District,43.650244,-79.359323,Historic Site
3,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,Distillery Sunday Market,43.650075,-79.361832,Farmers Market
4,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,Rooster Coffee,43.651900,-79.365609,Coffee Shop
...,...,...,...,...,...,...,...,...
3895,East Toronto Business,Enclave of M4L,43.662744,-79.321558,Re: Reading,43.678507,-79.347678,Bookstore
3896,East Toronto Business,Enclave of M4L,43.662744,-79.321558,Mezes,43.677962,-79.350196,Greek Restaurant
3897,East Toronto Business,Enclave of M4L,43.662744,-79.321558,I'm On The Beach,43.670364,-79.284696,Beach
3898,East Toronto Business,Enclave of M4L,43.662744,-79.321558,Glen Manor Ravine,43.676821,-79.293942,Trail


## 3.6 Checking the size of the dataframe 

In [125]:
borough_neighborhood_venues.shape

(3900, 8)

In [126]:
borough_neighborhood_venues.groupby(['Borough', 'Neighborhood']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Neighborhood,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Central Toronto,Davisville,100,100,100,100,100,100
Central Toronto,Davisville North,100,100,100,100,100,100
Central Toronto,Forest Hill North & West,100,100,100,100,100,100
Central Toronto,Lawrence Park,100,100,100,100,100,100
Central Toronto,"Moore Park, Summerhill East",100,100,100,100,100,100
Central Toronto,North Toronto West,100,100,100,100,100,100
Central Toronto,Roselawn,100,100,100,100,100,100
Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park",100,100,100,100,100,100
Central Toronto,"The Annex, North Midtown, Yorkville",100,100,100,100,100,100
Downtown Toronto,Berczy Park,100,100,100,100,100,100


### Lets see how many unique categories we can find

In [127]:
print(f'There are {borough_neighborhood_venues["Venue Category"].nunique()} uniques categories.')

There are 168 uniques categories.


## 3.7 Analyze each neighborhood and venue

In [128]:
# one hot encoding
toronto_onehot = pd.get_dummies(borough_neighborhood_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Borough'] = borough_neighborhood_venues['Borough'] 
toronto_onehot['Neighborhoods'] = borough_neighborhood_venues['Neighborhood'] 
toronto_onehot['Venue'] = borough_neighborhood_venues['Venue'] 

# move neighborhood column to the first column
fixed_columns = list(toronto_onehot.columns[-3:]) + list(toronto_onehot.columns[:-3])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Borough,Neighborhoods,Venue,Afghan Restaurant,American Restaurant,Amphitheater,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Yoga Studio,Zoo
0,Downtown Toronto,"Regent Park, Harbourfront",Roselle Desserts,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Downtown Toronto,"Regent Park, Harbourfront",Tandem Coffee,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Downtown Toronto,"Regent Park, Harbourfront",The Distillery Historic District,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Downtown Toronto,"Regent Park, Harbourfront",Distillery Sunday Market,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Downtown Toronto,"Regent Park, Harbourfront",Rooster Coffee,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [129]:
toronto_onehot.shape

(3900, 171)

In [130]:
toronto_grouped = toronto_onehot.groupby(['Borough', 'Neighborhoods']).mean().reset_index()
toronto_grouped

Unnamed: 0,Borough,Neighborhoods,Afghan Restaurant,American Restaurant,Amphitheater,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Yoga Studio,Zoo
0,Central Toronto,Davisville,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.01,0.0
1,Central Toronto,Davisville North,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0
2,Central Toronto,Forest Hill North & West,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,...,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
3,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,...,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0
4,Central Toronto,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,...,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Central Toronto,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
6,Central Toronto,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0
7,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
8,Central Toronto,"The Annex, North Midtown, Yorkville",0.0,0.0,0.0,0.0,0.02,0.02,0.01,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
9,Downtown Toronto,Berczy Park,0.0,0.01,0.0,0.01,0.02,0.0,0.0,0.01,...,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0


## Print each neighborhood along with the top 5 most common venues

In [131]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhoods']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhoods'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[2:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Davisville----
                venue  freq
0                Park  0.09
1  Italian Restaurant  0.08
2                Café  0.06
3              Bakery  0.05
4         Coffee Shop  0.04


----Davisville North----
                venue  freq
0                Park  0.11
1  Italian Restaurant  0.07
2                Café  0.07
3         Coffee Shop  0.06
4              Bakery  0.04


----Forest Hill North & West----
                venue  freq
0                Park  0.11
1                Café  0.08
2  Italian Restaurant  0.08
3      Ice Cream Shop  0.04
4         Coffee Shop  0.04


----Lawrence Park----
                venue  freq
0  Italian Restaurant  0.08
1         Coffee Shop  0.07
2                Café  0.07
3                Park  0.05
4              Bakery  0.05


----Moore Park, Summerhill East----
                venue  freq
0                Park  0.11
1                Café  0.08
2         Coffee Shop  0.07
3  Italian Restaurant  0.06
4           Gastropub  0.04


----North Toron

## Convert to a dataframe

In [132]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[2:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [133]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough', 'Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhoods']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 2:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Borough,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Davisville,Park,Italian Restaurant,Café,Bakery,Coffee Shop,Grocery Store,Indian Restaurant,BBQ Joint,Bookstore,Spa
1,Central Toronto,Davisville North,Park,Italian Restaurant,Café,Coffee Shop,Bakery,Grocery Store,BBQ Joint,French Restaurant,Spa,Supermarket
2,Central Toronto,Forest Hill North & West,Park,Café,Italian Restaurant,Ice Cream Shop,Coffee Shop,Indian Restaurant,Japanese Restaurant,Bakery,Dessert Shop,Grocery Store
3,Central Toronto,Lawrence Park,Italian Restaurant,Coffee Shop,Café,Park,Bakery,Sushi Restaurant,Grocery Store,Ice Cream Shop,Indian Restaurant,Dessert Shop
4,Central Toronto,"Moore Park, Summerhill East",Park,Café,Coffee Shop,Italian Restaurant,Gastropub,Japanese Restaurant,BBQ Joint,Grocery Store,Bakery,Beer Bar


In [134]:
neighborhoods_venues_sorted.shape

(39, 12)

## 3.8 Cluster Neighborhoods

In [135]:

# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop(['Borough', 'Neighborhoods'], 1)

# run k-means clustering
kmeans = KMeans(init='k-means++', n_clusters=kclusters, random_state=1, max_iter=1000).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 3, 3, 3, 3, 3, 3, 3, 1, 2])

In [136]:
# add clustering labels
neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_

toronto_merged = df_toronto_borough.copy()

# merge toronto_grouped with df_toronto_borough to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.drop(['Borough'], axis=1).set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Coffee Shop,Park,Hotel,Café,Neighborhood,Gastropub,Thai Restaurant,Pizza Place,Plaza,Restaurant,2
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,Coffee Shop,Park,Hotel,Pizza Place,Plaza,Restaurant,Japanese Restaurant,Farmers Market,Bookstore,Sandwich Place,2
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,Coffee Shop,Park,Hotel,Restaurant,Beer Bar,Sandwich Place,Farmers Market,Plaza,Pizza Place,Japanese Restaurant,2
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Park,Coffee Shop,Beach,Café,Bakery,Brewery,Pub,Pizza Place,Gastropub,Ice Cream Shop,4
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,Coffee Shop,Park,Hotel,Japanese Restaurant,Farmers Market,Plaza,Restaurant,Pizza Place,Sandwich Place,Performing Arts Venue,2


In [137]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## 3.9 Examining the clusters

In [138]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
8,West Toronto,Café,Italian Restaurant,Park,Coffee Shop,Beer Bar,Bar,Restaurant,Brewery,Indian Restaurant,Ice Cream Shop,0
22,West Toronto,Café,Italian Restaurant,Park,Coffee Shop,Bakery,Brewery,Bar,Ice Cream Shop,Indian Restaurant,Dog Run,0
25,West Toronto,Park,Café,Bakery,Italian Restaurant,Coffee Shop,Pizza Place,Bar,Brewery,Ice Cream Shop,Scenic Lookout,0
28,West Toronto,Café,Park,Coffee Shop,Italian Restaurant,Brewery,Ice Cream Shop,Bar,Bakery,Scenic Lookout,Indian Restaurant,0


## Cluster 1

In [139]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
6,Downtown Toronto,Café,Coffee Shop,Park,Beer Bar,Bar,Pizza Place,Italian Restaurant,Bakery,Sandwich Place,Restaurant,1
11,West Toronto,Park,Beer Bar,Pizza Place,Bakery,Bar,Café,Restaurant,Brewery,Sandwich Place,Hotel,1
14,West Toronto,Park,Beer Bar,Bar,Bakery,Café,Pizza Place,Sandwich Place,Coffee Shop,Restaurant,Wine Bar,1
24,Central Toronto,Café,Beer Bar,Park,Coffee Shop,Sandwich Place,Pizza Place,Italian Restaurant,Bar,Indian Restaurant,Spa,1
27,Downtown Toronto,Café,Pizza Place,Park,Beer Bar,Sandwich Place,Bakery,Coffee Shop,Japanese Restaurant,Gym,Hotel,1
30,Downtown Toronto,Park,Beer Bar,Bakery,Café,Pizza Place,Coffee Shop,Hotel,Sandwich Place,Gym,Japanese Restaurant,1
32,Downtown Toronto,Park,Bakery,Pizza Place,Café,Hotel,Coffee Shop,Beer Bar,Japanese Restaurant,Restaurant,Gym,1


## Cluster 2

In [140]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,Downtown Toronto,Coffee Shop,Park,Hotel,Café,Neighborhood,Gastropub,Thai Restaurant,Pizza Place,Plaza,Restaurant,2
1,Downtown Toronto,Coffee Shop,Park,Hotel,Pizza Place,Plaza,Restaurant,Japanese Restaurant,Farmers Market,Bookstore,Sandwich Place,2
2,Downtown Toronto,Coffee Shop,Park,Hotel,Restaurant,Beer Bar,Sandwich Place,Farmers Market,Plaza,Pizza Place,Japanese Restaurant,2
4,Downtown Toronto,Coffee Shop,Park,Hotel,Japanese Restaurant,Farmers Market,Plaza,Restaurant,Pizza Place,Sandwich Place,Performing Arts Venue,2
5,Downtown Toronto,Coffee Shop,Hotel,Café,Japanese Restaurant,Beer Bar,Park,Restaurant,Bookstore,Sandwich Place,Gym,2
7,Downtown Toronto,Coffee Shop,Park,Hotel,Café,Beer Bar,Pizza Place,Japanese Restaurant,Plaza,Bakery,Farmers Market,2
10,Downtown Toronto,Coffee Shop,Park,Bakery,Hotel,Café,Pizza Place,Sandwich Place,Plaza,Gym,Farmers Market,2
13,Downtown Toronto,Coffee Shop,Park,Hotel,Café,Japanese Restaurant,Gym,Sandwich Place,Beer Bar,Bakery,Farmers Market,2
16,Downtown Toronto,Coffee Shop,Park,Hotel,Café,Pizza Place,Japanese Restaurant,Plaza,Bakery,Farmers Market,Gym,2
33,Downtown Toronto,Coffee Shop,Park,Café,Japanese Restaurant,Restaurant,Beer Bar,Italian Restaurant,Bakery,Farmers Market,Historic Site,2


## Cluster 3

In [141]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
18,Central Toronto,Italian Restaurant,Coffee Shop,Café,Park,Bakery,Sushi Restaurant,Grocery Store,Ice Cream Shop,Indian Restaurant,Dessert Shop,3
19,Central Toronto,Café,Italian Restaurant,Coffee Shop,Park,Clothing Store,Grocery Store,Indian Restaurant,Ice Cream Shop,Bakery,Spa,3
20,Central Toronto,Park,Italian Restaurant,Café,Coffee Shop,Bakery,Grocery Store,BBQ Joint,French Restaurant,Spa,Supermarket,3
21,Central Toronto,Park,Café,Italian Restaurant,Ice Cream Shop,Coffee Shop,Indian Restaurant,Japanese Restaurant,Bakery,Dessert Shop,Grocery Store,3
23,Central Toronto,Park,Café,Italian Restaurant,Coffee Shop,Bakery,Liquor Store,Indian Restaurant,BBQ Joint,Ice Cream Shop,Grocery Store,3
26,Central Toronto,Park,Italian Restaurant,Café,Bakery,Coffee Shop,Grocery Store,Indian Restaurant,BBQ Joint,Bookstore,Spa,3
29,Central Toronto,Park,Café,Coffee Shop,Italian Restaurant,Gastropub,Japanese Restaurant,BBQ Joint,Grocery Store,Bakery,Beer Bar,3
31,Central Toronto,Café,Coffee Shop,Park,Italian Restaurant,Gastropub,Japanese Restaurant,Beer Bar,Dance Studio,Historic Site,Grocery Store,3


## Cluster 4

In [142]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
3,East Toronto,Park,Coffee Shop,Beach,Café,Bakery,Brewery,Pub,Pizza Place,Gastropub,Ice Cream Shop,4
9,East York/East Toronto,Park,Coffee Shop,Bakery,Brewery,Café,Pizza Place,Beach,Ice Cream Shop,French Restaurant,Farmers Market,4
12,East Toronto,Park,Coffee Shop,Café,Bakery,Brewery,Gastropub,Greek Restaurant,Farmers Market,Pizza Place,Historic Site,4
15,East Toronto,Coffee Shop,Park,Café,Brewery,Beach,Pizza Place,Bakery,French Restaurant,Ice Cream Shop,Gastropub,4
17,East Toronto,Coffee Shop,Park,Bakery,Café,Brewery,Farmers Market,Beach,American Restaurant,Ice Cream Shop,Bookstore,4
38,East Toronto Business,Park,Coffee Shop,Café,Beach,Brewery,Bakery,Pizza Place,French Restaurant,Ice Cream Shop,Bookstore,4
