In [1]:
pip install lxml

Note: you may need to restart the kernel to use updated packages.


## Installing Geopy and Folium

In [2]:
#installing libraries
!conda install -c conda-forge geopy --yes 
!conda install -c conda-forge folium=0.5.0

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.1

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.1

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



## Importing other essential libaries 

In [3]:
#Importing essential libraries

import pandas as pd
import numpy as np 
import json
from geopy.geocoders import Nominatim 
import requests 
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium 

print('Libraries imported.')

Libraries imported.


## Creating a data frame

In [4]:
#Creating a dataframe

dfs = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
for df in dfs:
    print(df)

    Postcode           Borough          Neighbourhood
0        M1A      Not assigned           Not assigned
1        M2A      Not assigned           Not assigned
2        M3A        North York              Parkwoods
3        M4A        North York       Victoria Village
4        M5A  Downtown Toronto           Harbourfront
..       ...               ...                    ...
282      M8Z         Etobicoke              Mimico NW
283      M8Z         Etobicoke     The Queensway West
284      M8Z         Etobicoke  Royal York South West
285      M8Z         Etobicoke         South of Bloor
286      M9Z      Not assigned           Not assigned

[287 rows x 3 columns]
                                                  0   \
0                                                NaN   
1  NL NS PE NB QC ON MB SK AB BC NU/NT YT A B C E...   
2                                                 NL   
3                                                  A   

                                               

## Cleansing the dataframe

In [5]:
#Creating a dataframe after eliminating unnecessary tables
Canada=dfs[0]
Canada

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West
285,M8Z,Etobicoke,South of Bloor


In [6]:
#Removing rows which have do not have an assigned Borough

import numpy as np

Canada['Borough']=Canada['Borough'].replace('Not assigned',np.nan)
Canada=Canada.dropna(axis=0,subset=['Borough'])
Canada

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
...,...,...,...
281,M8Z,Etobicoke,Kingsway Park South West
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West


In [7]:
#Aggregating neigbourhoods who have same postcode
Canada_1=Canada.groupby('Postcode').agg(lambda x:','.join(set(x)))
Canada_1

Unnamed: 0_level_0,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern,Rouge"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Morningside,Guildwood,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Richview Gardens,Kingsview Village,St. Phillip..."
M9V,Etobicoke,"Mount Olive,South Steeles,Thistletown,Silverst..."


In [8]:
#Filling up not assigned Neighbourhoods with boroughs

Canada_1.loc[Canada_1['Neighbourhood']=="Not assigned",'Neighbourhood']=Canada_1.loc[Canada_1['Neighbourhood']=="Not assigned",'Borough']
Canada_1

Unnamed: 0_level_0,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern,Rouge"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Morningside,Guildwood,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Richview Gardens,Kingsview Village,St. Phillip..."
M9V,Etobicoke,"Mount Olive,South Steeles,Thistletown,Silverst..."


In [9]:
Canada_1.shape

(103, 2)

## Extracting latitude-longitude data

In [10]:
#Extracting Latitude Longitude data

LatLong=pd.read_csv("http://cocl.us/Geospatial_data")
LatLong

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [11]:
#Setting index to dataframes

LatLong.set_index("Postal Code")


Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476
...,...,...
M9N,43.706876,-79.518188
M9P,43.696319,-79.532242
M9R,43.688905,-79.554724
M9V,43.739416,-79.588437


## Merging Neighbourhood data with latitude-longitude data

In [12]:
#Merging dataframes

Canada_co=Canada_1.merge(LatLong,left_on="Postcode",right_on="Postal Code")
Canada_co

Unnamed: 0,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,Scarborough,"Malvern,Rouge",M1B,43.806686,-79.194353
1,Scarborough,"Highland Creek,Rouge Hill,Port Union",M1C,43.784535,-79.160497
2,Scarborough,"Morningside,Guildwood,West Hill",M1E,43.763573,-79.188711
3,Scarborough,Woburn,M1G,43.770992,-79.216917
4,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
...,...,...,...,...,...
98,York,Weston,M9N,43.706876,-79.518188
99,Etobicoke,Westmount,M9P,43.696319,-79.532242
100,Etobicoke,"Richview Gardens,Kingsview Village,St. Phillip...",M9R,43.688905,-79.554724
101,Etobicoke,"Mount Olive,South Steeles,Thistletown,Silverst...",M9V,43.739416,-79.588437


In [13]:
#Set index to postal code

Canada_co.set_index("Postal Code")

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
M1E,Scarborough,"Morningside,Guildwood,West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...
M9N,York,Weston,43.706876,-79.518188
M9P,Etobicoke,Westmount,43.696319,-79.532242
M9R,Etobicoke,"Richview Gardens,Kingsview Village,St. Phillip...",43.688905,-79.554724
M9V,Etobicoke,"Mount Olive,South Steeles,Thistletown,Silverst...",43.739416,-79.588437


## Locating co-ordinates of Toronto

In [14]:
#Co-ordinates of Toronto
address = 'Toronto'

geolocator = Nominatim(user_agent="foursquare_agent")

location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Totonto is {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Totonto is 43.653963, -79.387207.


## Generating map of Toronto

In [15]:
# Generating map of Toronto
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(Canada_co['Latitude'], Canada_co['Longitude'], Canada_co['Borough'], Canada_co['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

## Generating list of Borough containing Toronto

In [16]:
#Generating list of borough containing keyword Toronto

borough_names = list(Canada_co.Borough.unique())

toronto = []

for x in borough_names:
    if "toronto" in x.lower():
        toronto.append(x)
        
toronto

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

## Creating dataframe of with borough containing the keyword Toronto

In [17]:
#Creating Dataframes 
toronto_df = Canada_co[Canada_co['Borough'].isin(toronto)].reset_index(drop=True)
print(toronto_df.shape)
toronto_df.head(39)

(39, 5)


Unnamed: 0,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,East Toronto,The Beaches,M4E,43.676357,-79.293031
1,East Toronto,"Riverdale,The Danforth West",M4K,43.679557,-79.352188
2,East Toronto,"The Beaches West,India Bazaar",M4L,43.668999,-79.315572
3,East Toronto,Studio District,M4M,43.659526,-79.340923
4,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879
5,Central Toronto,Davisville North,M4P,43.712751,-79.390197
6,Central Toronto,North Toronto West,M4R,43.715383,-79.405678
7,Central Toronto,Davisville,M4S,43.704324,-79.38879
8,Central Toronto,"Summerhill East,Moore Park",M4T,43.689574,-79.38316
9,Central Toronto,"Summerhill West,Forest Hill SE,Deer Park,South...",M4V,43.686412,-79.400049


## Extracting locational data from Four Square

In [26]:
CLIENT_ID = '3H5Y5LXWNZ0EBEX1HCK4PWBJWR3CN1U23U4VK5GBR0VTFA3C' # your Foursquare ID
CLIENT_SECRET = 'J11HO1VLJLTGOKIWSZUBI1OOCZS1J2EKOF2DG032JNRG2VKL' # your Foursquare Secret
VERSION = '20180604' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3H5Y5LXWNZ0EBEX1HCK4PWBJWR3CN1U23U4VK5GBR0VTFA3C
CLIENT_SECRET:J11HO1VLJLTGOKIWSZUBI1OOCZS1J2EKOF2DG032JNRG2VKL


In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&r0adius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]["groups"][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
#Getting venues near Toronto

toronto_venues = getNearbyVenues(names=toronto_df['Neighbourhood'],
                                   latitudes=toronto_df['Latitude'],
                                   longitudes=toronto_df['Longitude'])

The Beaches
Riverdale,The Danforth West
The Beaches West,India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Summerhill East,Moore Park
Summerhill West,Forest Hill SE,Deer Park,South Hill,Rathnelly
Rosedale
St. James Town,Cabbagetown
Church and Wellesley
Harbourfront
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide,Richmond,King
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Roselawn
Forest Hill West,Forest Hill North
Yorkville,The Annex,North Midtown
University of Toronto,Harbord
Kensington Market,Grange Park,Chinatown
South Niagara,Bathurst Quay,King and Spadina,Railway Lands,CN Tower,Harbourfront West,Island airport
Stn A PO Boxes 25 The Esplanade
Underground city,First Canadian Place
Christie
Dovercourt Village,Dufferin
Little Portugal,Trinity
Exhibition Place,Parkdale Village,Brockton
High Park,The Junction South
Parkdale,Roncesvalles
Swansea,R

In [31]:
toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,The Fox Theatre,43.672801,-79.287272,Indie Movie Theater
1,The Beaches,43.676357,-79.293031,The Beech Tree,43.680493,-79.288846,Gastropub
2,The Beaches,43.676357,-79.293031,Ed's Real Scoop,43.67263,-79.287993,Ice Cream Shop
3,The Beaches,43.676357,-79.293031,Bagels On Fire,43.672864,-79.286784,Bagel Shop
4,The Beaches,43.676357,-79.293031,Beaches Bake Shop,43.680363,-79.289692,Bakery


## Analyzing data extracted from FourSquare

In [32]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo
0,"Adelaide,Richmond,King",0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,...,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,...,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,...,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0
4,Christie,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,...,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
5,Church and Wellesley,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,...,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0
6,"Commerce Court,Victoria Hotel",0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,...,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
7,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.02,0.0
8,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.02,0.0
9,"Design Exchange,Toronto Dominion Centre",0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,...,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0


In [34]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,Richmond,King----
             venue  freq
0      Coffee Shop  0.08
1             Café  0.05
2              Bar  0.04
3       Steakhouse  0.04
4  Thai Restaurant  0.03


----Berczy Park----
         venue  freq
0  Coffee Shop  0.10
1         Café  0.05
2     Beer Bar  0.04
3        Hotel  0.04
4   Restaurant  0.04


----Business Reply Mail Processing Centre 969 Eastern----
         venue  freq
0  Coffee Shop  0.07
1      Brewery  0.06
2         Park  0.06
3         Café  0.06
4       Bakery  0.05


----Central Bay Street----
                 venue  freq
0          Coffee Shop  0.13
1   Italian Restaurant  0.04
2      Bubble Tea Shop  0.03
3  Japanese Restaurant  0.03
4                 Café  0.03


----Christie----
               venue  freq
0               Café  0.11
1        Coffee Shop  0.07
2                Bar  0.05
3  Korean Restaurant  0.05
4      Grocery Store  0.04


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.11
1  Japanese 

In [35]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [36]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,Richmond,King",Coffee Shop,Café,Bar,Steakhouse,Asian Restaurant,Thai Restaurant,Theater,Sushi Restaurant,Hotel,Gym
1,Berczy Park,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Cocktail Bar,Breakfast Spot,Japanese Restaurant,Park,Italian Restaurant
2,Business Reply Mail Processing Centre 969 Eastern,Coffee Shop,Brewery,Park,Café,Bakery,Beach,Indian Restaurant,Pizza Place,Bar,Italian Restaurant
3,Central Bay Street,Coffee Shop,Italian Restaurant,Ice Cream Shop,Clothing Store,Bakery,Café,Bubble Tea Shop,Japanese Restaurant,Gastropub,Gym
4,Christie,Café,Coffee Shop,Bar,Korean Restaurant,Italian Restaurant,Grocery Store,Indian Restaurant,Dessert Shop,Vegetarian / Vegan Restaurant,Ice Cream Shop


## Finding K-means and creating clusters

In [38]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 4, 1, 3, 1, 2, 1, 1, 2], dtype=int32)

In [39]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighbourhood,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,The Beaches,M4E,43.676357,-79.293031,4,Park,Coffee Shop,Beach,Café,Pub,Breakfast Spot,Ice Cream Shop,Bakery,Indian Restaurant,BBQ Joint
1,East Toronto,"Riverdale,The Danforth West",M4K,43.679557,-79.352188,4,Greek Restaurant,Café,Park,Vietnamese Restaurant,Bakery,Pub,Italian Restaurant,American Restaurant,Ice Cream Shop,Coffee Shop
2,East Toronto,"The Beaches West,India Bazaar",M4L,43.668999,-79.315572,4,Park,Café,Coffee Shop,Beach,Brewery,Italian Restaurant,Bakery,Pizza Place,Indian Restaurant,American Restaurant
3,East Toronto,Studio District,M4M,43.659526,-79.340923,4,Coffee Shop,Café,Park,Brewery,Vietnamese Restaurant,Bakery,French Restaurant,Diner,Bar,Thai Restaurant
4,Central Toronto,Lawrence Park,M4N,43.72802,-79.38879,1,Coffee Shop,Italian Restaurant,Park,Sushi Restaurant,Café,Bakery,Ice Cream Shop,Burger Joint,Supermarket,Yoga Studio


In [40]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [41]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,"Summerhill East,Moore Park",0,Italian Restaurant,Park,Café,Sushi Restaurant,Bakery,Dessert Shop,Indian Restaurant,Restaurant,Coffee Shop,Grocery Store
9,"Summerhill West,Forest Hill SE,Deer Park,South...",0,Italian Restaurant,Café,Park,Sushi Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Middle Eastern Restaurant,Yoga Studio,Modern European Restaurant,Bagel Shop
10,Rosedale,0,Park,Coffee Shop,Italian Restaurant,Café,Spa,Indian Restaurant,Gourmet Shop,Grocery Store,Hotel,Sushi Restaurant
27,"South Niagara,Bathurst Quay,King and Spadina,R...",0,Park,Café,Coffee Shop,Hotel,Italian Restaurant,Gym,Brewery,Scenic Lookout,Aquarium,Baseball Stadium


In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Lawrence Park,1,Coffee Shop,Italian Restaurant,Park,Sushi Restaurant,Café,Bakery,Ice Cream Shop,Burger Joint,Supermarket,Yoga Studio
5,Davisville North,1,Coffee Shop,Italian Restaurant,Café,Bakery,Indian Restaurant,Park,Gym,Restaurant,Asian Restaurant,Gastropub
6,North Toronto West,1,Coffee Shop,Italian Restaurant,Café,Bakery,Park,Sushi Restaurant,Garden,Skating Rink,Japanese Restaurant,Fast Food Restaurant
7,Davisville,1,Coffee Shop,Bakery,Italian Restaurant,Café,Indian Restaurant,Park,Gym,Dessert Shop,Japanese Restaurant,Yoga Studio
12,Church and Wellesley,1,Coffee Shop,Japanese Restaurant,Burger Joint,Café,Restaurant,Sushi Restaurant,Gym,Liquor Store,Mediterranean Restaurant,Men's Store
14,"Ryerson,Garden District",1,Coffee Shop,Cosmetics Shop,Restaurant,Clothing Store,Middle Eastern Restaurant,Fast Food Restaurant,Tea Room,Japanese Restaurant,Café,Sushi Restaurant
17,Central Bay Street,1,Coffee Shop,Italian Restaurant,Ice Cream Shop,Clothing Store,Bakery,Café,Bubble Tea Shop,Japanese Restaurant,Gastropub,Gym
22,Roselawn,1,Coffee Shop,Italian Restaurant,Café,Bakery,Japanese Restaurant,Sporting Goods Shop,Bookstore,Bagel Shop,Food & Drink Shop,Deli / Bodega
23,"Forest Hill West,Forest Hill North",1,Coffee Shop,Italian Restaurant,Sushi Restaurant,Park,Gastropub,Restaurant,Japanese Restaurant,Café,Bakery,Middle Eastern Restaurant
37,Queen's Park,1,Coffee Shop,Sandwich Place,Italian Restaurant,Café,Park,Gastropub,Gym,Burrito Place,Burger Joint,Sushi Restaurant


In [43]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Harbourfront,2,Coffee Shop,Bakery,Restaurant,Park,Café,Pub,Theater,Breakfast Spot,Farmers Market,Italian Restaurant
15,St. James Town,2,Coffee Shop,Café,Hotel,Restaurant,Bakery,Seafood Restaurant,Cosmetics Shop,Breakfast Spot,Italian Restaurant,Theater
16,Berczy Park,2,Coffee Shop,Café,Hotel,Beer Bar,Restaurant,Cocktail Bar,Breakfast Spot,Japanese Restaurant,Park,Italian Restaurant
18,"Adelaide,Richmond,King",2,Coffee Shop,Café,Bar,Steakhouse,Asian Restaurant,Thai Restaurant,Theater,Sushi Restaurant,Hotel,Gym
19,"Harbourfront East,Toronto Islands,Union Station",2,Coffee Shop,Hotel,Aquarium,Restaurant,Café,Brewery,Italian Restaurant,Scenic Lookout,Park,Plaza
20,"Design Exchange,Toronto Dominion Centre",2,Coffee Shop,Hotel,Café,Restaurant,Bar,Seafood Restaurant,American Restaurant,Gastropub,Deli / Bodega,Steakhouse
21,"Commerce Court,Victoria Hotel",2,Coffee Shop,Café,Hotel,Restaurant,Gastropub,Seafood Restaurant,Steakhouse,Gym,Japanese Restaurant,Beer Bar
28,Stn A PO Boxes 25 The Esplanade,2,Coffee Shop,Café,Restaurant,Japanese Restaurant,Beer Bar,Hotel,Italian Restaurant,Bakery,Cocktail Bar,Gym
29,"Underground city,First Canadian Place",2,Coffee Shop,Café,Hotel,Steakhouse,Gastropub,American Restaurant,Restaurant,Bar,Seafood Restaurant,Gym


In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,"St. James Town,Cabbagetown",3,Coffee Shop,Café,Japanese Restaurant,Park,Pub,Diner,Bakery,Thai Restaurant,Restaurant,Gastropub
24,"Yorkville,The Annex,North Midtown",3,Coffee Shop,Café,Italian Restaurant,Restaurant,Japanese Restaurant,Bakery,Vegetarian / Vegan Restaurant,Grocery Store,French Restaurant,Gastropub
25,"University of Toronto,Harbord",3,Café,Bakery,Coffee Shop,Vegetarian / Vegan Restaurant,Bookstore,Bar,Pub,Comfort Food Restaurant,Thai Restaurant,Grocery Store
26,"Kensington Market,Grange Park,Chinatown",3,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Bar,Vietnamese Restaurant,Dumpling Restaurant,Mexican Restaurant,Coffee Shop,Dessert Shop,Bakery
30,Christie,3,Café,Coffee Shop,Bar,Korean Restaurant,Italian Restaurant,Grocery Store,Indian Restaurant,Dessert Shop,Vegetarian / Vegan Restaurant,Ice Cream Shop
31,"Dovercourt Village,Dufferin",3,Café,Italian Restaurant,Coffee Shop,Bar,Bakery,Park,Breakfast Spot,Cocktail Bar,Sushi Restaurant,Restaurant
32,"Little Portugal,Trinity",3,Café,Bar,Restaurant,Bakery,Pizza Place,Coffee Shop,Cocktail Bar,Italian Restaurant,Vegetarian / Vegan Restaurant,Asian Restaurant
33,"Exhibition Place,Parkdale Village,Brockton",3,Café,Coffee Shop,Restaurant,Bar,Gift Shop,Furniture / Home Store,Bakery,Theater,Theme Park,Japanese Restaurant
34,"High Park,The Junction South",3,Café,Coffee Shop,Bar,Bakery,Italian Restaurant,Pizza Place,Brewery,Restaurant,Dog Run,Breakfast Spot
35,"Parkdale,Roncesvalles",3,Café,Coffee Shop,Bakery,Restaurant,Park,Bar,Italian Restaurant,Sushi Restaurant,Gastropub,Pizza Place


## Analyzing clusters

In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,The Beaches,4,Park,Coffee Shop,Beach,Café,Pub,Breakfast Spot,Ice Cream Shop,Bakery,Indian Restaurant,BBQ Joint
1,"Riverdale,The Danforth West",4,Greek Restaurant,Café,Park,Vietnamese Restaurant,Bakery,Pub,Italian Restaurant,American Restaurant,Ice Cream Shop,Coffee Shop
2,"The Beaches West,India Bazaar",4,Park,Café,Coffee Shop,Beach,Brewery,Italian Restaurant,Bakery,Pizza Place,Indian Restaurant,American Restaurant
3,Studio District,4,Coffee Shop,Café,Park,Brewery,Vietnamese Restaurant,Bakery,French Restaurant,Diner,Bar,Thai Restaurant
38,Business Reply Mail Processing Centre 969 Eastern,4,Coffee Shop,Brewery,Park,Café,Bakery,Beach,Indian Restaurant,Pizza Place,Bar,Italian Restaurant


## Observations

The cluster of "The Beaches" and "The Beaches West,India Bazaar" has the highest number of Parks. The clusters "Studio District" and "Business Reply Mail Processing Centre 969 Eastern" has the highest number of coffee shops. Cafe has been consistently found has the 2nd most common place in 3 out of 5 clusters