In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import folium

In [2]:
# URL Link to Wiki Page
wiki_link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Extracting page HTML content.
page = requests.get(wiki_link)
soup = BeautifulSoup(page.text, 'html.parser')

In [3]:
# Finding table through the class
data_table = soup.find(class_='wikitable sortable')
data_rows = data_table.find_all('tr')
print(len(data_rows))

289


In [4]:
# Iterate through each table row and extract data.
columns = data_rows[0].text.split() # Gets name of the columns from wiki table.
data_arr = [] # Stores 2D array of the table.
last = ['Not assigned', 'Not assigned', 'Not assigned'] # Keeping the last row for comparing with duplicate Postcodes
for i in data_rows[1:]:
    # Extracting the text and splitting each column of the current row
    temp = i.text.split('\n')[1:4]
    # Checks to see if Borough is Not Assigned.
    if temp[1] == 'Not assigned':
        continue
    # Checks to see if Neighborhood is Not Assigned.
    if temp[2] == 'Not assigned':
        temp[2] = temp[1]
    # Checks to see if the Postcode of current and previous is the same.
    if temp[0] == last[0]:
        # Changes the latest element
        data_arr[-1][2] = data_arr[-1][2] + ', ' + temp[2]
        continue
    
    # Keeping track of the last row.
    last = temp
    
    data_arr.append(temp)
    
df = pd.DataFrame(data_arr, columns=columns)
print(df.head())
print(df.shape)

  Postcode           Borough                     Neighbourhood
0      M3A        North York                         Parkwoods
1      M4A        North York                  Victoria Village
2      M5A  Downtown Toronto         Harbourfront, Regent Park
3      M6A        North York  Lawrence Heights, Lawrence Manor
4      M7A      Queen's Park                      Queen's Park
(103, 3)


In [5]:
# Loading Geospatial Coordinate Data.
df_geo = pd.read_csv('Geospatial_Coordinates.csv')
print(df_geo.head())

# Creating joined DataFrames.
df = df.join(df_geo)
print(df.head())
print(df.shape)

  Postal Code   Latitude  Longitude
0         M1B  43.806686 -79.194353
1         M1C  43.784535 -79.160497
2         M1E  43.763573 -79.188711
3         M1G  43.770992 -79.216917
4         M1H  43.773136 -79.239476
  Postcode           Borough                     Neighbourhood Postal Code  \
0      M3A        North York                         Parkwoods         M1B   
1      M4A        North York                  Victoria Village         M1C   
2      M5A  Downtown Toronto         Harbourfront, Regent Park         M1E   
3      M6A        North York  Lawrence Heights, Lawrence Manor         M1G   
4      M7A      Queen's Park                      Queen's Park         M1H   

    Latitude  Longitude  
0  43.806686 -79.194353  
1  43.784535 -79.160497  
2  43.763573 -79.188711  
3  43.770992 -79.216917  
4  43.773136 -79.239476  
(103, 6)


In [6]:
# Checking to see the number of boroughs and neighborhoods.
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


In [7]:
from geopy.geocoders import Nominatim

# Getting latitude and longitude values of Toronto, Canada
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [8]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(df, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [9]:
# Obtaining only Borough that have Toronto in the name.
toronto_data = df[df['Borough'].str.contains('Toronto')].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",M1E,43.763573,-79.188711
1,M5B,Downtown Toronto,"Ryerson, Garden District",M1N,43.692657,-79.264848
2,M5C,Downtown Toronto,St. James Town,M1W,43.799525,-79.318389
3,M4E,East Toronto,The Beaches,M2K,43.786947,-79.385975
4,M5E,Downtown Toronto,Berczy Park,M2L,43.75749,-79.374714


In [10]:
# Recreating map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [39]:
CLIENT_ID = 'DELETED' # your Foursquare ID
CLIENT_SECRET = 'DELETED' # your Foursquare Secret
VERSION = '20190703' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: DELETED
CLIENT_SECRET:DELETED


In [12]:
# Exploring first neighborhood in dataframe.
toronto_data.loc[0, 'Neighbourhood']

'Harbourfront, Regent Park'

In [13]:
# Getting neighborhood's latitude and longitude values.
neighborhood_latitude = toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Harbourfront, Regent Park are 43.7635726, -79.1887115.


In [40]:
# Getting top 100 venues in Harbourfront
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url 

'https://api.foursquare.com/v2/venues/explore?&client_id=DELETED&client_secret=DELETED&v=DELETED&ll=43.7635726,-79.1887115&radius=500&limit=100'

In [15]:
# Getting request from URL.
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d1d73c3348eba002301f168'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4b6074e3f964a5200fe729e3-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/pizza_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d1ca941735',
         'name': 'Pizza Place',
         'pluralName': 'Pizza Places',
         'primary': True,
         'shortName': 'Pizza'}],
       'id': '4b6074e3f964a5200fe729e3',
       'location': {'address': '4410 Kingston Rd',
        'cc': 'CA',
        'city': 'Scarborough',
        'country': 'Canada',
        'distance': 469,
        'formattedAddress': ['4410 Kingston Rd',
         'Scarborough ON M1E 2N5',
         'Canada'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.7676970829

In [16]:
# Function extracts the category of the venue.
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [17]:
# Converting JSON to pandas dataframe.
from pandas.io.json import json_normalize
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Swiss Chalet Rotisserie & Grill,Pizza Place,43.767697,-79.189914
1,G & G Electronics,Electronics Store,43.765309,-79.191537
2,Marina Spa,Spa,43.766,-79.191
3,Big Bite Burrito,Mexican Restaurant,43.766299,-79.19072
4,chatr Mobile,Tech Startup,43.765917,-79.191672


In [18]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

9 venues were returned by Foursquare.


In [19]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [20]:
# Getting venues from each neighborhood.
toronto_venues = getNearbyVenues(names=toronto_data['Neighbourhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
The Danforth West, Riverdale
Design Exchange, Toronto Dominion Centre
Brockton, Exhibition Place, Parkdale Village
The Beaches West, India Bazaar
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North, Forest Hill West
High Park, The Junction South
North Toronto West
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
Harbord, University of Toronto
Runnymede, Swansea
Moore Park, Summerhill East
Chinatown, Grange Park, Kensington Market
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown, St. James Town
Fir

In [21]:
print(toronto_venues.shape)
toronto_venues.head()

(777, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront, Regent Park",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
1,"Harbourfront, Regent Park",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
2,"Harbourfront, Regent Park",43.763573,-79.188711,Marina Spa,43.766,-79.191,Spa
3,"Harbourfront, Regent Park",43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
4,"Harbourfront, Regent Park",43.763573,-79.188711,chatr Mobile,43.765917,-79.191672,Tech Startup


In [22]:
# Gettting venues count for each neighborhood.
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",3,3,3,3,3,3
Berczy Park,1,1,1,1,1,1
"Brockton, Exhibition Place, Parkdale Village",38,38,38,38,38,38
Business Reply Mail Processing Centre 969 Eastern,4,4,4,4,4,4
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",19,19,19,19,19,19
"Cabbagetown, St. James Town",3,3,3,3,3,3
Central Bay Street,5,5,5,5,5,5
"Chinatown, Grange Park, Kensington Market",38,38,38,38,38,38
Christie,3,3,3,3,3,3
Church and Wellesley,6,6,6,6,6,6


In [23]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 203 uniques categories.


In [24]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot['Neighbourhood'] = toronto_venues['Neighborhood'] 

fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,...,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront, Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
toronto_onehot.shape

(777, 204)

In [26]:
# Group rows by neighborhood and takign mean of frequency of each category.
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,...,Thrift / Vintage Store,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.026316
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
toronto_grouped.shape

(38, 204)

In [28]:
# Getting top 5 most common venue of each neighborhood.
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
               venue  freq
0            Airport  0.33
1  Other Repair Shop  0.33
2               Park  0.33
3       Hockey Arena  0.00
4             Museum  0.00


----Berczy Park----
                      venue  freq
0                 Cafeteria   1.0
1                   Airport   0.0
2             Movie Theater   0.0
3            Medical Center   0.0
4  Mediterranean Restaurant   0.0


----Brockton, Exhibition Place, Parkdale Village----
                 venue  freq
0                 Café  0.11
1          Coffee Shop  0.08
2            Gastropub  0.05
3  American Restaurant  0.05
4   Italian Restaurant  0.05


----Business Reply Mail Processing Centre 969 Eastern----
               venue  freq
0               Park  0.25
1        Pizza Place  0.25
2  Mobile Phone Shop  0.25
3           Bus Line  0.25
4      Movie Theater  0.00


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
         ve

In [29]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [30]:
# Displaying top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Airport,Other Repair Shop,Park,Airport Gate,Falafel Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
1,Berczy Park,Cafeteria,Yoga Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
2,"Brockton, Exhibition Place, Parkdale Village",Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant,Gastropub,Yoga Studio,Fish Market,Park,Neighborhood
3,Business Reply Mail Processing Centre 969 Eastern,Park,Pizza Place,Bus Line,Mobile Phone Shop,Dessert Shop,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Yoga Studio,Fast Food Restaurant,Park,Pizza Place,Butcher,Burrito Place,Recording Studio,Restaurant,Brewery,Light Rail Station


In [31]:
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
# Start to cluster neighborhood.
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

kmeans.labels_[0:10] 

array([3, 1, 3, 3, 3, 0, 0, 3, 0, 0])

In [32]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Harbourfront, Regent Park",M1E,43.763573,-79.188711,3,Breakfast Spot,Spa,Pizza Place,Medical Center,Rental Car Location,Intersection,Tech Startup,Electronics Store,Mexican Restaurant,Yoga Studio
1,M5B,Downtown Toronto,"Ryerson, Garden District",M1N,43.692657,-79.264848,3,Café,General Entertainment,Skating Rink,College Stadium,Convenience Store,Dessert Shop,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
2,M5C,Downtown Toronto,St. James Town,M1W,43.799525,-79.318389,0,Chinese Restaurant,Fast Food Restaurant,Breakfast Spot,American Restaurant,Pizza Place,Pharmacy,Thrift / Vintage Store,Coffee Shop,Sandwich Place,Grocery Store
3,M4E,East Toronto,The Beaches,M2K,43.786947,-79.385975,3,Café,Japanese Restaurant,Chinese Restaurant,Bank,Diner,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
4,M5E,Downtown Toronto,Berczy Park,M2L,43.75749,-79.374714,1,Cafeteria,Yoga Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store


In [33]:
# Visualizing resulting clusters.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [34]:
# Examine Clusters
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,-79.318389,0,Chinese Restaurant,Fast Food Restaurant,Breakfast Spot,American Restaurant,Pizza Place,Pharmacy,Thrift / Vintage Store,Coffee Shop,Sandwich Place,Grocery Store
5,Downtown Toronto,-79.442259,0,Grocery Store,Pharmacy,Pizza Place,Coffee Shop,Discount Store,Department Store,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
6,Downtown Toronto,-79.329656,0,Park,Fast Food Restaurant,Food & Drink Shop,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
34,Downtown Toronto,-79.565963,0,Empanada Restaurant,Pharmacy,Pizza Place,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
36,Downtown Toronto,-79.532242,0,Pizza Place,Coffee Shop,Chinese Restaurant,Sandwich Place,Intersection,Yoga Studio,Dessert Shop,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [35]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Downtown Toronto,-79.374714,1,Cafeteria,Yoga Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store


In [36]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Downtown Toronto,-79.38316,2,Playground,Tennis Court,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
22,Central Toronto,-79.428191,2,Field,Hockey Arena,Trail,Tennis Court,Yoga Studio,Dessert Shop,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [37]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,-79.188711,3,Breakfast Spot,Spa,Pizza Place,Medical Center,Rental Car Location,Intersection,Tech Startup,Electronics Store,Mexican Restaurant,Yoga Studio
1,Downtown Toronto,-79.264848,3,Café,General Entertainment,Skating Rink,College Stadium,Convenience Store,Dessert Shop,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
3,East Toronto,-79.385975,3,Café,Japanese Restaurant,Chinese Restaurant,Bank,Diner,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
7,Downtown Toronto,-79.464763,3,Airport,Other Repair Shop,Park,Airport Gate,Falafel Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
8,West Toronto,-79.506944,3,Grocery Store,Shopping Mall,Bank,Falafel Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
9,Downtown Toronto,-79.318389,3,Park,Cosmetics Shop,Pharmacy,Beer Store,Curling Ice,Athletics & Sports,Skating Rink,Bus Stop,Discount Store,Empanada Restaurant
10,West Toronto,-79.293031,3,Pub,Other Great Outdoors,Health Food Store,Trail,Neighborhood,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
11,East Toronto,-79.352188,3,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Restaurant,Spa,Juice Bar,Cosmetics Shop
12,Downtown Toronto,-79.315572,3,Park,Gym,Liquor Store,Sandwich Place,Burger Joint,Burrito Place,Italian Restaurant,Fast Food Restaurant,Pub,Steakhouse
13,West Toronto,-79.340923,3,Café,Coffee Shop,American Restaurant,Bakery,Italian Restaurant,Gastropub,Yoga Studio,Fish Market,Park,Neighborhood


In [38]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Downtown Toronto,-79.498509,4,Baseball Field,Yoga Studio,Farmers Market,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
35,Downtown Toronto,-79.532242,4,Baseball Field,Yoga Studio,Farmers Market,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


### Conclusion

Seems like the clusters are grouped according to:
1. Restaurants
2. Cafeteria
3. Outdoors/Sports Areas
4. Casual Hangout Areas (i.e. Park, Cafe)
5. Baseball Field