In [226]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from geopy.geocoders import Nominatim
import json
from pandas.io.json import json_normalize

# 1. WebScraping to Get Data

In [227]:
# passing the wikipedia url where we have the data tables
Toronto_neighbors_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

headers = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET',
    'Access-Control-Allow-Headers': 'Content-Type',
    'Access-Control-Max-Age': '3600',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
}

req = requests.get(Toronto_neighbors_url, headers) # Get the data using requests
soup = BeautifulSoup(req.content, 'html.parser')  #Creating BeautifulSoup object to pull data in html format
# soup.contents

In [228]:
table_contents = []
table = soup.find('table')  #now using soup object we can acess the tables present in data

for row in table.findAll('td'):  #getting the td->table data of each row
    cell = {}  #creating an empty dictionary 
    if row.span.text == 'Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3] #filling the data(value) of 'postalCode' giving this as key to the dictionary 
        cell['Borough'] = (row.span.text).split('(')[0] #same
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace('/',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)
print(table_contents[:5]) #now we have the data of each row in this list object but the list elements are dictionaries  
df = pd.DataFrame(table_contents)  #convert that list to dataframe
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
df.head()

[{'PostalCode': 'M3A', 'Borough': 'North York', 'Neighborhood': 'Parkwoods'}, {'PostalCode': 'M4A', 'Borough': 'North York', 'Neighborhood': 'Victoria Village'}, {'PostalCode': 'M5A', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Regent Park , Harbourfront'}, {'PostalCode': 'M6A', 'Borough': 'North York', 'Neighborhood': 'Lawrence Manor , Lawrence Heights'}, {'PostalCode': 'M7A', 'Borough': "Queen's Park", 'Neighborhood': 'Ontario Provincial Government'}]


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [229]:
df['Neighborhood'] = df.groupby('PostalCode')['Neighborhood'].transform(lambda x: ','.join(x))
df = df.drop_duplicates()

# 2. Get The Latitude and Longitude Coordinates of Each From Geospatial_coordinates.csv

In [230]:
Geospatial_coordinates_df = pd.read_csv("Geospatial_coordinates.csv")
Geospatial_coordinates_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [231]:
df = df.merge(Geospatial_coordinates_df, left_on = 'PostalCode', right_on='Postal Code')
df = df.drop(df.columns[3], axis=1)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


# Explore Cluster The Neighborhoods in Toronto

In [232]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent = "tn_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print("The geographical coordinates of Toronto City are {}, {}".format(latitude,longitude))

The geographical coordinates of Toronto City are 43.6534817, -79.3839347


In [233]:
toronto_df = df[df['Borough'].str.contains('Toronto')]
toronto_df = toronto_df.reset_index()
toronto_df = toronto_df.drop('index',axis = 1)
toronto_df.head()
# toronto_df.shape

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [234]:
# create map of New York using latitude and longitude values before filtering for toronto
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto


In [235]:
CLIENT_ID='IM0TJ0DO2AWOEENTZTCHLZ4DJXWXXY1ELFMC0U2A5O5ASGHQ'
CLIENT_SECRET='O1JWC1HPJKWMVWFYXFSFHGLDZQK20ENDWTX4UTKK3C0FVWSK'
VERSION='20180605'
LIMIT=100
radius = 500

# API call to Foursquare

In [236]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60dc51c24265776210f5c93d'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 64,
  'suggestedBounds': {'ne': {'lat': 43.6579817045, 'lng': -79.37772678059432},
   'sw': {'lat': 43.6489816955, 'lng': -79.39014261940568}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5227bb01498e17bf485e6202',
       'name': 'Downtown Toronto',
       'location': {'lat': 43.65323167517444,
        'lng': -79.38529600606677,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          'lng'

# Getting Venue Data

In [237]:
# function to get the category of the venue
def get_category(row):
    try:
        categories_list=row['categories']
    except:
        categories_list = row['venue.categories']
        
    if(len(categories_list)==0):
        return None
    else:
        return categories_list[0]['name']

In [238]:
venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues)  #flatten Json

#filter columns
filtered_columns = ['venue.name','venue.categories','venue.location.lat','venue.location.lng']
nearby_venues = nearby_venues.loc[:,filtered_columns]

#filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category, axis=1)

#clean column
nearby_venues.columns = [col.split('.')[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues)  #flatten Json


Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.65227,-79.383516
2,LUSH,Cosmetics Shop,43.653557,-79.3804
3,M Square Coffee Co,Coffee Shop,43.651218,-79.383555
4,Indigo,Bookstore,43.653515,-79.380696


In [239]:
# number of venues returned by foursquare
print("{} venues are returned by foursquare.".format(nearby_venues.shape[0]))

64 venues are returned by foursquare.


# Getting data from foursquare for all latitude and longitude and adding the postal code, borough and neighborhood columns to dataframe

In [240]:
venues_list=[]
for lat, long, post, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['PostalCode'], toronto_df['Borough'], toronto_df['Neighborhood']):
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
         
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']


        # return only relevant information for each nearby venue
        for v in results:
            venues_list.append((
             post,
             borough,
            neighborhood,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']))

toronto_venues = pd.DataFrame(venues_list)
toronto_venues.columns = ['PostalCode', 
                         'Borough', 
                         'Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']


In [241]:
print(toronto_venues.shape)
toronto_venues.head()

(365, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.321558,Leslie St. Spit hotdog stand,43.652255,-79.322791,Hot Dog Joint
1,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.321558,TTC Leslie Barns,43.657633,-79.325212,Light Rail Station
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.321558,TTC Leslie Barns,43.657633,-79.325212,Light Rail Station
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.321558,Leslie St. Spit hotdog stand,43.652255,-79.322791,Hot Dog Joint
4,M4E,East Toronto,The Beaches,43.676357,-79.321558,Monarch Park Stadium,43.678144,-79.324038,Soccer Field


In [242]:
# Venues count by postal code/borough and neighborhood and grouping
toronto_venues.groupby(['PostalCode','Borough','Neighborhood']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,24,24,24,24,24,24
M4J,East York/East Toronto,The Danforth East,18,18,18,18,18,18
M4K,East Toronto,"The Danforth West , Riverdale",16,16,16,16,16,16
M4L,East Toronto,"India Bazaar , The Beaches West",29,29,29,29,29,29
M4M,East Toronto,Studio District,3,3,3,3,3,3
M4N,Central Toronto,Lawrence Park,4,4,4,4,4,4
M4P,Central Toronto,Davisville North,4,4,4,4,4,4
M4R,Central Toronto,North Toronto West,2,2,2,2,2,2
M4S,Central Toronto,Davisville,1,1,1,1,1,1
M4T,Central Toronto,"Moore Park , Summerhill East",7,7,7,7,7,7


In [243]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

toronto_venues['Venue Category'].unique()[:50]

There are 67 uniques categories.


array(['Hot Dog Joint', 'Light Rail Station', 'Soccer Field',
       'Asian Restaurant', 'Park', 'Indian Restaurant', 'Brewery', 'Bar',
       'Grocery Store', 'Sandwich Place', 'Café',
       'Indian Chinese Restaurant', 'Bus Stop', 'Dog Run', 'Track',
       'Discount Store', 'Donut Shop', 'Gastropub', 'Italian Restaurant',
       'Restaurant', 'Shopping Plaza', 'Boat or Ferry', 'Harbor / Marina',
       'Pakistani Restaurant', 'Fast Food Restaurant', 'Indie Theater',
       'Snack Place', "Women's Store", 'Halal Restaurant', 'Playground',
       'Pet Store', 'Art Gallery', 'Theater', 'Farmers Market',
       'Steakhouse', 'Burrito Place', 'Bowling Alley', 'Burger Joint',
       'Bank', 'Pizza Place', 'Breakfast Spot', 'Ice Cream Shop',
       'Coffee Shop', 'Pharmacy', 'Mexican Restaurant', 'Bus Station',
       'Gym', 'Cosmetics Shop', 'Scenic Lookout', 'Auto Workshop'],
      dtype=object)

# Analyze each Area

In [244]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Art Gallery,Asian Restaurant,Auto Workshop,Bank,Bar,Board Shop,Boat or Ferry,Bowling Alley,Breakfast Spot,...,Soccer Field,Soccer Stadium,Spa,Steakhouse,Theater,Tibetan Restaurant,Track,Trail,Women's Store,Yoga Studio
0,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Garden District, Ryerson",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,St. James Town,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [245]:
print(toronto_onehot.shape)

toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped.head()

(365, 68)
(38, 68)


Unnamed: 0,Neighborhood,Art Gallery,Asian Restaurant,Auto Workshop,Bank,Bar,Board Shop,Boat or Ferry,Bowling Alley,Breakfast Spot,...,Soccer Field,Soccer Stadium,Spa,Steakhouse,Theater,Tibetan Restaurant,Track,Trail,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton , Parkdale Village , Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Christie,0.035714,0.035714,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.035714,0.0
4,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,...,0.0,0.0,0.055556,0.055556,0.0,0.0,0.0,0.0,0.0,0.055556


In [246]:

num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
             venue  freq
0  Harbor / Marina   0.5
1    Boat or Ferry   0.5
2      Art Gallery   0.0
3           Office   0.0
4      Pizza Place   0.0


----Brockton , Parkdale Village , Exhibition Place----
            venue  freq
0  Scenic Lookout  0.33
1   Boat or Ferry  0.33
2            Park  0.33
3     Art Gallery  0.00
4          Office  0.00


----Central Bay Street----
                venue  freq
0  Light Rail Station   1.0
1         Art Gallery   0.0
2          Restaurant   0.0
3   Indian Restaurant   0.0
4       Indie Theater   0.0


----Christie----
                       venue  freq
0          Indian Restaurant  0.25
1                       Café  0.07
2              Grocery Store  0.07
3                Art Gallery  0.04
4  Indian Chinese Restaurant  0.04


----Church and Wellesley----
                  venue  freq
0  Fast Food Restaurant  0.17
1         Burrito Place  0.11
2           Yoga Studio  0.06
3    Light Rail Station  0.06
4         Garden Cente

In [247]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# New Dataframe and then display the top 10 venues of each PostalCode

In [248]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Harbor / Marina,Boat or Ferry,Art Gallery,Office,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Mexican Restaurant
1,"Brockton , Parkdale Village , Exhibition Place",Scenic Lookout,Boat or Ferry,Park,Art Gallery,Office,Pizza Place,Pharmacy,Pet Store,Pakistani Restaurant,Music Venue
2,Central Bay Street,Light Rail Station,Art Gallery,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Mexican Restaurant,Music Venue,Office
3,Christie,Indian Restaurant,Café,Grocery Store,Art Gallery,Indian Chinese Restaurant,Asian Restaurant,Halal Restaurant,Pakistani Restaurant,Fast Food Restaurant,Pet Store
4,Church and Wellesley,Fast Food Restaurant,Burrito Place,Yoga Studio,Light Rail Station,Garden Center,Pizza Place,Farmers Market,Restaurant,Sandwich Place,Butcher


# Clustering

In [249]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

#run k-means clustering
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors

kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(toronto_grouped_clustering)
# print(kmeans.labels_.dtype)
kmeans.labels_[0:10]

array([2, 1, 0, 1, 1, 2, 4, 1, 1, 1])

In [252]:
neighborhoods_venues_sorted.insert(0,'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_df

# merge toronto_grouped with toronto_df to add latitude and longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
# print(toronto_merged['Cluster Labels'].dtype)
toronto_merged.dropna(axis=0, inplace=True)
# toronto_merged['Cluster Labels'].astype(int)

# print(toronto_merged['Cluster Labels'].dtype)
toronto_merged.head() # check the last columns!


float64
float64


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0.0,Hot Dog Joint,Light Rail Station,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Mexican Restaurant,Music Venue,Office
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0.0,Light Rail Station,Art Gallery,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Mexican Restaurant,Music Venue,Office
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3.0,Hot Dog Joint,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Light Rail Station,Mexican Restaurant,Music Venue,Office
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,1.0,Indian Restaurant,Grocery Store,Sandwich Place,Bus Stop,Indian Chinese Restaurant,Asian Restaurant,Park,Gastropub,Restaurant,Donut Shop
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2.0,Harbor / Marina,Boat or Ferry,Art Gallery,Office,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Mexican Restaurant


In [254]:
# create map
map_clusters = folium.Map(location = [43.6532, 79.3832], zoom_start=0)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to map
markers_colors=[]
for lat,lng,neighborhood,cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(neighborhood)+', Cluster'+str(int(cluster)), parse_html=True)
    cluster=int(cluster)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = rainbow[cluster-1],
        fill=True,
        fil_color = rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
    

map_clusters

# Examine Clusters

## Cluster 1

In [255]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0.0,Hot Dog Joint,Light Rail Station,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Mexican Restaurant,Music Venue,Office
1,Downtown Toronto,0.0,Light Rail Station,Art Gallery,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Mexican Restaurant,Music Venue,Office
5,Downtown Toronto,0.0,Light Rail Station,Art Gallery,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Mexican Restaurant,Music Venue,Office


## Cluster 2

In [256]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,East Toronto,1.0,Indian Restaurant,Grocery Store,Sandwich Place,Bus Stop,Indian Chinese Restaurant,Asian Restaurant,Park,Gastropub,Restaurant,Donut Shop
6,Downtown Toronto,1.0,Indian Restaurant,Café,Grocery Store,Art Gallery,Indian Chinese Restaurant,Asian Restaurant,Halal Restaurant,Pakistani Restaurant,Fast Food Restaurant,Pet Store
8,West Toronto,1.0,Indian Restaurant,Sandwich Place,Art Gallery,Café,Indian Chinese Restaurant,Asian Restaurant,Halal Restaurant,Pakistani Restaurant,Grocery Store,Pet Store
9,East York/East Toronto,1.0,Bar,Cosmetics Shop,Bus Station,Ice Cream Shop,Park,Pharmacy,Gym,Pizza Place,Gastropub,Sandwich Place
12,East Toronto,1.0,Park,Cosmetics Shop,Burger Joint,Pizza Place,Pharmacy,Coffee Shop,Grocery Store,Soccer Field,Gym,Sandwich Place
14,West Toronto,1.0,Scenic Lookout,Boat or Ferry,Park,Art Gallery,Office,Pizza Place,Pharmacy,Pet Store,Pakistani Restaurant,Music Venue
15,East Toronto,1.0,Indian Restaurant,Sandwich Place,Art Gallery,Café,Indian Chinese Restaurant,Asian Restaurant,Halal Restaurant,Pakistani Restaurant,Grocery Store,Pet Store
17,East Toronto,1.0,Brewery,Auto Workshop,Light Rail Station,Office,Playground,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant
18,Central Toronto,1.0,Coffee Shop,Music Venue,French Restaurant,Restaurant,Intersection,Italian Restaurant,Light Rail Station,Mexican Restaurant,Indie Theater,Indian Chinese Restaurant
19,Central Toronto,1.0,Brewery,Playground,Skating Rink,Soccer Stadium,Pharmacy,Pet Store,Park,Pakistani Restaurant,Office,Art Gallery


## Cluster 3

In [257]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Downtown Toronto,2.0,Harbor / Marina,Boat or Ferry,Art Gallery,Office,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Mexican Restaurant
10,Downtown Toronto,2.0,Boat or Ferry,Harbor / Marina,Scenic Lookout,Art Gallery,Office,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant
11,West Toronto,2.0,Hot Dog Joint,Boat or Ferry,Music Venue,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Office,Mexican Restaurant
13,Downtown Toronto,2.0,Boat or Ferry,Art Gallery,Music Venue,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Office,Mexican Restaurant
16,Downtown Toronto,2.0,Hot Dog Joint,Harbor / Marina,Boat or Ferry,Office,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Mexican Restaurant
34,Downtown Toronto Stn A,2.0,Boat or Ferry,Art Gallery,Music Venue,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Office,Mexican Restaurant
36,Downtown Toronto,2.0,Hot Dog Joint,Harbor / Marina,Boat or Ferry,Office,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Mexican Restaurant


## Cluster 4

In [258]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,3.0,Hot Dog Joint,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Light Rail Station,Mexican Restaurant,Music Venue,Office
7,Downtown Toronto,3.0,Hot Dog Joint,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Light Rail Station,Mexican Restaurant,Music Venue,Office
28,West Toronto,3.0,Hot Dog Joint,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Light Rail Station,Mexican Restaurant,Music Venue,Office
30,Downtown Toronto,3.0,Hot Dog Joint,Restaurant,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Light Rail Station,Mexican Restaurant,Music Venue,Office


## Cluster 5

In [259]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
26,Central Toronto,4.0,Trail,Art Gallery,Music Venue,Pizza Place,Pharmacy,Pet Store,Park,Pakistani Restaurant,Office,Mexican Restaurant


## Observations > The cluster 1 is closer to the railway station, cluster 2 is the main business area having Indian restaurants, Grocery stores, Yoga studios, coffee shops and offices, cluster 3 is near to the harbor, cluster 4 is main Hot Dog Joint, and main restaurants area, cluster 5 is main Art Gallery. 