### Install and import required libraries and save the Wikipedia page into an object Toronto_postcodes

In [1]:
!pip install BeautifulSoup4



In [2]:
import requests
import pandas as pd
import numpy as np

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
Toronto_postcodes = requests.get(url).text

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    conda-4.5.12               |        py36_1000         653 KB  conda-forge
    altair-2.3.0               |        py36_1001         533 KB  conda-forge
    pandas-0.23.4              |   py36hf8a1672_0        27.8 MB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                                           Total:        29.0 MB

The following NEW packages will be INSTALLED:

    altair:  2.3.0-py36_1001       conda-forge
    branca:  0.3.1-py


#### Use the BeautifulSoup to parse the page data and extract only the post codes table into a new list _postcodes_.
----

In [3]:
from bs4 import BeautifulSoup

post_soup = BeautifulSoup(Toronto_postcodes, 'html.parser')

In [4]:
postcodes = []
for row in post_soup.find_all("tr"):
    cols = row.find_all("td")
    cols = [ele.text.strip() for ele in cols]
    postcodes.append(cols)
    
postcodes = postcodes[1:]   #remove the first empty row
postcodes[0:10]   #print the first 10 items

[['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village'],
 ['M5A', 'Downtown Toronto', 'Harbourfront'],
 ['M5A', 'Downtown Toronto', 'Regent Park'],
 ['M6A', 'North York', 'Lawrence Heights'],
 ['M6A', 'North York', 'Lawrence Manor'],
 ['M7A', "Queen's Park", 'Not assigned'],
 ['M8A', 'Not assigned', 'Not assigned']]

____
#### After checking the data, remove the data after the postcode **M9Z**. 
#### Convert the list into pandas data frame *df_postcode* with proper column names. Then remove rows with no Borough assigned and finally replace the missing Neghborhoods by the name of the Borough. 
____

In [5]:
last = postcodes.index(['M9Z', 'Not assigned', 'Not assigned'])   #find the index position of the last relevant record and remove the rest. 
postcodes = postcodes[:last+1]
df_postcodes = pd.DataFrame(postcodes,columns=['PostalCode','Borough','Neighborhood'])    #convert the list into Pandas data frame. 

#drop rows with "not assigned" Borough
df_postcodes = df_postcodes[df_postcodes.Borough != 'Not assigned']
print(df_postcodes.shape)

# find rows with "not assigned" Neighborhood and replace with the name of the Borough
df_postcodes['Neighborhood'] = np.where(df_postcodes['Neighborhood'] == 'Not assigned', df_postcodes['Borough'], df_postcodes['Neighborhood'])
df_postcodes[:10]  #show the first 10 rows.
    

(212, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


----
#### Merge the rows with the same postal code using groupby method. 
----

In [6]:
# combine rows with the same postcode 
df_postcodes = df_postcodes.groupby(['PostalCode','Borough'])['Neighborhood'].apply(', '.join).reset_index()
df_postcodes[0:10]   #show the first 10 rows

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [7]:
df_postcodes.shape   #show the size of the data frame.

(103, 3)

----
## Getting latitude and longitude data for each postcode. 

In [8]:
!pip install geocoder
import geocoder 


Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K    100% |████████████████████████████████| 102kB 14.3MB/s 
[?25hCollecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


----
### After trying to use Geocoder.google unsucesfuly, I changed to geocoder for Here maps. This worked very well, but two postcodes, M5W (Stn A PO Boxes 25 The Esplanade) and M7Y (Business reply mail Processing Centre969 Eastern), which are not real neigborhoods, but postal processing centres were causing errors. I decided to drop these rows, as they would anyway not be relevant for the task for clustering and comparing neighborhoods. 

In [9]:
df_postcodes = df_postcodes.drop([69,87]).reset_index(drop=True)
df_postcodes.shape

(101, 3)

----
### Then using the Here maps API and geocoder I obtained the coordinates for each postcode.

In [10]:

# add new columns to the data frame 
df_postcodes['Latitude']=""
df_postcodes['Longitude']=""

row=0

for row in range(0,len(df_postcodes)):
    g = geocoder.here('{}, Toronto, Ontario'.format(df_postcodes.iat[row,0]), app_id='cvHwUfEGD6sBef4ybHyD',app_code='cDHPp-nV6hHhzfrcoaGoXA')
    lat = g.json['raw']['NavigationPosition'][0]['Latitude']
    long = g.json['raw']['NavigationPosition'][0]['Longitude']
    df_postcodes.at[row,'Latitude'] = lat
    df_postcodes.at[row,'Longitude'] = long


In [57]:
toronto_data = df_postcodes
print(toronto_data.shape)
toronto_data[0:10]

(101, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.8115,-79.1955
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7857,-79.1587
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7657,-79.1753
3,M1G,Scarborough,Woburn,43.7684,-79.2176
4,M1H,Scarborough,Cedarbrae,43.7697,-79.2394
5,M1J,Scarborough,Scarborough Village,43.7431,-79.2318
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.7262,-79.2637
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.7131,-79.285
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.7236,-79.235
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6967,-79.2602


In [41]:
len(toronto_data['Neighborhood'].unique())

101

In [42]:
address = 'Toronto, CA'

g = geocoder.here('Toronto, Ontario', app_id='cvHwUfEGD6sBef4ybHyD',app_code='cDHPp-nV6hHhzfrcoaGoXA')
latitude = g.json['raw']['NavigationPosition'][0]['Latitude']
longitude = g.json['raw']['NavigationPosition'][0]['Longitude']
   
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6487, -79.38545.


In [44]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [45]:
CLIENT_ID = 'LNYXJNM5PLUISFYHMCMZXU5JYDZ3WJMS1K0OSRWA44I5WMNP' # your Foursquare ID
CLIENT_SECRET = 'E3OVKRBQPA0UAHFH51FFB2K1NVYCX3QOLTNG0VQ4X3HDLF2Y' # your Foursquare Secret
VERSION = '20181212' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: LNYXJNM5PLUISFYHMCMZXU5JYDZ3WJMS1K0OSRWA44I5WMNP
CLIENT_SECRET:E3OVKRBQPA0UAHFH51FFB2K1NVYCX3QOLTNG0VQ4X3HDLF2Y


In [46]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [47]:
# type your answer here
LIMIT = 100
radius = 500
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )




Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West, Steeles West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The D

In [48]:
print('{} venues were returned by Foursquare.'.format(toronto_venues.shape[0]))

2240 venues were returned by Foursquare.


In [49]:
print(toronto_venues.shape)
toronto_venues.head()
print('Unique neighbrohoods in the venues table: {}.'.format(len(toronto_venues['Neighborhood'].unique())))

(2240, 7)
Unique neighbrohoods in the venues table: 97.


In [50]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 259 uniques categories.


In [51]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [52]:
toronto_onehot.shape

(2240, 259)

In [53]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Art Museum,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Adelaide, King, Richmond",0.000000,0.000000,0.000000,0.0,0.030000,0.000000,0.000000,0.010000,0.000000,...,0.0,0.00,0.010000,0.000000,0.000000,0.000000,0.010000,0.000000,0.000000,0.010000
1,Agincourt,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.00,0.000000,0.000000,0.000000,0.066667,0.000000,0.000000,0.000000,0.000000
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.00,0.000000,0.000000,0.071429,0.000000,0.000000,0.000000,0.000000,0.000000
4,"Alderwood, Long Branch",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,Bayview Village,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.2,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,"Bedford Park, Lawrence Manor East",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,Berczy Park,0.015873,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.015873,0.000000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,"Birch Cliff, Cliffside West",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,"Bloordale Gardens, Eringate, Markland Wood, Ol...",0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [54]:
toronto_grouped.shape

(97, 259)

In [58]:
# As three negibourhoods/coordinates did not return a results (NaN) from Foursquars search, these rows were dropped. 

df_new = pd.merge(toronto_data,toronto_grouped, how='outer', indicator=True)
print(df_new.shape)
df_new = df_new[df_new._merge != 'left_only']
print(df_new.shape)
toronto_data = df_new.drop(['_merge'], axis=1)
print(toronto_data.shape)
toronto_data[0:10]

(101, 264)
(97, 264)
(97, 263)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7857,-79.1587,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7657,-79.1753,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,Scarborough,Woburn,43.7684,-79.2176,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,Scarborough,Cedarbrae,43.7697,-79.2394,0.0,0.0,0.0,0.0,0.0,...,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M1J,Scarborough,Scarborough Village,43.7431,-79.2318,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.7262,-79.2637,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.7131,-79.285,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.7236,-79.235,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.6967,-79.2602,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,M1P,Scarborough,"Dorset Park, Scarborough Town Centre, Wexford ...",43.76,-79.269,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [72]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Hotel,Café,Steakhouse,Deli / Bodega,Asian Restaurant,Burger Joint,Restaurant,Japanese Restaurant,Gastropub
1,Agincourt,Supermarket,Badminton Court,Pool,Park,Discount Store,Bakery,Sushi Restaurant,Skating Rink,Chinese Restaurant,Department Store
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Pharmacy,Women's Store,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Exhibit,Event Space
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Park,Coffee Shop,Liquor Store,Sandwich Place,Beer Store,Fried Chicken Joint,Japanese Restaurant,Fast Food Restaurant,Video Store
4,"Alderwood, Long Branch",Candy Store,Performing Arts Venue,Gym,Sandwich Place,Pub,Electronics Store,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
5,Bayview Village,Trail,Golf Driving Range,Construction & Landscaping,Park,Dog Run,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant
6,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Pharmacy,Indian Restaurant,Pub,Café,Restaurant,Butcher,Sandwich Place,Fast Food Restaurant
7,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Cheese Shop,Lounge,Café,Farmers Market,Seafood Restaurant,Bakery,Hotel
8,"Birch Cliff, Cliffside West",Skating Rink,College Stadium,Gym Pool,Gym,General Entertainment,Park,Elementary School,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
9,"Bloordale Gardens, Eringate, Markland Wood, Ol...",Fish & Chips Shop,Bank,College Rec Center,Electronics Store,Grocery Store,Carpet Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Elementary School


## 4. Cluster Neighborhoods

In [71]:
# set number of clusters
kclusters = 10

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 1, 0, 0, 8, 0, 0, 8, 0], dtype=int32)

In [73]:
toronto_merged = toronto_data
print(kmeans.labels_.shape,toronto_merged.shape)
# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood', how='left')

toronto_merged.head() # check the last columns!

(97,) (97, 264)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.7857,-79.1587,0.0,0.0,0.0,0.0,0.0,...,Golf Course,Bar,Women's Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Event Space,Dog Run
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7657,-79.1753,0.0,0.0,0.0,0.0,0.0,...,Construction & Landscaping,Gym / Fitness Center,Park,Tea Room,Women's Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
3,M1G,Scarborough,Woburn,43.7684,-79.2176,0.0,0.0,0.0,0.0,0.0,...,Coffee Shop,Business Service,Park,Korean Restaurant,Women's Store,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space
4,M1H,Scarborough,Cedarbrae,43.7697,-79.2394,0.0,0.0,0.0,0.0,0.0,...,Trail,Playground,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space
5,M1J,Scarborough,Scarborough Village,43.7431,-79.2318,0.0,0.0,0.0,0.0,0.0,...,Indian Restaurant,Grocery Store,Train Station,Restaurant,Women's Store,Electronics Store,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant


In [74]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine clusters

In [75]:
# cluster 1
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Art Museum,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Golf Course,Bar,Women's Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Event Space,Dog Run
2,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Construction & Landscaping,Gym / Fitness Center,Park,Tea Room,Women's Store,Ethiopian Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
4,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Trail,Playground,Women's Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space
5,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Indian Restaurant,Grocery Store,Train Station,Restaurant,Women's Store,Electronics Store,Dog Run,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
7,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Coffee Shop,Bakery,Bus Line,Metro Station,Intersection,Soccer Field,Falafel Restaurant,Exhibit,Farm,Dumpling Restaurant
8,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Fast Food Restaurant,Pharmacy,Sandwich Place,Furniture / Home Store,Liquor Store,Coffee Shop,Pizza Place,Wings Joint,Discount Store,Falafel Restaurant
10,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Bakery,Brewery,Gift Shop,Event Space,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant,Exhibit,Donut Shop
11,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Convenience Store,Auto Garage,Women's Store,Exhibit,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space,Falafel Restaurant
13,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Pizza Place,Pharmacy,Golf Course,Shopping Mall,Bus Stop,Fried Chicken Joint,Thai Restaurant,Chinese Restaurant,Hobby Shop,Costume Shop
14,Scarborough,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.0,...,Pharmacy,Women's Store,Dog Run,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Exhibit,Event Space


In [76]:
# Cluster 2
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Art Museum,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Scarborough,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Coffee Shop,Business Service,Park,Korean Restaurant,Women's Store,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space


In [77]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Art Museum,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
82,West Toronto,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,...,Coffee Shop,Eastern European Restaurant,Sushi Restaurant,Bakery,Food & Drink Shop,Bookstore,Breakfast Spot,American Restaurant,Gift Shop,Thai Restaurant


In [78]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Art Museum,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,Downtown Toronto,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,0.010309,0.010309,...,Coffee Shop,Clothing Store,Tea Room,Cosmetics Shop,Italian Restaurant,Burger Joint,Plaza,Bubble Tea Shop,Sandwich Place,Bar
69,Downtown Toronto,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,...,Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Seafood Restaurant,Bakery,Steakhouse,Gym,Asian Restaurant


In [79]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Arepa Restaurant,Art Gallery,Art Museum,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Supermarket,Badminton Court,Pool,Park,Discount Store,Bakery,Sushi Restaurant,Skating Rink,Chinese Restaurant,Department Store
31,North York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Hotel,Park,Mobile Phone Shop,Women's Store,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School,Ethiopian Restaurant,Event Space
32,North York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Health & Beauty Service,Construction & Landscaping,Business Service,Moving Target,Donut Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Exhibit
42,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Pizza Place,Sandwich Place,Gym,Burger Joint,Steakhouse,Food & Drink Shop,Board Shop,Fish & Chips Shop,Light Rail Station,Fast Food Restaurant
77,West Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,...,Coffee Shop,Café,Restaurant,Furniture / Home Store,Bar,Italian Restaurant,Bakery,Sandwich Place,Supermarket,Hotel
86,Etobicoke,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Park,Yoga Studio,Convenience Store,Grocery Store,Skating Rink,Event Space,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Elementary School
91,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Pharmacy,Bank,Café,Park,Grocery Store,Skating Rink,Shopping Mall,Elementary School,Dumpling Restaurant,Eastern European Restaurant
97,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Pizza Place,Coffee Shop,Chinese Restaurant,Intersection,Sandwich Place,Middle Eastern Restaurant,Elementary School,Dumpling Restaurant,Eastern European Restaurant,Electronics Store
