### Read the table

In [2]:
import pandas as pd
# Wikipedia page listing the postal codes that start with "M"
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# read_html returns a list of DataFrames; only the first one is used here
page_tables = pd.read_html(url, header=0)
df = page_tables[0]

In [3]:
# Display the table exactly as it was read from the page
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


### Drop the rows whose borough is not assigned

In [4]:
# Remove, in place, every row whose Borough is the placeholder 'Not assigned'
borough_not_assigned = df['Borough'] == 'Not assigned'
rows_to_drop = df.loc[borough_not_assigned].index
df.drop(index=rows_to_drop, inplace=True)

In [5]:
# Display the table after the in-place drop; the original row labels are kept, so they now have gaps
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


### Reset Index

In [6]:
# Renumber the rows 0..n-1; drop=True discards the old labels instead of keeping them as a column
df1=df.reset_index(drop=True)

In [7]:
# Display the re-indexed table
df1

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### Deal with the not assigned neighbourhood

In [10]:
# Where a row has no neighbourhood ('Not assigned'), use its borough name instead.
# The write goes through one df1.loc[rows, column] call: the chained form
# df1['Neighbourhood'].loc[...] = ... may assign to a temporary copy and leave
# df1 unchanged (pandas warns about this as SettingWithCopy).
neighbourhood_missing = df1['Neighbourhood'] == 'Not assigned'
df1.loc[neighbourhood_missing, 'Neighbourhood'] = df1.loc[neighbourhood_missing, 'Borough']
df1

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### Check the size 

In [11]:
# (rows, columns) of the cleaned table
df1.shape

(103, 3)

### Load the coordinates


In [16]:
# CSV with one Latitude/Longitude pair per postal code
GEOSPATIAL_CSV_URL = "http://cocl.us/Geospatial_data"
coordinates = pd.read_csv(GEOSPATIAL_CSV_URL)
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge 2 tables

In [17]:
# Left join on the postal code: every row of df1 is kept and gains Latitude/Longitude
toronto = pd.merge(df1, coordinates, how="left", on="Postal Code")
toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Rename postalcode

In [18]:
# Use a column name without a space for the postal code
toronto = toronto.rename(columns={"Postal Code": "PostalCode"})
toronto

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


### import libraries to visualize

In [22]:
!python -m pip install --upgrade pip

Collecting pip
  Downloading https://files.pythonhosted.org/packages/cb/28/91f26bd088ce8e22169032100d4260614fc3da435025ff389ef1d396a433/pip-20.2.4-py2.py3-none-any.whl (1.5MB)
Installing collected packages: pip
  Found existing installation: pip 18.0
    Uninstalling pip-18.0:
      Successfully uninstalled pip-18.0
Successfully installed pip-20.2.4


In [23]:
! pip install geopy



In [25]:
! pip install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [26]:
import numpy as np # library to handle data in a vectorized manner
import json
from geopy.geocoders import Nominatim 
import requests 
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium 

print("Libraries imported.")

Libraries imported.


### Get Location of Toronto

In [29]:
# Look up the coordinates of Toronto; they are used as the centre of the maps below.
address = 'Toronto'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
if location is None:
    # Fail with a clear message instead of an AttributeError on `.latitude`
    raise ValueError('No geocoding result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a map of Toronto with neighborhoods superimposed on top.

In [33]:
# create map of Toronto using latitude and longitude values
map_toro = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row of `toronto`, with a "<neighbourhood>, <borough>" popup
for lat, lng, borough, neighbourhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighbourhood']):
    label_text = '{}, {}'.format(neighbourhood, borough)
    # parse_html belongs to the Popup, so it is set here only and not on the marker
    popup = folium.Popup(label_text, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toro)

map_toro

### Data in Toronto with foursquare

In [85]:
import os

# define Foursquare Credentials and Version
# The ID and secret are read from environment variables so that real values are
# never typed into (and saved with) the notebook; '...' is the unset placeholder.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '...') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '...') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# The secret itself is never echoed: cell output is stored inside the notebook file.
print('CLIENT_SECRET:' + '...')

Your credentails:
CLIENT_ID: ...
CLIENT_SECRET:...


### Top 100 venues within a radius of 500 meters

In [35]:
radius = 500  # search radius around each postal-code coordinate, passed as `radius`
LIMIT = 100   # maximum number of venues requested per call, passed as `limit`

# One tuple per venue: area identifiers and coordinates, then venue name, coordinates and category
venues = []

for lat, lng, post, borough, neighbourhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['PostalCode'], toronto['Borough'], toronto['Neighbourhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        lng,
        radius,
        LIMIT)

    # The timeout stops one stalled connection from blocking the whole loop, and
    # raise_for_status reports an HTTP error response here rather than as a
    # KeyError while indexing into the JSON below.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    results = response.json()["response"]['groups'][0]['items']

    for item in results:
        venue = item['venue']
        categories = venue['categories']
        if not categories:
            # No category to analyse; categories[0] would raise IndexError
            continue
        venues.append((
            post,
            borough,
            neighbourhood,
            lat,
            lng,
            venue['name'],
            venue['location']['lat'],
            venue['location']['lng'],
            categories[0]['name']))

### convert the venues list into a new DataFrame


In [36]:
# Column names, in the same order as the fields of each tuple in `venues`
venue_columns = ['PostalCode', 'Borough', 'Neighbourhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
# Passing the names to the constructor also works for an empty `venues` list;
# assigning `.columns` afterwards raises a length-mismatch ValueError in that case.
venues_df = pd.DataFrame(venues, columns=venue_columns)
print(venues_df.shape)
venues_df.head()

(2136, 9)


Unnamed: 0,PostalCode,Borough,Neighbourhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M3A,North York,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,M3A,North York,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,M4A,North York,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,M4A,North York,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
4,M4A,North York,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


### Check how many venues were returned for each PostalCode

In [37]:
# Per (PostalCode, Borough, Neighbourhood) group, count the entries in each remaining column
venues_df.groupby(["PostalCode", "Borough", "Neighbourhood"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighbourhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M1B,Scarborough,"Malvern, Rouge",1,1,1,1,1,1
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",2,2,2,2,2,2
M1E,Scarborough,"Guildwood, Morningside, West Hill",8,8,8,8,8,8
M1G,Scarborough,Woburn,4,4,4,4,4,4
M1H,Scarborough,Cedarbrae,8,8,8,8,8,8
M1J,Scarborough,Scarborough Village,3,3,3,3,3,3
M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",4,4,4,4,4,4
M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",10,10,10,10,10,10
M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",2,2,2,2,2,2
M1N,Scarborough,"Birch Cliff, Cliffside West",4,4,4,4,4,4


### Check the number of unique categories curated from all the returned venues

In [38]:
# Number of distinct values in the VenueCategory column
distinct_categories = venues_df['VenueCategory'].unique()
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 273 uniques categories.


In [39]:
# Show the first 50 of the distinct venue categories
venues_df['VenueCategory'].unique()[:50]

array(['Park', 'Food & Drink Shop', 'Hockey Arena',
       'Portuguese Restaurant', 'Coffee Shop', 'Intersection',
       'Pizza Place', 'Bakery', 'Distribution Center', 'Spa',
       'Restaurant', 'Pub', 'Historic Site', 'Breakfast Spot',
       'Gym / Fitness Center', 'Farmers Market', 'Chocolate Shop',
       'Dessert Shop', 'Performing Arts Venue', 'Theater',
       'Mexican Restaurant', 'French Restaurant', 'Yoga Studio', 'Café',
       'Event Space', 'Shoe Store', 'Art Gallery', 'Electronics Store',
       'Brewery', 'Beer Store', 'Bank', 'Hotel', 'Antique Shop',
       'Boutique', 'Furniture / Home Store', 'Vietnamese Restaurant',
       'Clothing Store', 'Accessories Store', "Women's Store",
       'Gift Shop', 'Italian Restaurant', 'Beer Bar', 'Creperie',
       'Sushi Restaurant', 'Hobby Shop', 'Diner', 'Fried Chicken Joint',
       'Chinese Restaurant', 'Smoothie Shop', 'Sandwich Place'],
      dtype=object)

### Analyze each neighbourhood

In [41]:
# One indicator column per venue category (no prefix, so the column name is the category name)
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# Append the area identifiers; the neighbourhood column is stored as 'Neighbourhoods' in this frame
for onehot_name, venue_name in (('PostalCode', 'PostalCode'),
                                ('Borough', 'Borough'),
                                ('Neighbourhoods', 'Neighbourhood')):
    toronto_onehot[onehot_name] = venues_df[venue_name]

# Rotate the last three columns (the identifiers just added) to the front
all_columns = list(toronto_onehot.columns)
fixed_columns = all_columns[-3:] + all_columns[:-3]
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(2136, 276)


Unnamed: 0,PostalCode,Borough,Neighbourhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M3A,North York,Parkwoods,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M3A,North York,Parkwoods,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4A,North York,Victoria Village,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4A,North York,Victoria Village,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4A,North York,Victoria Village,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
# Mean of every indicator column per area, i.e. the share of each category among the area's venues
area_keys = ["PostalCode", "Borough", "Neighbourhoods"]
category_share = toronto_onehot.groupby(area_keys).mean()
# Turn the group keys back into ordinary columns
toronto_grouped = category_share.reset_index()

print(toronto_grouped.shape)
toronto_grouped

(100, 276)


Unnamed: 0,PostalCode,Borough,Neighbourhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M1B,Scarborough,"Malvern, Rouge",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
3,M1G,Scarborough,Woburn,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
4,M1H,Scarborough,Cedarbrae,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
5,M1J,Scarborough,Scarborough Village,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
9,M1N,Scarborough,"Birch Cliff, Cliffside West",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000


### Top 10 venues

In [61]:
num_top_venues = 10

# Ordinal suffixes for ranks 1, 2 and 3; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighbourhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:`, which would also hide unrelated errors
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    freqColumns.append('{}{} Most Common Venue'.format(ind+1, suffix))
columns = areaColumns+freqColumns

# create a new dataframe: one row per area, identifier columns copied from toronto_grouped
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']
neighbourhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighbourhoods_venues_sorted['Neighbourhoods'] = toronto_grouped['Neighbourhoods']

for ind in np.arange(toronto_grouped.shape[0]):
    # Skip the three identifier columns, then rank the categories by their mean frequency
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighbourhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

print(neighbourhoods_venues_sorted.shape)
neighbourhoods_venues_sorted

(100, 13)


Unnamed: 0,PostalCode,Borough,Neighbourhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",Fast Food Restaurant,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Health & Beauty Service
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",Construction & Landscaping,Bar,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",Breakfast Spot,Restaurant,Electronics Store,Medical Center,Rental Car Location,Intersection,Mexican Restaurant,Bank,Yoga Studio,Doner Restaurant
3,M1G,Scarborough,Woburn,Coffee Shop,Mexican Restaurant,Korean Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Yoga Studio
4,M1H,Scarborough,Cedarbrae,Gas Station,Fried Chicken Joint,Bakery,Bank,Athletics & Sports,Thai Restaurant,Caribbean Restaurant,Hakka Restaurant,Electronics Store,Eastern European Restaurant
5,M1J,Scarborough,Scarborough Village,Playground,Smoke Shop,Jewelry Store,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",Department Store,Hobby Shop,Coffee Shop,Train Station,Event Space,Ethiopian Restaurant,Escape Room,Falafel Restaurant,Electronics Store,Dim Sum Restaurant
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",Bus Line,Bakery,Metro Station,Soccer Field,Ice Cream Shop,Bus Station,Park,Intersection,Eastern European Restaurant,Dumpling Restaurant
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",American Restaurant,Motel,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Yoga Studio
9,M1N,Scarborough,"Birch Cliff, Cliffside West",College Stadium,Café,General Entertainment,Skating Rink,Donut Shop,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore


In [62]:
# Per-area mean of every one-hot category column, with the group keys restored as columns.
# NOTE(review): this looks identical to the earlier toronto_grouped computation — confirm whether the repeat is needed.
toronto_grouped = toronto_onehot.groupby(["PostalCode", "Borough", "Neighbourhoods"]).mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped

(100, 276)


Unnamed: 0,PostalCode,Borough,Neighbourhoods,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M1B,Scarborough,"Malvern, Rouge",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
3,M1G,Scarborough,Woburn,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
4,M1H,Scarborough,Cedarbrae,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
5,M1J,Scarborough,Scarborough Village,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000
9,M1N,Scarborough,"Birch Cliff, Cliffside West",0.000000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.0,0.000000,0.000000,0.000000,0.0,0.000000,0.0000,0.000000,0.000000


### Cluster Areas

In [63]:
# set number of clusters
kclusters = 5

# Keep only the category-frequency columns as clustering features.
# `columns=` is used because the positional axis form drop(labels, 1) is not
# accepted by pandas 2.0 and later.
toronto_grouped_clustering = toronto_grouped.drop(columns=["PostalCode", "Borough", "Neighbourhoods"])

# run k-means clustering (random_state fixed so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 4, 0, 2, 0, 0, 2, 0, 2, 0])

In [67]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
toronto_merged = toronto.copy()

# Attach the k-means labels to the per-area table. The column is overwritten
# when it already exists so this cell can be re-run: DataFrame.insert raises
# ValueError for a column name that is already present.
if 'Cluster Labels' in neighbourhoods_venues_sorted.columns:
    neighbourhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# `columns=` replaces the positional axis argument drop(labels, 1), which
# pandas 2.0 and later do not accept.
venue_profiles = neighbourhoods_venues_sorted.drop(columns=["Borough", "Neighbourhoods"]).set_index("PostalCode")

# Rows of `toronto` whose PostalCode is absent from venue_profiles get NaN in the joined columns.
toronto_merged = toronto_merged.join(venue_profiles, on="PostalCode")

print(toronto_merged.shape)
toronto_merged.head() # check the last columns!

(103, 16)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,Park,Food & Drink Shop,Yoga Studio,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,2.0,Pizza Place,Hockey Arena,Coffee Shop,Portuguese Restaurant,Intersection,Construction & Landscaping,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Escape Room
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,2.0,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Café,Theater,Spa,Brewery,Shoe Store
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,2.0,Clothing Store,Accessories Store,Boutique,Gift Shop,Furniture / Home Store,Event Space,Coffee Shop,Women's Store,Vietnamese Restaurant,Airport Service
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2.0,Coffee Shop,Yoga Studio,College Auditorium,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Café,Restaurant,Chinese Restaurant


In [68]:
# Order the rows by their cluster label
print(toronto_merged.shape)
toronto_merged = toronto_merged.sort_values(by="Cluster Labels")
toronto_merged

(103, 16)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
102,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,0.0,Gym,Social Club,Discount Store,Bakery,Tanning Salon,Burrito Place,Burger Joint,Kids Store,Sandwich Place,Thrift / Vintage Store
45,M2L,North York,"York Mills, Silver Hills",43.757490,-79.374714,0.0,Martial Arts School,Yoga Studio,Eastern European Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
65,M1P,Scarborough,"Dorset Park, Wexford Heights, Scarborough Town...",43.757410,-79.273304,0.0,Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0.0,Grocery Store,Café,Park,Nightclub,Candy Store,Italian Restaurant,Baby Store,Athletics & Sports,Coffee Shop,Restaurant
26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Gas Station,Fried Chicken Joint,Bakery,Bank,Athletics & Sports,Thai Restaurant,Caribbean Restaurant,Hakka Restaurant,Electronics Store,Eastern European Restaurant
27,M2H,North York,Hillcrest Village,43.803762,-79.363452,0.0,Athletics & Sports,Pool,Mediterranean Restaurant,Dog Run,Golf Course,Yoga Studio,Drugstore,Discount Store,Distribution Center,Doner Restaurant
63,M6N,York,"Runnymede, The Junction North",43.673185,-79.487262,0.0,Convenience Store,Breakfast Spot,Brewery,Bus Line,Yoga Studio,Eastern European Restaurant,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
62,M5N,Central Toronto,Roselawn,43.711695,-79.416936,0.0,Garden,Music Venue,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio,Diner
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,0.0,Bakery,Pharmacy,Music Venue,Bank,Bar,Supermarket,Middle Eastern Restaurant,Café,Brewery,Park
32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,0.0,Playground,Smoke Shop,Jewelry Store,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop


#### visualize the resulting clusters

In [72]:
# create the base map for the cluster markers, centred on the geocoded Toronto coordinates
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)



In [77]:
# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    if pd.isna(cluster):
        # This row received no cluster label from the join, so there is no color to draw it with
        continue
    # The label column is float (it shares the column with NaN), and a float cannot index a list
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 keeps the original color assignment: label 0 wraps around to the last color
    marker_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examine Clusters

#### Clusters 1–5 (cluster labels 0–4)

In [83]:
# Rows labelled 0, showing column 1 (Borough) and every column from position 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 0
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
102,Etobicoke,0.0,Gym,Social Club,Discount Store,Bakery,Tanning Salon,Burrito Place,Burger Joint,Kids Store,Sandwich Place,Thrift / Vintage Store
45,North York,0.0,Martial Arts School,Yoga Studio,Eastern European Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
65,Scarborough,0.0,Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
25,Downtown Toronto,0.0,Grocery Store,Café,Park,Nightclub,Candy Store,Italian Restaurant,Baby Store,Athletics & Sports,Coffee Shop,Restaurant
26,Scarborough,0.0,Gas Station,Fried Chicken Joint,Bakery,Bank,Athletics & Sports,Thai Restaurant,Caribbean Restaurant,Hakka Restaurant,Electronics Store,Eastern European Restaurant
27,North York,0.0,Athletics & Sports,Pool,Mediterranean Restaurant,Dog Run,Golf Course,Yoga Studio,Drugstore,Discount Store,Distribution Center,Doner Restaurant
63,York,0.0,Convenience Store,Breakfast Spot,Brewery,Bus Line,Yoga Studio,Eastern European Restaurant,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
62,Central Toronto,0.0,Garden,Music Venue,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio,Diner
31,West Toronto,0.0,Bakery,Pharmacy,Music Venue,Bank,Bar,Supermarket,Middle Eastern Restaurant,Café,Brewery,Park
32,Scarborough,0.0,Playground,Smoke Shop,Jewelry Store,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop


In [84]:
# Rows labelled 1, showing column 1 (Borough) and every column from position 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 1
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,Scarborough,1.0,Playground,Park,Bakery,Yoga Studio,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
64,York,1.0,Park,Yoga Studio,Eastern European Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
91,Downtown Toronto,1.0,Park,Playground,Trail,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
49,North York,1.0,Bakery,Park,Construction & Landscaping,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
66,North York,1.0,Park,Convenience Store,Yoga Studio,Eastern European Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
61,Central Toronto,1.0,Park,Swim School,Bus Line,Yoga Studio,Drugstore,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
0,North York,1.0,Park,Food & Drink Shop,Yoga Studio,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store
40,North York,1.0,Airport,Park,Yoga Studio,Eastern European Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
21,York,1.0,Park,Women's Store,Pool,Yoga Studio,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
35,East York,1.0,Intersection,Park,Convenience Store,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore


In [81]:
# Rows labelled 2, showing column 1 (Borough) and every column from position 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 2
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
43,West Toronto,2.0,Café,Coffee Shop,Breakfast Spot,Nightclub,Burrito Place,Furniture / Home Store,Climbing Gym,Bakery,Italian Restaurant,Bar
75,West Toronto,2.0,Gift Shop,Breakfast Spot,Bookstore,Italian Restaurant,Bar,Restaurant,Dessert Shop,Movie Theater,Eastern European Restaurant,Coffee Shop
76,Mississauga,2.0,Coffee Shop,Hotel,Intersection,Gym,Mediterranean Restaurant,Fried Chicken Joint,Burrito Place,Gas Station,American Restaurant,Middle Eastern Restaurant
13,North York,2.0,Gym,Coffee Shop,Beer Store,Clothing Store,Asian Restaurant,Supermarket,Japanese Restaurant,Italian Restaurant,Discount Store,Dim Sum Restaurant
79,Central Toronto,2.0,Sandwich Place,Dessert Shop,Pizza Place,Gym,Café,Italian Restaurant,Coffee Shop,Sushi Restaurant,Thai Restaurant,Seafood Restaurant
80,Downtown Toronto,2.0,Café,Bookstore,Bakery,Bar,Japanese Restaurant,Sandwich Place,Dessert Shop,Pub,Restaurant,Coffee Shop
81,West Toronto,2.0,Café,Coffee Shop,Pub,Pizza Place,Italian Restaurant,Sushi Restaurant,Yoga Studio,Gourmet Shop,Sandwich Place,Restaurant
82,Scarborough,2.0,Pizza Place,Gas Station,Noodle House,Chinese Restaurant,Fast Food Restaurant,Fried Chicken Joint,Bank,Italian Restaurant,Intersection,Thai Restaurant
84,Downtown Toronto,2.0,Café,Mexican Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Bar,Vietnamese Restaurant,Dessert Shop,Dumpling Restaurant,Burger Joint,Bakery
9,Downtown Toronto,2.0,Clothing Store,Coffee Shop,Café,Japanese Restaurant,Bubble Tea Shop,Cosmetics Shop,Theater,Italian Restaurant,Pizza Place,Ramen Restaurant


In [80]:
# Rows labelled 3, showing column 1 (Borough) and every column from position 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 3
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Etobicoke,3.0,Print Shop,Dumpling Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Yoga Studio,Dim Sum Restaurant


In [82]:
# Rows labelled 4, showing column 1 (Borough) and every column from position 5 onward
in_cluster = toronto_merged['Cluster Labels'] == 4
shown_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York,4.0,Baseball Field,Yoga Studio,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Field
101,Etobicoke,4.0,Construction & Landscaping,Baseball Field,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
12,Scarborough,4.0,Construction & Landscaping,Bar,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store


### Conclusion

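In cluster 1, most of the neighbourhoods are dominated by stores and restaurants; cluster 2 is mostly parks; cluster 3 is mostly cafés and coffee shops; cluster 4 is print shops and stores; and cluster 5 is mainly construction and landscaping, along with some entertainment venues. (Clusters 1–5 here correspond to cluster labels 0–4 in the tables above.)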