In [1]:
import os
import urllib.request
import warnings
from io import StringIO

import bs4 as bs
import lxml.html as lh
import numpy as np
import pandas as pd
import requests

In [2]:
# Download the Wikipedia page listing Toronto ("M") postal codes and parse
# its first HTML table into a DataFrame.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
res = requests.get(url, timeout=30)
res.raise_for_status()  # fail here on an HTTP error instead of parsing an error page
soup = bs.BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0]
# read_html returns a list of frames; wrap the markup in StringIO because
# passing literal HTML strings is deprecated in recent pandas releases.
tables = pd.read_html(StringIO(str(table)))
data = tables[0]

In [3]:
# Preview the first rows of the scraped postal-code table.
data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Keep only the rows whose Borough is not 'Not assigned'.

In [4]:
# Discard postal codes that have no borough assigned.
raw_data_selected = data.loc[data['Borough'].ne('Not assigned')]

Grouping Data

In [7]:
# Collapse to one row per (borough, postal code), joining the remaining
# text values of each group with commas.
postal_groups = raw_data_selected.groupby(['Borough', 'Postal Code'], as_index=False)
raw_data_selected = postal_groups.agg(','.join)

In [8]:
# Preview the grouped table.
raw_data_selected.head()

Unnamed: 0,Borough,Postal Code,Neighbourhood
0,Central Toronto,M4N,Lawrence Park
1,Central Toronto,M4P,Davisville North
2,Central Toronto,M4R,"North Toronto West, Lawrence Park"
3,Central Toronto,M4S,Davisville
4,Central Toronto,M4T,"Moore Park, Summerhill East"


Where a Neighbourhood is 'Not assigned', replace it with the name of its Borough.

In [9]:
# Use the borough name wherever the neighbourhood is 'Not assigned'.
unassigned = raw_data_selected['Neighbourhood'].eq('Not assigned')
raw_data_selected['Neighbourhood'] = raw_data_selected['Neighbourhood'].mask(
    unassigned, raw_data_selected['Borough'])

In [10]:
# (rows, columns) of the cleaned postal-code table.
raw_data_selected.shape

(103, 3)

In [19]:
# Load the CSV of postal-code coordinates from the course-provided URL
# and preview it.
geospatial_url = "https://cocl.us/Geospatial_data"
geospatial_data = pd.read_csv(geospatial_url)
geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [20]:
# Set the column labels explicitly; 'Postal Code' is the key used by the merge below.
geospatial_data.columns = ['Postal Code', 'Latitude', 'Longitude']

In [34]:
# Attach latitude/longitude to every postal code (default inner join on the shared key).
df = raw_data_selected.merge(geospatial_data, on='Postal Code')
df.head()

Unnamed: 0,Borough,Postal Code,Neighbourhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,Central Toronto,M4S,Davisville,43.704324,-79.38879
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.38316


In [31]:
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!pip install folium
import folium
import requests
import json
from pandas.io.json import json_normalize
print('Libraries imported.')

Libraries imported.


### Getting geographical coordinates of Toronto and creating a map of Toronto city with all the postal codes

In [32]:
# Look up the coordinates of the city centre; they are used to centre the maps.
toronto_add='Toronto, Ontario' #CITY ADDRESS
geolocator=Nominatim(user_agent='toronto_explorer')
location=geolocator.geocode(toronto_add)
# geocode() returns None when the service finds no match; stop with a clear
# message rather than an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(toronto_add))
latitude=location.latitude
longitude=location.longitude
print("The coordinates of Toronto are {},{}".format(latitude,longitude))

The coordinates of Toronto are 43.6534817,-79.3839347


In [37]:
# Draw one circle marker per postal code on a map centred on Toronto.
Toronto_map=folium.Map([latitude,longitude],zoom_start=10)
marker_fields=zip(df['Latitude'],df['Longitude'],df['Postal Code'],df['Borough'],df['Neighbourhood'])
for lat,lng,postalcode,borough,neighborhood in marker_fields:
    popup_text='Postal Code:{},Neighborhoods:{},Borough:{}'.format(postalcode,neighborhood,borough)
    popup=folium.Popup(popup_text,parse_html=True)
    # NOTE(review): parse_html is also forwarded to CircleMarker here, as in the
    # original; confirm that folium actually expects it on the marker.
    marker=folium.CircleMarker(
        [lat,lng],
        radius=5,
        color='blue',
        popup=popup,
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=True,
    )
    marker.add_to(Toronto_map)
Toronto_map

Define Foursquare credentials

In [38]:
# Foursquare credentials are read from the environment so that secrets are
# never stored in the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE: the keys that were previously hardcoded in this cell should be treated
# as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180604'
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn('FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
                  'Foursquare requests will fail to authenticate.')

### We create a function to extract dataframe of top 100 venues within 500m radius of each of the postal codes in Toronto

In [39]:
limit=100  # maximum number of venues requested per location
def getNearbyVenues(codes,names,latitudes,longitudes,radius=500):
    """Query the Foursquare explore endpoint around each location.

    Parameters
    ----------
    codes, names, latitudes, longitudes : iterables
        Parallel sequences describing each location; they are consumed
        together with ``zip``.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``PostalCode``,
        ``Neighborhood``, ``Neighborhood Latitude``, ``Neighborhood Longitude``,
        ``Venue name``, ``Venue Latitude``, ``Venue Longitude`` and
        ``Venue Category``. The frame is empty, but still carries these
        columns, when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    venue_columns=['PostalCode','Neighborhood','Neighborhood Latitude',
                   'Neighborhood Longitude','Venue name','Venue Latitude','Venue Longitude','Venue Category']
    rows=[]
    for code,name,lat,lng in zip(codes,names,latitudes,longitudes):
        # Let requests build and escape the query string.
        params={'client_id':CLIENT_ID,'client_secret':CLIENT_SECRET,'v':VERSION,
                'll':'{},{}'.format(lat,lng),'radius':radius,'limit':limit}
        response=requests.get('https://api.foursquare.com/v2/venues/explore',params=params,timeout=30)
        response.raise_for_status()
        results=response.json()['response']['groups'][0]['items']
        for item in results:
            venue=item['venue']
            categories=venue['categories']
            # A venue may come back without any category; record None
            # rather than failing on categories[0].
            category=categories[0]['name'] if categories else None
            rows.append((code,name,lat,lng,venue['name'],venue['location']['lat'],
                         venue['location']['lng'],category))
    # Passing the column names to the constructor keeps the schema intact
    # even when no rows were collected.
    return pd.DataFrame(rows,columns=venue_columns)

In [41]:
# Fetch the venues around every postal code in df (radius passed as 500).
Toronto_Venues=getNearbyVenues(
    df['Postal Code'],
    df['Neighbourhood'],
    df['Latitude'],
    df['Longitude'],
    radius=500,
)
Toronto_Venues.head()

Unnamed: 0,PostalCode,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue name,Venue Latitude,Venue Longitude,Venue Category
0,M4N,Lawrence Park,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
1,M4N,Lawrence Park,43.72802,-79.38879,HYC Design Inc.,43.726793,-79.391681,Business Service
2,M4N,Lawrence Park,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
3,M4N,Lawrence Park,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
4,M4P,Davisville North,43.712751,-79.390197,Summerhill Market North,43.715499,-79.392881,Food & Drink Shop


In [42]:
# (venues, columns) returned across all postal codes.
Toronto_Venues.shape

(2119, 8)

### Now, we check how many venues were returned for each postal code and get the number of unique venue categories that were returned

In [43]:
# Number of venues per postal code.
Toronto_Venues.groupby('PostalCode')[['Venue name']].count()

Unnamed: 0_level_0,Venue name
PostalCode,Unnamed: 1_level_1
M1B,1
M1C,1
M1E,8
M1G,3
M1H,8
...,...
M9N,2
M9P,7
M9R,4
M9V,10


In [44]:
# Count the distinct venue categories.
n_categories=Toronto_Venues['Venue Category'].unique().size
print('There are {} unique venue categories'.format(n_categories))

There are 273 unique venue categories


### Now we analyze each neighborhood by creating a dataframe with one hot encoding

In [45]:
# One indicator column per venue category, with PostalCode moved to the front.
category_dummies=pd.get_dummies(Toronto_Venues['Venue Category'],prefix='',prefix_sep='')
columns=['PostalCode']+list(category_dummies.columns)
Toronto_onehot=category_dummies.assign(PostalCode=Toronto_Venues['PostalCode'])[columns]
Toronto_onehot.head()

Unnamed: 0,PostalCode,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M4N,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M4N,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4N,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4N,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4P,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
# (venues, 1 + number of categories)
Toronto_onehot.shape

(2119, 274)

In [47]:
# Add the neighbourhood names as the second column. The column is called
# 'Neighborhoods' (plural) so it cannot clash with a venue category named
# 'Neighborhood'.
X=Toronto_Venues['Neighborhood']
# Build the column order from the category columns only, so re-running the
# cell overwrites 'Neighborhoods' instead of duplicating it.
col=['PostalCode','Neighborhoods']+[c for c in Toronto_onehot.columns
                                    if c not in ('PostalCode','Neighborhoods')]
Toronto_onehot=Toronto_onehot.assign(Neighborhoods=X)[col]
Toronto_onehot.head()

Unnamed: 0,PostalCode,Neighborhoods,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M4N,Lawrence Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M4N,Lawrence Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4N,Lawrence Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4N,Lawrence Park,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4P,Davisville North,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Let's group rows by postal code by taking the mean of the frequency of occurrence of each category

In [48]:
# Mean of each indicator column per (postal code, neighbourhood), i.e. the
# share of the group's venues in each category; first-seen order is kept.
by_postal_code=Toronto_onehot.groupby(['PostalCode','Neighborhoods'],sort=False,as_index=False)
Toronto_grouped=by_postal_code.mean()
Toronto_grouped.head()

Unnamed: 0,PostalCode,Neighborhoods,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M4N,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4P,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M4R,"North Toronto West, Lawrence Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,M4S,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M4T,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Now, we print each neighborhood along with top 5 venues for first 10 postal codes

In [49]:
num=5  # categories to print per postal code
# Work on each row directly rather than re-filtering and transposing the
# frame; this avoids in-place edits on a slice and still works if a postal
# code appears in more than one row.
for _,row in Toronto_grouped.head(10).iterrows():
    print('----'+row['PostalCode']+':'+row['Neighborhoods']+'----')
    # Skip the two label columns, then rank the category frequencies.
    freqs=(row.iloc[2:]
           .astype(float)
           .sort_values(ascending=False)
           .round(2)
           .rename_axis('venue category')
           .reset_index(name='freq'))
    print(freqs.head(num))

----M4N:Lawrence Park----
      venue category  freq
0        Swim School  0.25
1   Business Service  0.25
2               Park  0.25
3           Bus Line  0.25
4  Accessories Store  0.00
----M4P:Davisville North----
      venue category  freq
0        Pizza Place  0.11
1  Food & Drink Shop  0.11
2   Department Store  0.11
3       Dance Studio  0.11
4     Sandwich Place  0.11
----M4R:North Toronto West, Lawrence Park----
        venue category  freq
0          Coffee Shop  0.12
1       Clothing Store  0.12
2          Yoga Studio  0.06
3  Sporting Goods Shop  0.06
4                 Park  0.06
----M4S:Davisville----
     venue category  freq
0       Pizza Place  0.08
1    Sandwich Place  0.08
2      Dessert Shop  0.08
3              Café  0.06
4  Sushi Restaurant  0.06
----M4T:Moore Park, Summerhill East----
              venue category  freq
0               Tennis Court  0.33
1                 Playground  0.33
2                     Lawyer  0.33
3          Accessories Store  0.00
4  Midd

### Now we define a function that returns a row's venue categories sorted by frequency in descending order

In [50]:
def most_common_venues(row,num_of_values):
    """Return the labels of the highest values in ``row``.

    The first two entries of ``row`` are skipped, the remaining entries are
    sorted in descending order, and the index labels of the first
    ``num_of_values`` of them are returned as an array.
    """
    frequencies=row.iloc[2:]
    ranked=frequencies.sort_values(ascending=False)
    return ranked.index.values[:num_of_values]

### and create a dataframe with the top 10 venues of each postal code

In [51]:
num_of_values=10  # number of top categories kept per postal code
indicators=['st','nd','rd']
columns=['PostalCode','Neighborhoods']
for ind in range(num_of_values):
    # 1st/2nd/3rd take their own suffix; every later rank uses 'th'.
    # (An explicit bounds check replaces the former bare except.)
    suffix=indicators[ind] if ind<len(indicators) else 'th'
    columns.append('{}{} Most common venue'.format(ind+1,suffix))
Toronto_top=pd.DataFrame(columns=columns)
Toronto_top['PostalCode']=Toronto_grouped['PostalCode']
Toronto_top['Neighborhoods']=Toronto_grouped['Neighborhoods']
for ind in range(Toronto_grouped.shape[0]):
    # Single-step .iloc assignment: the chained form iloc[ind][2:] = ...
    # may write to a temporary copy and leave the frame unchanged.
    Toronto_top.iloc[ind,2:]=most_common_venues(Toronto_grouped.iloc[ind],num_of_values)
Toronto_top.head()

Unnamed: 0,PostalCode,Neighborhoods,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,M4N,Lawrence Park,Park,Bus Line,Swim School,Business Service,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
1,M4P,Davisville North,Dance Studio,Food & Drink Shop,Gym / Fitness Center,Park,Pizza Place,Breakfast Spot,Sandwich Place,Hotel,Department Store,Dim Sum Restaurant
2,M4R,"North Toronto West, Lawrence Park",Coffee Shop,Clothing Store,Yoga Studio,Chinese Restaurant,Spa,Sporting Goods Shop,Fast Food Restaurant,Diner,Mexican Restaurant,Salon / Barbershop
3,M4S,Davisville,Dessert Shop,Sandwich Place,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Gym,Coffee Shop,Thai Restaurant,Seafood Restaurant
4,M4T,"Moore Park, Summerhill East",Lawyer,Playground,Tennis Court,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run


### Now we run K means clustering to cluster the similar postal codes/Neighborhoods

#### The features needed to run k-means clustering are the frequencies of the venue categories.
#### Since k-means does not handle categorical variables, we drop the PostalCode and Neighborhoods columns from the Toronto_grouped dataframe.

In [52]:
# Keep only the numeric frequency columns for clustering. Rebinding with
# errors='ignore' makes the cell safe to re-run (the in-place drop raised
# KeyError the second time).
Toronto_grouped=Toronto_grouped.drop(columns=['PostalCode','Neighborhoods'],errors='ignore')
Toronto_grouped.head()

Unnamed: 0,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
k=5 # number of clusters
k_means=KMeans(n_clusters=k,random_state=0)
k_means.fit(Toronto_grouped)

KMeans(n_clusters=5, random_state=0)

In [54]:
# Cluster labels assigned to the first 10 rows of Toronto_grouped.
k_means.labels_[0:10] #We check the cluster labels of the first 10 rows

array([0, 3, 3, 3, 3, 3, 3, 3, 3, 0], dtype=int32)

### Now, we add the cluster labels column to the Toronto_top dataframe

In [55]:
# Put the cluster label in front of each postal-code row.
# The column name carries a leading space; later cells reference it verbatim.
Toronto_top.insert(loc=0,column=' Cluster labels',value=k_means.labels_)
Toronto_top.head()

Unnamed: 0,Cluster labels,PostalCode,Neighborhoods,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,0,M4N,Lawrence Park,Park,Bus Line,Swim School,Business Service,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
1,3,M4P,Davisville North,Dance Studio,Food & Drink Shop,Gym / Fitness Center,Park,Pizza Place,Breakfast Spot,Sandwich Place,Hotel,Department Store,Dim Sum Restaurant
2,3,M4R,"North Toronto West, Lawrence Park",Coffee Shop,Clothing Store,Yoga Studio,Chinese Restaurant,Spa,Sporting Goods Shop,Fast Food Restaurant,Diner,Mexican Restaurant,Salon / Barbershop
3,3,M4S,Davisville,Dessert Shop,Sandwich Place,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Gym,Coffee Shop,Thai Restaurant,Seafood Restaurant
4,3,M4T,"Moore Park, Summerhill East",Lawyer,Playground,Tennis Court,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run


### Now we merge the dataframe Toronto_top with df to add the latitude and longitude of each postal code

In [57]:
# Bring in the borough, neighbourhood and coordinate columns of df by
# matching PostalCode against df's 'Postal Code'.
Toronto_data=df
postal_lookup=Toronto_data.set_index('Postal Code')
Toronto_top=Toronto_top.join(postal_lookup,on='PostalCode')
Toronto_top.head()

Unnamed: 0,Cluster labels,PostalCode,Neighborhoods,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue,Borough,Neighbourhood,Latitude,Longitude
0,0,M4N,Lawrence Park,Park,Bus Line,Swim School,Business Service,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant,Central Toronto,Lawrence Park,43.72802,-79.38879
1,3,M4P,Davisville North,Dance Studio,Food & Drink Shop,Gym / Fitness Center,Park,Pizza Place,Breakfast Spot,Sandwich Place,Hotel,Department Store,Dim Sum Restaurant,Central Toronto,Davisville North,43.712751,-79.390197
2,3,M4R,"North Toronto West, Lawrence Park",Coffee Shop,Clothing Store,Yoga Studio,Chinese Restaurant,Spa,Sporting Goods Shop,Fast Food Restaurant,Diner,Mexican Restaurant,Salon / Barbershop,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,3,M4S,Davisville,Dessert Shop,Sandwich Place,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Gym,Coffee Shop,Thai Restaurant,Seafood Restaurant,Central Toronto,Davisville,43.704324,-79.38879
4,3,M4T,"Moore Park, Summerhill East",Lawyer,Playground,Tennis Court,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


In [59]:
# 'Neighbourhood' (from df) duplicates the existing 'Neighborhoods' column.
Toronto_top.drop(columns=['Neighbourhood'],inplace=True)

### Visualising the clusters

In [60]:
# Plot every postal code on a map of Toronto, coloured by its cluster label.
map_clusters=folium.Map([latitude,longitude],zoom_start=10)
# One evenly spaced rainbow colour per cluster (k colours in total).
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(i) for i in colors_array]
for code,nbd,lat,lng,clus in zip(Toronto_top['PostalCode'],Toronto_top['Neighborhoods'],Toronto_top['Latitude'],Toronto_top['Longitude'],Toronto_top[' Cluster labels']):
    label=folium.Popup('Postal Code:{};Neighborhoods:{};Cluster:{}'.format(code,nbd,clus),parse_html=True)
    # Labels run from 0 to k-1, so index the palette with the label itself;
    # the former clus-1 sent label 0 to the last colour via negative indexing.
    folium.CircleMarker([lat,lng],popup=label,color=rainbow[clus],
                       fill=True,
                       fill_color=rainbow[clus],
                       fill_opacity=0.6,
                       radius=5).add_to(map_clusters)

In [61]:
# Render the cluster map.
map_clusters

### Examining the clusters

Cluster 1: Cluster Label 0

In [62]:
# Rows with cluster label 0, without the last three columns.
in_cluster=Toronto_top[' Cluster labels']==0
Toronto_top.loc[in_cluster].iloc[:,:-3].reset_index(drop=True)

Unnamed: 0,Cluster labels,PostalCode,Neighborhoods,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,0,M4N,Lawrence Park,Park,Bus Line,Swim School,Business Service,Dog Run,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
1,0,M4W,Rosedale,Park,Playground,Trail,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
2,0,M4E,The Beaches,Park,Neighborhood,Health Food Store,Pub,Trail,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
3,0,M8X,"The Kingsway, Montgomery Road, Old Mill North",Park,River,Yoga Studio,Dog Run,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Doner Restaurant
4,0,M9R,"Kingsview Village, St. Phillips, Martin Grove ...",Park,Mobile Phone Shop,Sandwich Place,Bus Line,Dog Run,Diner,Discount Store,Distribution Center,Doner Restaurant,Dessert Shop
5,0,M2M,"Willowdale, Newtonbrook",Park,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
6,0,M3A,Parkwoods,Food & Drink Shop,Park,Construction & Landscaping,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
7,0,M3K,Downsview,Park,Airport,Business Service,Fabric Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dessert Shop
8,0,M6L,"North Park, Maple Leaf Park, Upwood Park",Park,Construction & Landscaping,Bakery,Basketball Court,Ethiopian Restaurant,Electronics Store,Event Space,Eastern European Restaurant,Dumpling Restaurant,Dim Sum Restaurant
9,0,M1V,"Milliken, Agincourt North, Steeles East, L'Amo...",Park,Intersection,Playground,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Donut Shop


Cluster 2: Cluster Label 1

In [63]:
# Rows with cluster label 1, without the last three columns.
in_cluster=Toronto_top[' Cluster labels']==1
Toronto_top.loc[in_cluster].iloc[:,:-3].reset_index(drop=True)

Unnamed: 0,Cluster labels,PostalCode,Neighborhoods,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,1,M4J,"East Toronto, Broadview North (Old East York)",Park,Convenience Store,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,1,M2P,York Mills West,Park,Convenience Store,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
2,1,M6N,"Runnymede, The Junction North",Convenience Store,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio,Dessert Shop


Cluster 3: Cluster Label 2

In [64]:
# Rows with cluster label 2, without the last three columns.
in_cluster=Toronto_top[' Cluster labels']==2
Toronto_top.loc[in_cluster].iloc[:,:-3].reset_index(drop=True)

Unnamed: 0,Cluster labels,PostalCode,Neighborhoods,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,2,M8Y,"Old Mill South, King's Mill Park, Sunnylea, Hu...",Baseball Field,Yoga Studio,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Farmers Market
1,2,M3M,Downsview,Food Truck,Baseball Field,Yoga Studio,Donut Shop,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore,Dim Sum Restaurant


Cluster 4: Cluster Label 3

In [65]:
# Rows with cluster label 3, without the last three columns.
in_cluster=Toronto_top[' Cluster labels']==3
Toronto_top.loc[in_cluster].iloc[:,:-3].reset_index(drop=True)

Unnamed: 0,Cluster labels,PostalCode,Neighborhoods,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,3,M4P,Davisville North,Dance Studio,Food & Drink Shop,Gym / Fitness Center,Park,Pizza Place,Breakfast Spot,Sandwich Place,Hotel,Department Store,Dim Sum Restaurant
1,3,M4R,"North Toronto West, Lawrence Park",Coffee Shop,Clothing Store,Yoga Studio,Chinese Restaurant,Spa,Sporting Goods Shop,Fast Food Restaurant,Diner,Mexican Restaurant,Salon / Barbershop
2,3,M4S,Davisville,Dessert Shop,Sandwich Place,Pizza Place,Sushi Restaurant,Café,Italian Restaurant,Gym,Coffee Shop,Thai Restaurant,Seafood Restaurant
3,3,M4T,"Moore Park, Summerhill East",Lawyer,Playground,Tennis Court,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run
4,3,M4V,"Summerhill West, Rathnelly, South Hill, Forest...",Coffee Shop,American Restaurant,Liquor Store,Supermarket,Sushi Restaurant,Bank,Restaurant,Bagel Shop,Fried Chicken Joint,Pub
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,3,M6P,"High Park, The Junction South",Grocery Store,Café,Mexican Restaurant,Thai Restaurant,Diner,Bar,Irish Pub,Bakery,Italian Restaurant,Fried Chicken Joint
78,3,M6R,"Parkdale, Roncesvalles",Breakfast Spot,Gift Shop,Bookstore,Cuban Restaurant,Dessert Shop,Bar,Restaurant,Dog Run,Movie Theater,Italian Restaurant
79,3,M6S,"Runnymede, Swansea",Coffee Shop,Café,Sushi Restaurant,Italian Restaurant,Pub,Dessert Shop,Smoothie Shop,Bookstore,Sandwich Place,Burrito Place
80,3,M6C,Humewood-Cedarvale,Hockey Arena,Field,Garden,Trail,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run


Cluster 5: Cluster Label 4

In [66]:
# Rows with cluster label 4, without the last three columns.
in_cluster=Toronto_top[' Cluster labels']==4
Toronto_top.loc[in_cluster].iloc[:,:-3].reset_index(drop=True)

Unnamed: 0,Cluster labels,PostalCode,Neighborhoods,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,4,M1J,Scarborough Village,Playground,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore


### In this notebook we can observe the similarity of different neighborhoods across Toronto based on the types of venues they have, and group them together based on these attributes.