#  Using Clustering to find Optimal Position for Grocery Warehouse
##   Scrape a table from Wikipedia: List of postal codes of Canada: M
Let's import some Libraries

In [1]:
from  bs4  import  BeautifulSoup
import  requests
import pandas as pd

Get the Html link

In [2]:
source=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

Use 'LXML' parser in 'Beautiful Soup'

In [3]:
soup=BeautifulSoup(source,'lxml')

In [4]:
table=soup.findAll('table',{'class':'wikitable sortable'})

In [5]:
df=pd.read_html(str(table), header=0)[0]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


After Importing the table, let's start Pre-Processing 

In [6]:
df=df[df.Borough!='Not assigned']

In [7]:
df.reset_index(drop=True,inplace=True)

In [8]:
df.head(3)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront


In [9]:
df=df.groupby(['Postcode','Borough'],as_index=False).agg(lambda x:','.join(set(x.dropna())))

In [10]:
df.loc[df.Neighbourhood=='Not assigned','Neighbourhood']=df.Borough

In [11]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern,Rouge"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Morningside,Guildwood,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Ionview,Kennedy Park,East Birchmount Park"
7,M1L,Scarborough,"Golden Mile,Clairlea,Oakridge"
8,M1M,Scarborough,"Cliffside,Scarborough Village West,Cliffcrest"
9,M1N,Scarborough,"Cliffside West,Birch Cliff"


Table after Pre-Processing

In [12]:
df.shape

(103, 3)

# Add coordinate to Neighbourhoods
## Load geospatial data from a csv file.

In [13]:
df_geo=pd.read_csv("https://cocl.us/Geospatial_data")

In [14]:
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476



The column Postal Code in the csv file is the same as the column Postcode in the dataframe. So we change the name Postal Code into Postcode so that we can merge later.

In [15]:
df_geo.rename(columns={'Postal Code':'Postcode'},inplace=True)

In [16]:
df_geo.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
df_merge=pd.merge(df,df_geo)

In [18]:
df_merge.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Morningside,Guildwood,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [19]:
df_toronto=df_merge

#  Fetching Venues
Let's import some libraries

In [20]:
import numpy as np # library to handle data in a vectorized manner
import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

In [21]:
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode('Toronto')
latitude = location.latitude
longitude = location.longitude
print('The  Geographical Coordinates of Toronto are:',  latitude, longitude)

The  Geographical Coordinates of Toronto are: 43.653963 -79.387207


In [22]:
map_toronto=folium.Map(location=[latitude,longitude],zoom_start=11)

for lat,lon,label  in  zip(df_toronto['Latitude'],df_toronto['Longitude'],df_toronto['Neighbourhood']):
    label=folium.Popup(label,parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto    

## Now, we use Foursquare to explore Toronto venues.

In [23]:
CLIENT_ID = 'R4YPEVJDPTPFAF2GSSU2WEZXDKY2CQYX5VAYYVQ12UVEJLVM' # your Foursquare ID
CLIENT_SECRET = 'YKP5NYWRG1RIYUUCLB3AWOG2XLC4DOJC3AMI220E33XYGHSM' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: R4YPEVJDPTPFAF2GSSU2WEZXDKY2CQYX5VAYYVQ12UVEJLVM
CLIENT_SECRET:YKP5NYWRG1RIYUUCLB3AWOG2XLC4DOJC3AMI220E33XYGHSM


In [24]:
neighborhood_latitude = df_merge.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_merge.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_merge.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern,Rouge are 43.806686299999996, -79.19435340000001.


In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500,LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:

toronto_venues =pd.DataFrame( getNearbyVenues(names=df_merge['Neighbourhood'],
                                   latitudes=df_merge['Latitude'],
                                   longitudes=df_merge['Longitude']
                                  ))

toronto_venues.head()

Malvern,Rouge
Highland Creek,Rouge Hill,Port Union
Morningside,Guildwood,West Hill
Woburn
Cedarbrae
Scarborough Village
Ionview,Kennedy Park,East Birchmount Park
Golden Mile,Clairlea,Oakridge
Cliffside,Scarborough Village West,Cliffcrest
Cliffside West,Birch Cliff
Scarborough Town Centre,Wexford Heights,Dorset Park
Wexford,Maryvale
Agincourt
Sullivan,Clarks Corners,Tam O'Shanter
Agincourt North,Steeles East,L'Amoreaux East,Milliken
L'Amoreaux West,Steeles West
Upper Rouge
Hillcrest Village
Fairview,Oriole,Henry Farm
Bayview Village
Silver Hills,York Mills
Willowdale,Newtonbrook
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Don Mills South,Flemingdon Park
Downsview North,Wilson Heights,Bathurst Manor
Northwood Park,York University
CFB Toronto,Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens,Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
Riverdale,The Danforth West
The Beac

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern,Rouge",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,"Malvern,Rouge",43.806686,-79.194353,Canadiana exhibit,43.817962,-79.193374,Zoo Exhibit
2,"Malvern,Rouge",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
3,"Malvern,Rouge",43.806686,-79.194353,Harvey's,43.800106,-79.198258,Fast Food Restaurant
4,"Malvern,Rouge",43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store


In [27]:
toronto_venues.shape

(6743, 7)

In [28]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,Richmond,King",100,100,100,100,100,100
Agincourt,60,60,60,60,60,60
"Agincourt North,Steeles East,L'Amoreaux East,Milliken",73,73,73,73,73,73
"Alderwood,Long Branch",44,44,44,44,44,44
Bayview Village,12,12,12,12,12,12
"Bedford Park,Lawrence Manor East",72,72,72,72,72,72
Berczy Park,100,100,100,100,100,100
Business Reply Mail Processing Centre 969 Eastern,100,100,100,100,100,100
"CFB Toronto,Downsview East",28,28,28,28,28,28
Caledonia-Fairbanks,72,72,72,72,72,72


In [29]:
len(toronto_venues['Venue Category'].unique())

337

In [30]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,...,Volleyball Court,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Malvern,Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
toronto_onehot.shape

(6743, 338)

# Identify Target Venues
## (ex. Restuarants )

In [32]:
important_list_of_features = [
 
 'Neighbourhood',
 'African Restaurant',
 'American Restaurant',
 'Asian Restaurant',
 'BBQ Joint',
 'Bakery',
 'Breakfast Spot',
 'Burger Joint',
 'Cajun / Creole Restaurant',
 'Cantonese Restaurant',
 'Caribbean Restaurant',
 'Chinese Restaurant',
 'Diner',
 'Fast Food Restaurant',
 'Filipino Restaurant',
 'Fish Market',
 'Food & Drink Shop',
 'Fried Chicken Joint',
 'Fruit & Vegetable Store',
 'Greek Restaurant',
 'Grocery Store',
 'Hakka Restaurant',
 'Hong Kong Restaurant',
 'Hotpot Restaurant',
 'Indian Restaurant',
 'Italian Restaurant',
 'Japanese Restaurant',
 'Korean Restaurant',
 'Latin American Restaurant',
 'Malay Restaurant',
 'Mediterranean Restaurant',
 'Mexican Restaurant',
 'Middle Eastern Restaurant',
 'Noodle House',
 'Pizza Place',
 'Restaurant',
 'Sandwich Place',
 'Seafood Restaurant',
 'Shanghai Restaurant',
  'Sushi Restaurant',
 'Taiwanese Restaurant',
  'Thai Restaurant',
  'Vegetarian / Vegan Restaurant',
  'Vietnamese Restaurant',
 'Wings Joint']

In [33]:
toronto_onehot = toronto_onehot[important_list_of_features].groupby(
    'Neighbourhood').sum()


toronto_onehot.head()

Unnamed: 0_level_0,African Restaurant,American Restaurant,Asian Restaurant,BBQ Joint,Bakery,Breakfast Spot,Burger Joint,Cajun / Creole Restaurant,Cantonese Restaurant,Caribbean Restaurant,...,Restaurant,Sandwich Place,Seafood Restaurant,Shanghai Restaurant,Sushi Restaurant,Taiwanese Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wings Joint
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Adelaide,Richmond,King",0,2,0,0,1,1,1,0,0,0,...,2,0,0,0,2,0,2,1,0,0
Agincourt,0,0,1,0,2,2,0,0,2,2,...,1,1,1,1,1,0,0,0,0,0
"Agincourt North,Steeles East,L'Amoreaux East,Milliken",0,0,0,4,5,0,0,0,2,1,...,1,0,1,0,0,0,0,1,2,0
"Alderwood,Long Branch",0,0,0,0,1,0,2,0,0,0,...,1,1,1,0,0,0,0,0,0,1
Bayview Village,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Calculating Total Potential Clients

In [34]:
feat_name_list = list(toronto_onehot.columns)
restaurant_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Restaurant') != (-1):
        restaurant_list.append(value)
        
toronto_onehot['Total Restaurants'] = toronto_onehot[restaurant_list].sum(axis = 1)
toronto_onehot = toronto_onehot.drop(columns = restaurant_list)


feat_name_list = list(toronto_onehot.columns)
joint_list = []


for counter, value in enumerate(feat_name_list):
    if value.find('Joint') != (-1):
        joint_list.append(value)
        
toronto_onehot['Total Joints'] = toronto_onehot[joint_list].sum(axis = 1)
toronto_onehot = toronto_onehot.drop(columns = joint_list)

In [35]:
toronto_onehot.head()

Unnamed: 0_level_0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
"Adelaide,Richmond,King",1,1,1,0,0,0,0,0,3,0,16,1
Agincourt,2,2,0,0,0,0,1,1,1,1,26,0
"Agincourt North,Steeles East,L'Amoreaux East,Milliken",5,0,0,0,0,0,1,2,3,0,29,4
"Alderwood,Long Branch",1,0,0,0,0,0,2,0,3,1,4,3
Bayview Village,0,0,0,0,0,0,1,0,0,0,3,0


# Clustering Using K-means
## Sorted Neighbourhoods will reveal Ideal Positions

In [36]:
kclusters =8

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_onehot)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 2, 5, 6, 0, 3, 1, 3, 0, 3, 7, 7, 7, 3, 7, 0, 4, 7, 5, 5, 1, 5,
       2, 2, 4, 6, 6, 0, 3, 0, 2, 1, 5, 7, 6, 1, 7, 1, 0, 4, 0, 5, 4, 0,
       3, 7, 4, 7, 1, 1, 3, 4, 0, 4, 6, 6, 2, 0, 1, 1, 0, 0, 7, 6, 3, 5,
       3, 7, 7, 4, 6, 6, 0, 7, 7, 1, 3, 7, 7, 5, 4, 4, 1, 1, 4, 1, 4, 1,
       2, 0, 6, 4, 6, 4, 7, 5, 6, 7, 6, 6, 4, 1, 5], dtype=int32)

## Creating New Dataset

In [44]:
means_df = pd.DataFrame(kmeans.cluster_centers_)
means_df.columns = toronto_onehot.columns
means_df['Cluster Labels'] = ['1','2','3','4','5','6','7','8']
means_df['Total Sum'] = means_df.sum(axis = 1)
means_df.sort_values(axis = 0, by = ['Total Sum'], ascending=True)

Unnamed: 0,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints,Cluster Labels,Total Sum
0,0.142857,0.071429,0.142857,4.1633360000000003e-17,2.775558e-17,1.0408340000000001e-17,0.571429,1.387779e-17,0.428571,0.357143,2.428571,0.285714,1,4.428571
6,0.571429,0.357143,0.285714,0.07142857,0.1428571,1.0408340000000001e-17,1.285714,1.387779e-17,2.714286,1.642857,4.428571,1.428571,7,12.928571
4,1.2,0.866667,0.2,4.1633360000000003e-17,0.1333333,0.06666667,1.733333,1.387779e-17,2.066667,2.066667,9.2,1.4,5,18.933333
1,2.25,1.0,0.5625,0.1875,0.125,1.0408340000000001e-17,1.1875,0.125,1.6875,0.875,14.75,2.375,2,25.125
7,1.0,1.166667,1.055556,0.1111111,0.05555556,6.938894e-18,1.333333,0.1666667,1.5,1.5,19.111111,2.388889,8,29.388889
2,3.5,1.666667,0.666667,0.3333333,0.3333333,3.469447e-18,0.833333,0.1666667,0.833333,1.166667,23.666667,1.0,3,34.166667
3,3.7,0.8,1.4,0.2,0.2,0.2,1.6,1.387779e-17,3.6,1.6,19.8,2.2,4,35.3
5,2.3,0.6,0.7,0.2,0.3,6.938894e-18,1.6,0.2,2.7,1.6,28.8,3.7,6,42.7


# Result 
## It is  clearly visible that Cluster 6 is best 

In [38]:
means_df.columns

Index(['Bakery', 'Breakfast Spot', 'Diner', 'Fish Market', 'Food & Drink Shop',
       'Fruit & Vegetable Store', 'Grocery Store', 'Noodle House',
       'Pizza Place', 'Sandwich Place', 'Total Restaurants', 'Total Joints',
       'Cluster Labels', 'Total Sum'],
      dtype='object')

## Making dataset easier to read

In [39]:
means_df =means_df[[ 'Cluster Labels','Bakery', 'Breakfast Spot', 'Diner', 'Fish Market', 'Food & Drink Shop',
       'Fruit & Vegetable Store', 'Grocery Store', 'Noodle House',
       'Pizza Place', 'Sandwich Place', 'Total Restaurants', 'Total Joints',
       'Total Sum']]
means_df

Unnamed: 0,Cluster Labels,Bakery,Breakfast Spot,Diner,Fish Market,Food & Drink Shop,Fruit & Vegetable Store,Grocery Store,Noodle House,Pizza Place,Sandwich Place,Total Restaurants,Total Joints,Total Sum
0,1,0.142857,0.071429,0.142857,4.1633360000000003e-17,2.775558e-17,1.0408340000000001e-17,0.571429,1.387779e-17,0.428571,0.357143,2.428571,0.285714,4.428571
1,2,2.25,1.0,0.5625,0.1875,0.125,1.0408340000000001e-17,1.1875,0.125,1.6875,0.875,14.75,2.375,25.125
2,3,3.5,1.666667,0.666667,0.3333333,0.3333333,3.469447e-18,0.833333,0.1666667,0.833333,1.166667,23.666667,1.0,34.166667
3,4,3.7,0.8,1.4,0.2,0.2,0.2,1.6,1.387779e-17,3.6,1.6,19.8,2.2,35.3
4,5,1.2,0.866667,0.2,4.1633360000000003e-17,0.1333333,0.06666667,1.733333,1.387779e-17,2.066667,2.066667,9.2,1.4,18.933333
5,6,2.3,0.6,0.7,0.2,0.3,6.938894e-18,1.6,0.2,2.7,1.6,28.8,3.7,42.7
6,7,0.571429,0.357143,0.285714,0.07142857,0.1428571,1.0408340000000001e-17,1.285714,1.387779e-17,2.714286,1.642857,4.428571,1.428571,12.928571
7,8,1.0,1.166667,1.055556,0.1111111,0.05555556,6.938894e-18,1.333333,0.1666667,1.5,1.5,19.111111,2.388889,29.388889


# Labelling original Dataset

In [43]:
neigh_summary = pd.DataFrame([toronto_onehot.index, 1 + kmeans.labels_]).T
neigh_summary.columns = ['Neighbourhood', 'Cluster Label']
neigh_summary


Unnamed: 0,Neighbourhood,Cluster Label
0,"Adelaide,Richmond,King",2
1,Agincourt,3
2,"Agincourt North,Steeles East,L'Amoreaux East,M...",6
3,"Alderwood,Long Branch",7
4,Bayview Village,1
5,"Bedford Park,Lawrence Manor East",4
6,Berczy Park,2
7,Business Reply Mail Processing Centre 969 Eastern,4
8,"CFB Toronto,Downsview East",1
9,Caledonia-Fairbanks,4


In [41]:
neigh_summary[neigh_summary['Cluster Label'] == 6]

Unnamed: 0,Neighbourhood,Cluster Label
2,"Agincourt North,Steeles East,L'Amoreaux East,M...",6
18,Davisville,6
19,Davisville North,6
21,Don Mills North,6
32,"Forest Hill West,Forest Hill North",6
41,Humewood-Cedarvale,6
65,Rosedale,6
79,"Summerhill West,Forest Hill SE,Deer Park,South...",6
95,Willowdale South,6
102,"Yorkville,The Annex,North Midtown",6


# Fetching Full Details about the best Neighbourhoods

In [42]:
for i in range(10) :  
    name_of_neigh = list(neigh_summary[neigh_summary['Cluster Label'] == 6]['Neighbourhood'])[i]
    print(toronto_venues[toronto_venues['Neighbourhood'] == name_of_neigh].iloc[0,0:3])

Neighbourhood              Agincourt North,Steeles East,L'Amoreaux East,M...
Neighbourhood Latitude                                               43.8153
Neighbourhood Longitude                                             -79.2846
Name: 534, dtype: object
Neighbourhood              Davisville
Neighbourhood Latitude        43.7043
Neighbourhood Longitude      -79.3888
Name: 2568, dtype: object
Neighbourhood              Davisville North
Neighbourhood Latitude              43.7128
Neighbourhood Longitude            -79.3902
Name: 2368, dtype: object
Neighbourhood              Don Mills North
Neighbourhood Latitude             43.7459
Neighbourhood Longitude           -79.3522
Name: 1147, dtype: object
Neighbourhood              Forest Hill West,Forest Hill North
Neighbourhood Latitude                                43.6969
Neighbourhood Longitude                              -79.4113
Name: 4211, dtype: object
Neighbourhood              Humewood-Cedarvale
Neighbourhood Latitude           