# IBM Data Science Final Capstone : Clustering and Segmentation of British Columbia Neighborhood

_In this project we will try to explore the neighborhood of Vancouver, British Columbia, Canada and analyze which neighborhood is best for opening up a new restraunt and which one is a residential area._

In [3]:
#Data source for British Columbia Neighborhood
url='http://www.geonames.org/postal-codes/CA/BC/british-columbia.html'

In [10]:
#Installing required packages
# !conda install -c anaconda lxml
# !conda install -c anaconda BeautifulSoup
#!pip install folium

In [5]:
from IPython.display import Image
from IPython.core.display import HTML 
Image(url= "http://architectureimg.com/wp-content/uploads/2016/08/skyscrapers-vancouver-british-columbia-lights-city-full-hd-1080p-background.jpg")

In [11]:
import numpy as np
import pandas as pd
from geopy.geocoders import Nominatim
import folium

from bs4 import BeautifulSoup
import requests

In [12]:
columns=['PostalCode', 'Borough', 'Neighborhood','latitude','longitude']
neighborhoods=pd.DataFrame(columns=columns)

## Creating pandas Dataframe from webpage
Since the webpage is not so developer friendly, it requires some specific algorithms to scrape and extract information out of it.

In [13]:
source=requests.get(url).text
soup=BeautifulSoup(source,'html5lib')
table=soup.find('table', class_='restable')

In [14]:
def getLocation(data):
    if data !='':
        return data.replace('\xa0','').split('/')
    else:
        return ['','']


def getProperNeighborhoodName(name, borough):
    #print(name, borough)
    for r in (("North ", ""), ("South ", ""),("East ", ""),("West ", ""),(" ern", "") ,("  ", "")):
        name = name.replace(*r)
    if '(' in name:
        for r in ((borough+" (", ""), (")", "")):
            name = name.replace(*r)
    if '/' in name:
        name=name.split('/')[0]
    return name
    

In [15]:
dfHolder=[]
rowcount=-1
for row in table.find_all('tr'):
    rowcount=rowcount+1
    if rowcount==0: continue
    if rowcount%2 != 0:
        tdHolder=[]
    for td in row.find_all('td'):
        tdHolder.append(td.text)
    if rowcount%2 == 0:
        dfHolder.append(tdHolder)
#print(dfHolder)

In [18]:
for rows in dfHolder:
    neighName= getProperNeighborhoodName(rows[1], rows[5]).strip()
    location = getLocation(rows[-1])
    #print(neighName, " --- ", rows[1])
    neighborhoods = neighborhoods.append({'PostalCode':rows[2],
                                           'Borough': rows[5],
                                          'Neighborhood': neighName,
                                         'latitude':location[0],
                                         'longitude':location[1]}, ignore_index=True)

Data Cleansing : _Dropping duplicate neighborhoods and removing rows where Borough is empty_

In [19]:
#neighborhoods.shape
neighborhoodDF = neighborhoods[neighborhoods['Borough']!='']
neighborhoodDF.drop_duplicates(subset=['Borough','Neighborhood'],keep='first', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [20]:
neighborhoodDF['Borough'].value_counts()

Vancouver            37
Surrey                9
Richmond              8
Burnaby               7
Saanich               5
Delta                 5
Kelowna               5
Nanaimo               4
Kamloops              4
Prince George         3
Vernon                3
Port Coquitlam        3
Victoria              3
Maple Ridge           2
Abbotsford            2
Coquitlam             2
Oak Bay               2
Campbell River        2
Langley Township      2
Courtenay             2
Chilliwack            2
New Westminster       2
Name: Borough, dtype: int64

In [21]:
#mylocation=['Surrey','Delta', 'Vancouver' ]
mylocation=['Vancouver']

my_neighbor = neighborhoodDF.loc[neighborhoodDF['Borough'].isin(mylocation)]
my_neighbor

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
86,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04
87,V5L,Vancouver,Grandview-Woodlands,49.279,-123.067
90,V5P,Vancouver,SE Kensington,49.222,-123.068
91,V5R,Vancouver,Renfrew-Collingwood,49.24,-123.041
92,V5S,Vancouver,Killarney,49.218,-123.038
93,V5T,Vancouver,Mount Pleasant,49.262,-123.092
94,V5V,Vancouver,Kensington,49.248,-123.091
95,V5W,Vancouver,SE Riley Park-Little Mountain,49.233,-123.092
96,V5X,Vancouver,SE Oakridge,49.216,-123.098
98,V5Z,Vancouver,Fairview,49.248,-123.121


_Let's look at our neighbors on a map_

In [22]:
address = 'Burnaby'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Oak Bay, Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Oak Bay, Canada are 49.2433804, -122.9725459.


In [23]:
map_bc = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoodDF['latitude'], neighborhoodDF['longitude'], neighborhoodDF['Borough'], neighborhoodDF['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_bc)  
    
map_bc

In [25]:
CLIENT_ID = '*****' # your Foursquare ID
CLIENT_SECRET = '*****' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

### Getting venues in Vancouver Neighborhood using FourSquare API

In [26]:
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(my_neighbor['latitude'], my_neighbor['longitude'], my_neighbor['PostalCode'], my_neighbor['Borough'], my_neighbor['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [27]:
venues_df = pd.DataFrame(venues)
venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
print(venues_df.shape)
venues_df

(810, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,The Fair at the PNE,49.282971,-123.042109,Fair
1,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,Wooden Roller Coaster,49.281744,-123.035128,Theme Park Ride / Attraction
2,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,Livestock Barns,49.284037,-123.039278,Farm
3,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,Playland,49.281924,-123.036258,Theme Park
4,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,Hastings Community Centre,49.280778,-123.039176,Event Space
5,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,PNE Amphitheatre,49.283777,-123.037242,Stadium
6,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,Plaza Beer Garden,49.283701,-123.038785,Beer Garden
7,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,Italia Bakery,49.280940,-123.045885,Bakery
8,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,Empire Fields,49.281870,-123.034135,Soccer Field
9,V5K,Vancouver,Hastings-Sunrise,49.281,-123.04,Subway,49.280892,-123.045732,Sandwich Place


In [28]:
print('There are {} unique venue categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 179 unique venue categories.


In [34]:
venues_df.groupby('VenueCategory').nunique()
plot_df= venues_df.groupby('VenueCategory').nunique()
plot_df

Unnamed: 0_level_0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
VenueCategory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Airport,1,1,1,1,1,1,1,1,1
Airport Terminal,2,1,2,2,1,2,2,2,1
American Restaurant,6,1,6,6,4,6,8,8,1
Amphitheater,1,1,1,1,1,1,1,1,1
Art Gallery,3,1,3,3,3,3,3,3,1
Asian Restaurant,6,1,6,6,6,8,8,8,1
Athletics & Sports,2,1,2,1,2,2,2,2,1
Australian Restaurant,1,1,1,1,1,1,1,1,1
Automotive Shop,1,1,1,1,1,1,1,1,1
BBQ Joint,1,1,1,1,1,1,1,1,1


In [47]:
# one hot encoding
onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighbourhood column back to dataframe
onehot['Neighborhood'] = venues_df['Neighborhood'] 

# move neighbourhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.shape

(810, 180)

In [48]:
oneHotgrouped = onehot.groupby('Neighborhood').mean().reset_index()
oneHotgrouped

Unnamed: 0,Neighborhood,Airport,Airport Terminal,American Restaurant,Amphitheater,Art Gallery,Asian Restaurant,Athletics & Sports,Australian Restaurant,Automotive Shop,...,Trade School,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Bentall Centre,0.1,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Central Kitsilano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.054054,0.027027,0.0,0.0,0.027027,0.0,0.054054
2,Chaldecutt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Dunbar-Southlands,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
4,End,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01
5,Fairview,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0
6,Grandview-Woodlands,0.0,0.0,0.0,0.0,0.0,0.078947,0.026316,0.0,0.0,...,0.0,0.0,0.0,0.026316,0.026316,0.0,0.0,0.0,0.0,0.0
7,Hastings-Sunrise,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0
8,Kensington,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0
9,Killarney,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
# Get most frequent venues
def getTopVenues(row, top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:top_venues]

In [50]:
num_top_venues = 10

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    columns.append('Venue Rank-{}'.format(ind+1))

# create a new dataframe
location_venues_sorted = pd.DataFrame(columns=columns)
location_venues_sorted['Neighborhood'] = oneHotgrouped['Neighborhood']

for ind in np.arange(oneHotgrouped.shape[0]):
    location_venues_sorted.iloc[ind, 1:] = getTopVenues(oneHotgrouped.iloc[ind, :], num_top_venues)

location_venues_sorted

Unnamed: 0,Neighborhood,Venue Rank-1,Venue Rank-2,Venue Rank-3,Venue Rank-4,Venue Rank-5,Venue Rank-6,Venue Rank-7,Venue Rank-8,Venue Rank-9,Venue Rank-10
0,Bentall Centre,Airport Terminal,Airport,Irish Pub,Outdoor Sculpture,Breakfast Spot,Gastropub,Gym,Plaza,American Restaurant,Event Space
1,Central Kitsilano,Coffee Shop,Pizza Place,Yoga Studio,Pub,Vegetarian / Vegan Restaurant,Italian Restaurant,Breakfast Spot,Café,Spa,Liquor Store
2,Chaldecutt,Park,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm
3,Dunbar-Southlands,Construction & Landscaping,Vietnamese Restaurant,Home Service,Fast Food Restaurant,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field
4,End,Japanese Restaurant,Coffee Shop,Hotel,Bakery,Italian Restaurant,Restaurant,Sushi Restaurant,Dessert Shop,Cosmetics Shop,Gay Bar
5,Fairview,Bus Stop,Coffee Shop,Chinese Restaurant,Sushi Restaurant,Bubble Tea Shop,Bank,Park,Restaurant,Shopping Mall,Café
6,Grandview-Woodlands,Asian Restaurant,Theater,Italian Restaurant,Grocery Store,Brewery,Coffee Shop,Sushi Restaurant,Pizza Place,Steakhouse,Breakfast Spot
7,Hastings-Sunrise,Theme Park Ride / Attraction,Beer Garden,Event Space,Theme Park,Stadium,Bus Station,Sandwich Place,Farm,Fair,Burger Joint
8,Kensington,Coffee Shop,Bus Stop,Chinese Restaurant,Vietnamese Restaurant,Ice Cream Shop,Supermarket,Grocery Store,Greek Restaurant,Malay Restaurant,Filipino Restaurant
9,Killarney,Chinese Restaurant,Bus Stop,Pharmacy,Bank,Farmers Market,Deli / Bodega,Mobile Phone Shop,Shopping Mall,Sushi Restaurant,Liquor Store


## Clustering using k-means into 5 clusters

In [51]:
backupNeighborhoodDF = oneHotgrouped
#backupNeighborhoodDF

In [52]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

pca = PCA(.95)
oneHotgrouped_cluster = pca.fit_transform(oneHotgrouped.drop('Neighborhood', 1))

In [53]:
print(oneHotgrouped.shape, location_venues_sorted.shape)
uniqueNeighborhood=oneHotgrouped['Neighborhood'].tolist()
uniqueNeighborhood_LocationDF=my_neighbor[my_neighbor.Neighborhood.isin(uniqueNeighborhood)]
uniqueNeighborhood_LocationDF.shape

(32, 180) (32, 11)


(32, 5)

In [54]:
# run k-means clustering for 5 clusters
kclusters = 5
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(oneHotgrouped_cluster)
print(kmeans.labels_[:], kmeans.labels_.shape)

[1 1 3 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 2 4 1 3 0 1] (32,)


In [55]:

myNeighborhood = oneHotgrouped.merge(uniqueNeighborhood_LocationDF, on = "Neighborhood", how = "left").dropna()
myNeighborhoodFinalDF = myNeighborhood.join(location_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
#print(my_neighbor.shape, myNeighborhood.shape, oneHotgrouped.shape)
#print(kmeans.labels_+1)
myNeighborhoodFinalDF["Cluster Labels"] = kmeans.labels_ + 1
myNeighborhoodFinalDF
#myNeighborhood.shape

Unnamed: 0,Neighborhood,Airport,Airport Terminal,American Restaurant,Amphitheater,Art Gallery,Asian Restaurant,Athletics & Sports,Australian Restaurant,Automotive Shop,...,Venue Rank-2,Venue Rank-3,Venue Rank-4,Venue Rank-5,Venue Rank-6,Venue Rank-7,Venue Rank-8,Venue Rank-9,Venue Rank-10,Cluster Labels
0,Bentall Centre,0.1,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,...,Airport,Irish Pub,Outdoor Sculpture,Breakfast Spot,Gastropub,Gym,Plaza,American Restaurant,Event Space,2
1,Central Kitsilano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Pizza Place,Yoga Studio,Pub,Vegetarian / Vegan Restaurant,Italian Restaurant,Breakfast Spot,Café,Spa,Liquor Store,2
2,Chaldecutt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,4
3,Dunbar-Southlands,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Vietnamese Restaurant,Home Service,Fast Food Restaurant,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,2
4,End,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,...,Coffee Shop,Hotel,Bakery,Italian Restaurant,Restaurant,Sushi Restaurant,Dessert Shop,Cosmetics Shop,Gay Bar,2
5,Fairview,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Coffee Shop,Chinese Restaurant,Sushi Restaurant,Bubble Tea Shop,Bank,Park,Restaurant,Shopping Mall,Café,2
6,Grandview-Woodlands,0.0,0.0,0.0,0.0,0.0,0.078947,0.026316,0.0,0.0,...,Theater,Italian Restaurant,Grocery Store,Brewery,Coffee Shop,Sushi Restaurant,Pizza Place,Steakhouse,Breakfast Spot,2
7,Hastings-Sunrise,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,...,Beer Garden,Event Space,Theme Park,Stadium,Bus Station,Sandwich Place,Farm,Fair,Burger Joint,2
8,Kensington,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Bus Stop,Chinese Restaurant,Vietnamese Restaurant,Ice Cream Shop,Supermarket,Grocery Store,Greek Restaurant,Malay Restaurant,Filipino Restaurant,2
9,Killarney,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Bus Stop,Pharmacy,Bank,Farmers Market,Deli / Bodega,Mobile Phone Shop,Shopping Mall,Sushi Restaurant,Liquor Store,2


## Let's plot the cluster into a map

In [57]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from matplotlib import pyplot as plt
%matplotlib inline

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

kclusters = kclusters + 1

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(myNeighborhoodFinalDF['latitude'],
                                  myNeighborhoodFinalDF['longitude'],
                                  myNeighborhoodFinalDF['Neighborhood'],
                                  myNeighborhoodFinalDF['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Maximum Neighborhoods belong to Cluster 2, Let's Analyse it:

_It looks like this cluster has a lively neighborhood has a lot of Chinese/Japanese restraunts. It means people here like Asian cuisine. This would be a good place to start an Indian restraunt._

In [278]:
myNeighborhoodFinalDF.loc[myNeighborhoodFinalDF['Cluster Labels'] == 2].filter(["Cluster Labels","Borough","Neighborhood","latitude","longitude","Venue Rank-1","Venue Rank-2","Venue Rank-3","Venue Rank-4","Venue Rank-5","Venue Rank-6","Venue Rank-7","Venue Rank-8","Venue Rank-9","Venue Rank-10"])

Unnamed: 0,Cluster Labels,Borough,Neighborhood,latitude,longitude,Venue Rank-1,Venue Rank-2,Venue Rank-3,Venue Rank-4,Venue Rank-5,Venue Rank-6,Venue Rank-7,Venue Rank-8,Venue Rank-9,Venue Rank-10
0,2,Vancouver,Bentall Centre,49.293,-123.116,Airport Terminal,Airport,Irish Pub,Outdoor Sculpture,Breakfast Spot,Gastropub,Gym,Plaza,American Restaurant,Event Space
1,2,Vancouver,Central Kitsilano,49.265,-123.165,Coffee Shop,Pizza Place,Yoga Studio,Pub,Vegetarian / Vegan Restaurant,Italian Restaurant,Breakfast Spot,Café,Spa,Liquor Store
3,2,Vancouver,Dunbar-Southlands,49.23,-123.189,Construction & Landscaping,Vietnamese Restaurant,Home Service,Fast Food Restaurant,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field
4,2,Vancouver,End,49.283,-123.13,Japanese Restaurant,Coffee Shop,Gay Bar,Hotel,Bakery,Sushi Restaurant,Restaurant,Dessert Shop,Cosmetics Shop,Italian Restaurant
5,2,Vancouver,Fairview,49.248,-123.121,Bus Stop,Coffee Shop,Chinese Restaurant,Sushi Restaurant,Bubble Tea Shop,Bank,Park,Restaurant,Shopping Mall,Café
6,2,Vancouver,Grandview-Woodlands,49.279,-123.067,Asian Restaurant,Theater,Italian Restaurant,Grocery Store,Brewery,Coffee Shop,Sushi Restaurant,Pizza Place,Steakhouse,Breakfast Spot
7,2,Vancouver,Hastings-Sunrise,49.281,-123.04,Theme Park Ride / Attraction,Stadium,Beer Garden,Event Space,Theme Park,Park,Soccer Field,Farm,Sandwich Place,Fair
8,2,Vancouver,Kensington,49.248,-123.091,Coffee Shop,Bus Stop,Chinese Restaurant,Vietnamese Restaurant,Ice Cream Shop,Supermarket,Grocery Store,Greek Restaurant,Malay Restaurant,Filipino Restaurant
9,2,Vancouver,Killarney,49.218,-123.038,Chinese Restaurant,Bus Stop,Pharmacy,Bank,Farmers Market,Deli / Bodega,Mobile Phone Shop,Shopping Mall,Sushi Restaurant,Liquor Store
11,2,Vancouver,Mount Pleasant,49.262,-123.092,Sushi Restaurant,Hotel,Vietnamese Restaurant,Grocery Store,Ethiopian Restaurant,Convenience Store,Bar,Market,Liquor Store,Park


## Second largest Cluster is cluster 4, Let's Analyse it:

_This cluster has a lot of Parks and Farmer's market, restarunts. It seems to be a peaceful neighborhood and a good place for buying a house._

In [280]:
myNeighborhoodFinalDF.loc[myNeighborhoodFinalDF['Cluster Labels'] == 4]\
.filter(["Cluster Labels","Borough","Neighborhood","latitude","longitude","Venue Rank-1","Venue Rank-2","Venue Rank-3","Venue Rank-4","Venue Rank-5","Venue Rank-6","Venue Rank-7","Venue Rank-8","Venue Rank-9","Venue Rank-10"])

Unnamed: 0,Cluster Labels,Borough,Neighborhood,latitude,longitude,Venue Rank-1,Venue Rank-2,Venue Rank-3,Venue Rank-4,Venue Rank-5,Venue Rank-6,Venue Rank-7,Venue Rank-8,Venue Rank-9,Venue Rank-10
2,4,Vancouver,Chaldecutt,49.249,-123.209,Park,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm
10,4,Vancouver,Kitsilano,49.267,-123.198,Park,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm
29,4,Vancouver,Vancouver Southwest Central,49.322,-123.083,Park,Baseball Field,Yoga Studio,Falafel Restaurant,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant


## Cluster 3

In [281]:
myNeighborhoodFinalDF.loc[myNeighborhoodFinalDF['Cluster Labels'] == 3]\
.filter(["Cluster Labels","Borough","Neighborhood","latitude","longitude","Venue Rank-1","Venue Rank-2","Venue Rank-3","Venue Rank-4","Venue Rank-5","Venue Rank-6","Venue Rank-7","Venue Rank-8","Venue Rank-9","Venue Rank-10"])

Unnamed: 0,Cluster Labels,Borough,Neighborhood,latitude,longitude,Venue Rank-1,Venue Rank-2,Venue Rank-3,Venue Rank-4,Venue Rank-5,Venue Rank-6,Venue Rank-7,Venue Rank-8,Venue Rank-9,Venue Rank-10
26,3,Vancouver,Vancouver South,49.34,-123.191,Art Gallery,Yoga Studio,Fair,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market


## Cluster 1

In [283]:
myNeighborhoodFinalDF.loc[myNeighborhoodFinalDF['Cluster Labels'] == 1]\
.filter(["Cluster Labels","Borough","Neighborhood","latitude","longitude","Venue Rank-1","Venue Rank-2","Venue Rank-3","Venue Rank-4","Venue Rank-5","Venue Rank-6","Venue Rank-7","Venue Rank-8","Venue Rank-9","Venue Rank-10"])

Unnamed: 0,Cluster Labels,Borough,Neighborhood,latitude,longitude,Venue Rank-1,Venue Rank-2,Venue Rank-3,Venue Rank-4,Venue Rank-5,Venue Rank-6,Venue Rank-7,Venue Rank-8,Venue Rank-9,Venue Rank-10
25,1,Vancouver,Vancouver Northwest Central,49.35,-123.068,Trail,Paper / Office Supplies Store,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market
30,1,Vancouver,Vancouver West,49.361,-123.263,Tapas Restaurant,Trail,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market


## Cluster 5

In [284]:
myNeighborhoodFinalDF.loc[myNeighborhoodFinalDF['Cluster Labels'] == 5]\
.filter(["Cluster Labels","Borough","Neighborhood","latitude","longitude","Venue Rank-1","Venue Rank-2","Venue Rank-3","Venue Rank-4","Venue Rank-5","Venue Rank-6","Venue Rank-7","Venue Rank-8","Venue Rank-9","Venue Rank-10"])

Unnamed: 0,Cluster Labels,Borough,Neighborhood,latitude,longitude,Venue Rank-1,Venue Rank-2,Venue Rank-3,Venue Rank-4,Venue Rank-5,Venue Rank-6,Venue Rank-7,Venue Rank-8,Venue Rank-9,Venue Rank-10
27,5,Vancouver,Vancouver Southeast,49.332,-123.142,Pharmacy,Liquor Store,Yoga Studio,Fair,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market
