# IBM APPLIED DATA SCIENCE CAPSTONE

## This notebook is going to be used for the CAPSTONE project

### WEEK 1 ASSIGNMENT

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Week 1 sanity check: confirm the kernel executes code.
print('Hello Capstone Project Course!')

Hello Capstone Project Course!


### WEEK 3 ASSIGNMENT

##### LOADING THE REQUIRED LIBRARIES

In [3]:
import io
import json
import os
import random

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from IPython.core.display import HTML
from IPython.display import Image
from IPython.display import display_html
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

#### PART 1 GETTING THE POSTAL CODE, BOROUGH, NEIGHBOURHOOD from WIKIPEDIA

In [4]:
# Scrape the postal-code table from a pinned revision of the Wikipedia page.
url = "https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=1008658788"
response = requests.get(url, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page
source = response.text
soup = BeautifulSoup(source, 'lxml')

# Only the first <table> of the page is used.
table = soup.find("table")
if table is None:
    raise ValueError("No <table> element found at {}".format(url))
table_rows = table.find_all("tr")

res = []
for tr in table_rows:
    # Rows without <td> cells (e.g. a header row) produce an empty list here.
    row = [cell.text.replace("\n", "") for cell in tr.find_all("td")]

    # Only process rows that have an assigned borough; rows with fewer than
    # three cells cannot be mapped to the three columns and are skipped.
    if len(row) < 3 or row[1] == "Not assigned":
        continue

    # If a row has a borough but a "Not assigned" neighborhood, the neighborhood is the borough.
    if "Not assigned" in row[2]:
        row[2] = row[1]
    res.append(row)

# Dataframe with 3 columns (newlines were already removed from every cell above)
dfi = pd.DataFrame(res, columns=["PostalCode", "Borough", "Neighborhood"])

dfi.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### PART 2 INSERT LONGITUDE AND LATITUDE

In [5]:
# Download the CSV that maps each postal code to a latitude/longitude pair.
url = "https://cocl.us/Geospatial_data"
response = requests.get(url, timeout=30)
response.raise_for_status()  # do not try to parse an HTTP error page as CSV
source = response.content
data = pd.read_csv(io.StringIO(source.decode('utf-8')))

# DataFrame.join defaults to a left join, so the rows of dfi are kept and the
# coordinate columns are matched on the postal code.
df = dfi.join(data.set_index('Postal Code'), on='PostalCode')
print("Shape: ", df.shape)

Shape:  (103, 5)


#### PART 3A CLUSTERING THE NEIGHBORHOODS IN TORONTO

In [6]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the address cannot be resolved; report that
# explicitly instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError("Could not geocode address: {!r}".format(address))
latitude = location.latitude
longitude = location.longitude

##### Creating Map of Toronto

In [7]:
# Base map centred on the latitude/longitude computed for Toronto.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Draw one circle marker per row of df; the popup reads "neighborhood, borough".
for lat, lng, borough, neighborhood in df[
    ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
].itertuples(index=False, name=None):
    label = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

#### PART 3B EXPLORING THE NEIGHBORHOOD IN TORONTO USING FOURSQUARE API

##### INFORMATION TO CONNECT TO FOURSQUARE API

In [8]:
# Foursquare credentials are read from the environment so that no secret is
# stored in (or shared with) the notebook. Export FOURSQUARE_CLIENT_ID,
# FOURSQUARE_CLIENT_SECRET and, if needed, FOURSQUARE_ACCESS_TOKEN before
# starting the kernel. Credentials that were ever committed in plain text
# should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
ACCESS_TOKEN = os.environ.get('FOURSQUARE_ACCESS_TOKEN', '')
if not (CLIENT_ID and CLIENT_SECRET):
    print('FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will be rejected.')

VERSION = '20180604'  # value sent as the "v" parameter of every Foursquare request
LIMIT = 30  # maximum number of venues requested per call
radius = 500  # search radius passed to the API

##### FINDING A NEIGHBORHOOD TO WORK WITH

In [9]:
# Number of rows (postal codes) per borough, most frequent first.
df['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East Toronto         5
East York            5
Mississauga          1
Name: Borough, dtype: int64

In [10]:
# Borough whose neighborhoods are explored in the following cells.
Selection = 'North York'

# Keep only the rows of that borough and renumber them from 0.
neighborhood_data = df.loc[df['Borough'].eq(Selection)].reset_index(drop=True)
neighborhood_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


In [11]:
address = Selection+', Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the address cannot be resolved; report that
# explicitly instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError("Could not geocode address: {!r}".format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(Selection,latitude, longitude))

The geograpical coordinate of North York are 43.7543263, -79.44911696639593.


In [12]:
# create map of North York using latitude and longitude values
map_neighborhood = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(neighborhood_data['Latitude'], neighborhood_data['Longitude'], neighborhood_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_neighborhood)  
    
map_neighborhood

##### EXPLORING THE FIRST NEIGHBORHOOD

In [13]:
# Name and coordinates of the first row (index label 0) of the selected borough.
neighborhood_name = neighborhood_data.loc[0, 'Neighborhood']
neighborhood_latitude = neighborhood_data.loc[0, 'Latitude']
neighborhood_longitude = neighborhood_data.loc[0, 'Longitude']
print(f"The first neighborhood's name is '{neighborhood_name}'.")

The first neighborhood's name is 'Parkwoods'.


#### THE TOP 30 VENUES WITHIN A RADIUS OF 500 METERS OF THE FIRST NEIGHBORHOOD IN NORTH YORK

In [14]:
LIMIT = 30 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# Build the "explore" request around the first neighborhood's coordinates.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

# Send the request and parse the JSON body. A timeout keeps the cell from
# hanging forever, and raise_for_status() surfaces HTTP errors (bad
# credentials, exhausted quota, ...) here rather than as a KeyError later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [15]:
def get_category_type(row):
    """Return the name of the first category of a venue record.

    Parameters
    ----------
    row : mapping-like
        Object indexable by key (for example a dict or a pandas Series). The
        category list is read from ``row['categories']`` and, when that key
        is missing, from ``row['venue.categories']``.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the
    category list is empty.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being silently swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [16]:
# Venue records of the first group in the Foursquare response.
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON and keep only the name, categories and coordinates.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = pd.json_normalize(venues)
nearby_venues = nearby_venues.loc[:, filtered_columns]

# Replace each category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of every column name.
nearby_venues.columns = [name.rsplit(".", 1)[-1] for name in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,KFC,Fast Food Restaurant,43.754387,-79.333021
2,Variety Store,Food & Drink Shop,43.751974,-79.333114


#### EXPLORING THE NEIGHBORHOODS IN NORTH YORK, TORONTO

##### FUNCTION TO GET VENUES

In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated in parallel with ``zip``; one request is made per triple.
    radius : int, optional
        Value sent as the ``radius`` parameter of every request (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but still has these columns) when no venue was collected.
        'Venue Category' is None for a venue whose category list is empty.

    Raises
    ------
    requests.HTTPError
        If a request is answered with an HTTP error status.

    Notes
    -----
    Reads the notebook-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT and
    prints each name as it is processed.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; fail on HTTP errors instead of on a later KeyError
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without categories must not abort the whole run
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning `.columns` afterwards would raise a length mismatch.
    nearby_venues = pd.DataFrame(venue_rows, columns=columns)

    return nearby_venues

In [18]:
# Fetch the venues around every neighborhood of the selected borough
# (one API request per row of neighborhood_data).
neighborhood_venues = getNearbyVenues(
    names=neighborhood_data['Neighborhood'],
    latitudes=neighborhood_data['Latitude'],
    longitudes=neighborhood_data['Longitude'],
)

neighborhood_venues.head()

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale, Willowdale East
Downsview
York Mills West
Willowdale, Willowdale West


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


##### ANALYZING EACH NEIGHBORHOOD

In [19]:
# one hot encoding
neighborhood_onehot = pd.get_dummies(neighborhood_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
neighborhood_onehot['Neighborhood'] = neighborhood_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [neighborhood_onehot.columns[-1]] + list(neighborhood_onehot.columns[:-1])
neighborhood_onehot = neighborhood_onehot[fixed_columns]

neighborhood_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Shopping Mall,Sporting Goods Shop,Supermarket,Sushi Restaurant,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# Average the indicator columns per neighborhood, i.e. the share of each
# venue category among the venues collected for that neighborhood.
neighborhood_grouped = neighborhood_onehot.groupby('Neighborhood', as_index=False).mean()
neighborhood_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,...,Shopping Mall,Sporting Goods Shop,Supermarket,Sushi Restaurant,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,...,0.047619,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.04,0.04,0.0,0.04,0.0,0.0,0.0
3,Don Mills,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,...,0.035714,0.035714,0.035714,0.035714,0.0,0.0,0.0,0.0,0.0,0.0
4,Downsview,0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,...,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##### Check the 10 most common venues in each neighborhood

In [21]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, ignoring its first entry.

    Parameters
    ----------
    row : pandas.Series
        The first position is skipped; the remaining values are ranked in
        descending order.
    num_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Index labels of the top-ranked entries (at most ``num_top_venues``).
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues:
# "1st", "2nd", "3rd", then "<n>th" for every later rank
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # explicit bounds check instead of a bare `except` used as control flow
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe with one row per grouped neighborhood
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = neighborhood_grouped['Neighborhood']

# fill the ranked venue categories row by row
for ind in np.arange(neighborhood_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(neighborhood_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Gas Station,Fried Chicken Joint,Diner,Grocery Store,Deli / Bodega,Ice Cream Shop,Middle Eastern Restaurant,Mobile Phone Shop
1,Bayview Village,Japanese Restaurant,Bank,Café,Chinese Restaurant,Food & Drink Shop,Financial or Legal Service,Clothing Store,Food Truck,Coffee Shop,Comfort Food Restaurant
2,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Sandwich Place,Restaurant,Fast Food Restaurant,Juice Bar,Locksmith,Café,Butcher,Indian Restaurant
3,Don Mills,Gym,Coffee Shop,Restaurant,Dim Sum Restaurant,Clothing Store,Japanese Restaurant,Chinese Restaurant,Caribbean Restaurant,Café,Italian Restaurant
4,Downsview,Grocery Store,Park,Baseball Field,Airport,Liquor Store,Discount Store,Food Truck,Athletics & Sports,Shopping Mall,Bank


##### CLUSTERING NEIGHBORHOODS USING K MEANS

In [22]:
# set number of clusters
kclusters = 5

neighborhood_grouped_clustering = neighborhood_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(neighborhood_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([2, 2, 2, 2, 2, 2, 4, 2, 2, 1, 2, 4, 2, 0, 2, 2, 2, 3])

In [23]:
# add clustering labels

# add clustering labels as the first column; a label column left over from a
# previous execution is dropped first so the cell can be re-run without
# `insert` raising because the column already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the labels and ranked venue columns to every row of neighborhood_data,
# matched on the neighborhood name (rows without a match get NaN)
neighborhood_merged = neighborhood_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

neighborhood_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Food & Drink Shop,Fast Food Restaurant,Dessert Shop,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,2.0,Coffee Shop,Financial or Legal Service,Pizza Place,Hockey Arena,Intersection,Portuguese Restaurant,Women's Store,Department Store,Clothing Store,Comfort Food Restaurant
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,2.0,Clothing Store,Furniture / Home Store,Women's Store,Miscellaneous Shop,Boutique,Coffee Shop,Vietnamese Restaurant,Accessories Store,Sporting Goods Shop,Café
3,M3B,North York,Don Mills,43.745906,-79.352188,2.0,Gym,Coffee Shop,Restaurant,Dim Sum Restaurant,Clothing Store,Japanese Restaurant,Chinese Restaurant,Caribbean Restaurant,Café,Italian Restaurant
4,M6B,North York,Glencairn,43.709577,-79.445073,4.0,Italian Restaurant,Park,Playground,Bakery,Japanese Restaurant,Women's Store,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping


##### Some neighborhoods end up with NaN values after the join (for example when no venues were returned for them), so those rows are removed

In [24]:
# Drop rows containing NaN and cast the labels back to integers in a single
# expression, which avoids assigning a column on the result of dropna()
# (a chained assignment).
neighborhood_merged = neighborhood_merged.dropna().astype({'Cluster Labels': int})

##### VISUALIZING THE RESULTING CLUSTERS

In [25]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(neighborhood_merged['Latitude'], neighborhood_merged['Longitude'], neighborhood_merged['Neighborhood'], neighborhood_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### EXAMINING CLUSTERS

##### CLUSTER 1

In [26]:
# Rows with cluster label 0: column 1 (Borough) plus every column from position 5 onwards.
neighborhood_merged[neighborhood_merged['Cluster Labels'] == 0].iloc[
    :, [1] + list(range(5, neighborhood_merged.shape[1]))
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Park,Food & Drink Shop,Fast Food Restaurant,Dessert Shop,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store


##### CLUSTER 2

In [27]:
# Rows with cluster label 1: column 1 (Borough) plus every column from position 5 onwards.
neighborhood_merged[neighborhood_merged['Cluster Labels'] == 1].iloc[
    :, [1] + list(range(5, neighborhood_merged.shape[1]))
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,North York,1,Baseball Field,Diner,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Deli / Bodega,Department Store,Dessert Shop


##### CLUSTER 3

In [28]:
# Rows with cluster label 2: column 1 (Borough) plus every column from position 5 onwards.
neighborhood_merged[neighborhood_merged['Cluster Labels'] == 2].iloc[
    :, [1] + list(range(5, neighborhood_merged.shape[1]))
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,2,Coffee Shop,Financial or Legal Service,Pizza Place,Hockey Arena,Intersection,Portuguese Restaurant,Women's Store,Department Store,Clothing Store,Comfort Food Restaurant
2,North York,2,Clothing Store,Furniture / Home Store,Women's Store,Miscellaneous Shop,Boutique,Coffee Shop,Vietnamese Restaurant,Accessories Store,Sporting Goods Shop,Café
3,North York,2,Gym,Coffee Shop,Restaurant,Dim Sum Restaurant,Clothing Store,Japanese Restaurant,Chinese Restaurant,Caribbean Restaurant,Café,Italian Restaurant
5,North York,2,Gym,Coffee Shop,Restaurant,Dim Sum Restaurant,Clothing Store,Japanese Restaurant,Chinese Restaurant,Caribbean Restaurant,Café,Italian Restaurant
6,North York,2,Golf Course,Mediterranean Restaurant,Fast Food Restaurant,Dog Run,Pool,Women's Store,Department Store,Clothing Store,Coffee Shop,Comfort Food Restaurant
7,North York,2,Bank,Coffee Shop,Gas Station,Fried Chicken Joint,Diner,Grocery Store,Deli / Bodega,Ice Cream Shop,Middle Eastern Restaurant,Mobile Phone Shop
8,North York,2,Coffee Shop,Clothing Store,Bank,Restaurant,Juice Bar,Food Court,Liquor Store,Japanese Restaurant,Chocolate Shop,Video Game Store
9,North York,2,Furniture / Home Store,Caribbean Restaurant,Metro Station,Coffee Shop,Massage Studio,Bar,Miscellaneous Shop,Dessert Shop,Comfort Food Restaurant,Construction & Landscaping
10,North York,2,Japanese Restaurant,Bank,Café,Chinese Restaurant,Food & Drink Shop,Financial or Legal Service,Clothing Store,Food Truck,Coffee Shop,Comfort Food Restaurant
11,North York,2,Grocery Store,Park,Baseball Field,Airport,Liquor Store,Discount Store,Food Truck,Athletics & Sports,Shopping Mall,Bank


##### CLUSTER 4

In [29]:
# Rows with cluster label 3: column 1 (Borough) plus every column from position 5 onwards.
neighborhood_merged[neighborhood_merged['Cluster Labels'] == 3].iloc[
    :, [1] + list(range(5, neighborhood_merged.shape[1]))
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,North York,3,Park,Electronics Store,Convenience Store,Dessert Shop,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Deli / Bodega


##### CLUSTER 5

In [30]:
# Rows with cluster label 4: column 1 (Borough) plus every column from position 5 onwards.
neighborhood_merged[neighborhood_merged['Cluster Labels'] == 4].iloc[
    :, [1] + list(range(5, neighborhood_merged.shape[1]))
]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,North York,4,Italian Restaurant,Park,Playground,Bakery,Japanese Restaurant,Women's Store,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping
14,North York,4,Park,Basketball Court,Construction & Landscaping,Bakery,Clothing Store,Coffee Shop,Comfort Food Restaurant,Convenience Store,Deli / Bodega,Department Store
