# Week 3 Capstone Project Notebook


Notebook for the Data Science Specialization Capstone 

In [1]:
# Standard-library helpers first, then numpy and pandas
import os
from io import StringIO

import numpy as np
import pandas as pd

## Getting and parsing the Wikipedia page of postal codes of Canada
First, we import BeautifulSoup and urlopen to read and then parse the HTML page. We then extract the table of postal codes using BeautifulSoup's find function.

Secondly, we convert the html table into a pandas dataframe.

In [2]:
from bs4 import BeautifulSoup
from urllib.request import urlopen

In [3]:
# Read the Wikipedia HTML page listing the postal codes of Canada (prefix M).
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Use a context manager so the HTTP response is closed even if reading fails.
with urlopen(url) as response:
    html = response.read()

In [4]:
# Extract the first <table> element of the page and convert it to a DataFrame.
soup = BeautifulSoup(html, 'html.parser')
table = soup.find('table')
# find() returns None when nothing matches; fail with a clear message instead of
# letting read_html try to parse the string "None".
if table is None:
    raise ValueError('No <table> element found in the downloaded page')
# Passing literal HTML to read_html is deprecated in recent pandas versions,
# so the markup is wrapped in a file-like object.
df = pd.read_html(StringIO(str(table)))[0]

In [5]:
# Treat the literal "Not assigned" placeholder as a missing value (NaN)
df = df.replace({"Not assigned": np.nan})
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,
9,M8A,,


In [6]:
# Drop every row whose Borough is missing (the former "Not assigned" entries).
# Note: this mutates df in place, so the original row labels are kept (see the
# gaps in the index of the output below).
df.dropna(subset=["Borough"], inplace=True)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [7]:
# Replace missing Neighbourhood entries with the name of their Borough.
# The filled column is assigned back explicitly: calling fillna(inplace=True) on
# an attribute-accessed column is chained assignment, which newer pandas versions
# warn about and which leaves df unchanged under copy-on-write.
df["Neighbourhood"] = df["Neighbourhood"].fillna(df["Borough"])
df.head(10)


Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [8]:
# Collapse rows sharing a Postcode/Borough pair into one row whose Neighbourhood
# is the comma-separated list of all neighbourhoods in that group.
df = (
    df.groupby(['Postcode', 'Borough'])
      .aggregate(lambda names: ', '.join(names))
      .reset_index()
)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [9]:
# (rows, columns) of the frame after grouping by Postcode and Borough
df.shape

(103, 3)

## Reading the Geolocation of the Neighbourhoods of Canada
We import geocoder to get the latitude and longitude of each neighbourhood.
However, geocoder is not working reliably, so we use the provided CSV file instead.

In [10]:
import geocoder # import geocoder
import requests

In [11]:
# Load the provided lookup table of postal code -> latitude/longitude
df_latlng = pd.read_csv("Geospatial_Coordinates.csv")
df_latlng.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [12]:
# (rows, columns) of the coordinates table
df_latlng.shape

(103, 3)

In [13]:
# Inner-join the neighbourhood and geolocation frames on their postal-code
# columns, then discard the duplicate key column coming from the coordinates table.
df = (
    df.merge(df_latlng, left_on="Postcode", right_on="Postal Code", how="inner")
      .drop(columns="Postal Code")
)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


## Visualizing and Clustering the Neighbourhoods
We use folium for map visualization and k-means to cluster the neighbourhoods.

In [14]:
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim

In [15]:
# Get the latitude and longitude of Toronto, Canada
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="Toronta, Canada")
location = geolocator.geocode(address)
# geocode() yields None when the service finds no match; stop here with a clear
# message rather than failing on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

In [16]:
# Show Toronto, Canada on the map, centred on the geocoded coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle per row of df; clicking it shows the neighbourhood name(s).
for row_lat, row_lng, hood in zip(df['Latitude'], df['Longitude'], df['Neighbourhood']):
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        radius=5,
        popup=folium.Popup(hood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

In [17]:
# Cluster the neighbourhoods into 6 clusters using the lat and lng coordinates.
kclusters = 6

df_clst = df[["Latitude", "Longitude"]]

# Run k-means clustering.
# n_init is set explicitly because its default differs between scikit-learn
# releases; 10 restarts matches the historical default, so the labels stay
# reproducible across versions.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(df_clst)

In [18]:
# Attach the coordinate-based k-means label of every row as a "clusters" column
df = df.assign(clusters=kmeans.labels_)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,clusters
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,2
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,2
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,5
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,2
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,5


In [19]:
# Map of the neighbourhoods coloured by their coordinate-based k-means cluster.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(df['Latitude'], df['Longitude'], df['Neighbourhood'], df['clusters']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters - 1, so they index the palette directly
    # (the previous `cluster-1` only worked for label 0 via negative-index wraparound).
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## Conclusion
As expected, the clusters of the neighbourhoods are based on their proximity to one another, since we used the latitude and longitude coordinates for the clustering.

### Clustering the Neighbourhoods using Foursquare

In [23]:
# Foursquare API configuration.
# Credentials are read from the environment so they are never stored in the
# notebook: export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel. Any key pair that was previously committed with this
# notebook should be treated as compromised and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will fail.')
VERSION = '20180605'  # Foursquare API version
LIMIT = 30  # value sent as the `limit` query parameter of each request
# NOTE(review): this value is not passed to getNearbyVenues in the call below,
# so the function's own default radius (500) is what is actually used.
radius = 1500



Get nearby venues for each Neighbourhood in the data set

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are iterated together.
    radius : int, default 500
        Value sent as the `radius` query parameter (presumably metres per the
        Foursquare API -- confirm against the API documentation).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame is
        empty (but has these columns) when no location or venue is available.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Relies on the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string instead of formatting it by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # A timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status surfaces quota/authentication errors explicitly.
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # Some venues may carry no category; record None instead of raising IndexError.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing the column names to the constructor also works for zero rows,
    # where assigning `.columns` afterwards would raise a length mismatch.
    return pd.DataFrame(rows, columns=columns)

In [25]:
# Fetch the venues around every neighbourhood centre. No radius is passed, so
# getNearbyVenues falls back to its default of 500.
df_venues = getNearbyVenues(df['Neighbourhood'], df['Latitude'], df['Longitude'])

In [26]:
# One-hot encode the venue categories: one indicator column per category.
venue_dummies = pd.get_dummies(df_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the neighbourhood of each venue ...
venue_dummies['Neighbourhood'] = df_venues['Neighbourhood']

# ... and rotate the last column to the front.
last_column = venue_dummies.columns[-1]
other_columns = venue_dummies.columns[:-1]
df_onehot = venue_dummies[[last_column, *other_columns]]

df_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Groupby Neighbourhood and take the mean of the frequency of occurrence of each category

In [27]:
# Mean of each indicator column per neighbourhood, i.e. the share of its venues
# that fall in each category; Neighbourhood stays a regular column.
df_grouped = df_onehot.groupby('Neighbourhood', as_index=False).mean()
df_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
#extract  most common venues
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest entries of `row`.

    The first element of `row` is skipped (callers pass rows whose first
    entry is the neighbourhood name); the remaining values are ranked in
    descending order and the index labels of the leading entries are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    labels = ranked.index.values
    return labels[:num_top_venues]

Take the top 20 venues for each neighbourhood.

In [29]:
num_top_venues = 20


def _ordinal(position):
    """Return the English ordinal for a 1-based position, e.g. 1st, 2nd, 3rd, 4th, 11th, 21st."""
    if 10 <= position % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
    return '{}{}'.format(position, suffix)


# create columns according to number of top venues
# (explicit ordinal rule instead of a bare `except:` around a list lookup)
columns = ['Neighbourhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# Collect the top categories of every neighbourhood and build the frame once,
# rather than filling an empty frame row by row.
top_venues = [
    return_most_common_venues(df_grouped.iloc[ind, :], num_top_venues)
    for ind in range(df_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=df_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighbourhood', df_grouped['Neighbourhood'])

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,"Adelaide, King, Richmond",Steakhouse,Hotel,Café,American Restaurant,Asian Restaurant,Pizza Place,Lounge,Smoke Shop,Speakeasy,...,Sushi Restaurant,Concert Hall,Plaza,Monument / Landmark,Neighborhood,Food Court,Vegetarian / Vegan Restaurant,Noodle House,Coffee Shop,Opera House
1,Agincourt,Lounge,Skating Rink,Breakfast Spot,Sandwich Place,Curling Ice,Dumpling Restaurant,Drugstore,Dog Run,Discount Store,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cuban Restaurant,Clothing Store,Creperie,Coworking Space,Costume Shop
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Asian Restaurant,Playground,Park,Dessert Shop,Cuban Restaurant,Curling Ice,Dance Studio,Deli / Bodega,Department Store,...,Dim Sum Restaurant,Creperie,Discount Store,Dog Run,Drugstore,Dumpling Restaurant,Diner,Coworking Space,Electronics Store,Costume Shop
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Fried Chicken Joint,Coffee Shop,Sandwich Place,Pharmacy,Beer Store,Fast Food Restaurant,Pizza Place,Garden Center,...,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Gluten-free Restaurant,Curling Ice,Cuban Restaurant,Creperie
4,"Alderwood, Long Branch",Pizza Place,Skating Rink,Coffee Shop,Pharmacy,Pool,Pub,Sandwich Place,Gym,Airport Terminal,...,Drugstore,Dog Run,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice


In [30]:
# set number of clusters
kclusters = 5

# Keep only the numeric category frequencies. drop(columns=...) replaces the
# positional axis argument (`drop('Neighbourhood', 1)`), which pandas 2.0 no
# longer accepts.
df_grouped_clustering = df_grouped.drop(columns='Neighbourhood')

# run k-means clustering
# n_init is set explicitly because its default differs between scikit-learn
# releases; 10 restarts matches the historical default.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(df_grouped_clustering)

# check cluster labels generated for the first 10 rows in the dataframe
kmeans.labels_[0:10].astype('int')

array([1, 1, 4, 1, 1, 1, 1, 1, 1, 1])

In [31]:
# add clustering labels
# insert() raises if the column already exists, so drop a copy left by an
# earlier run of this cell first; this makes the cell safe to re-run.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Attach the cluster label and top venues of each neighbourhood to df (which
# holds the latitude/longitude), matching rows on the Neighbourhood name.
df_merged = df.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood', how="inner")



In [32]:
# Preview the merged frame: location data, both cluster labellings and the top venues
df_merged.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,clusters,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2,0,Fast Food Restaurant,Print Shop,Yoga Studio,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Creperie,Electronics Store,Coworking Space,Costume Shop,Cosmetics Shop
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,2,0,Bar,Yoga Studio,Empanada Restaurant,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Cuban Restaurant,Creperie,Coworking Space,Costume Shop,Cosmetics Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,2,0,Pizza Place,Rental Car Location,Electronics Store,...,Dog Run,Coffee Shop,Discount Store,Diner,College Arts Building,Dim Sum Restaurant,College Gym,Dessert Shop,College Stadium,Department Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2,1,Coffee Shop,Korean Restaurant,Yoga Studio,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cuban Restaurant,Empanada Restaurant,Creperie,Coworking Space,Costume Shop
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2,0,Caribbean Restaurant,Bank,Thai Restaurant,...,Dance Studio,Dim Sum Restaurant,Drugstore,Dumpling Restaurant,Dessert Shop,Department Store,Eastern European Restaurant,Deli / Bodega,Yoga Studio,Cuban Restaurant
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,2,0,Playground,Convenience Store,Cuban Restaurant,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Creperie,Clothing Store,Coworking Space,Costume Shop,Cosmetics Shop
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,2,1,Discount Store,Bus Station,Coffee Shop,...,Dim Sum Restaurant,Dessert Shop,Deli / Bodega,Dance Studio,Curling Ice,Yoga Studio,Clothing Store,Creperie,Coworking Space,Costume Shop
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577,5,0,Bus Line,Bakery,Bus Station,...,Drugstore,Dog Run,Discount Store,Diner,Dim Sum Restaurant,College Gym,Dessert Shop,Department Store,Deli / Bodega,College Stadium
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476,2,0,American Restaurant,Motel,Yoga Studio,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cuban Restaurant,Empanada Restaurant,Creperie,Coworking Space,Costume Shop
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,5,1,Skating Rink,College Stadium,General Entertainment,...,Diner,Cuban Restaurant,Discount Store,Dog Run,Drugstore,Dumpling Restaurant,Curling Ice,Coworking Space,Creperie,Electronics Store


In [33]:
# create map of the neighbourhoods coloured by their venue-based cluster label
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Neighbourhood'], df_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run from 0 to kclusters - 1, so they index the palette directly
    # (the previous `cluster-1` only worked for label 0 via negative-index wraparound).
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Cluster 0

In [34]:
# Rows labelled cluster 0: Borough (position 1) plus every column from position 5 onward
df_merged[df_merged['Cluster Labels'] == 0].iloc[:, [1, *range(5, df_merged.shape[1])]]

Unnamed: 0,Borough,clusters,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
0,Scarborough,2,0,Fast Food Restaurant,Print Shop,Yoga Studio,Cuban Restaurant,Dumpling Restaurant,Drugstore,Dog Run,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Creperie,Electronics Store,Coworking Space,Costume Shop,Cosmetics Shop
1,Scarborough,2,0,Bar,Yoga Studio,Empanada Restaurant,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Cuban Restaurant,Creperie,Coworking Space,Costume Shop,Cosmetics Shop
2,Scarborough,2,0,Pizza Place,Rental Car Location,Electronics Store,Medical Center,Breakfast Spot,Mexican Restaurant,Intersection,...,Dog Run,Coffee Shop,Discount Store,Diner,College Arts Building,Dim Sum Restaurant,College Gym,Dessert Shop,College Stadium,Department Store
4,Scarborough,2,0,Caribbean Restaurant,Bank,Thai Restaurant,Fried Chicken Joint,Bakery,Athletics & Sports,Hakka Restaurant,...,Dance Studio,Dim Sum Restaurant,Drugstore,Dumpling Restaurant,Dessert Shop,Department Store,Eastern European Restaurant,Deli / Bodega,Yoga Studio,Cuban Restaurant
5,Scarborough,2,0,Playground,Convenience Store,Cuban Restaurant,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Creperie,Clothing Store,Coworking Space,Costume Shop,Cosmetics Shop
7,Scarborough,5,0,Bus Line,Bakery,Bus Station,Fast Food Restaurant,Intersection,Soccer Field,Park,...,Drugstore,Dog Run,Discount Store,Diner,Dim Sum Restaurant,College Gym,Dessert Shop,Department Store,Deli / Bodega,College Stadium
8,Scarborough,2,0,American Restaurant,Motel,Yoga Studio,Curling Ice,Eastern European Restaurant,Dumpling Restaurant,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cuban Restaurant,Empanada Restaurant,Creperie,Coworking Space,Costume Shop
10,Scarborough,2,0,Indian Restaurant,Pet Store,Latin American Restaurant,Chinese Restaurant,Vietnamese Restaurant,Dessert Shop,Curling Ice,...,Discount Store,Dim Sum Restaurant,Diner,Dog Run,Drugstore,Dumpling Restaurant,Cuban Restaurant,Creperie,Electronics Store,Coworking Space
13,Scarborough,2,0,Pizza Place,Fried Chicken Joint,Chinese Restaurant,Noodle House,Shopping Mall,Fast Food Restaurant,Italian Restaurant,...,Convenience Store,Dessert Shop,Department Store,Gay Bar,Deli / Bodega,Dance Studio,Curling Ice,Cuban Restaurant,Creperie,Coworking Space
17,North York,3,0,Golf Course,Dog Run,Pool,Mediterranean Restaurant,Yoga Studio,Creperie,Drugstore,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Cuban Restaurant,Coworking Space,Eastern European Restaurant,Costume Shop,Cosmetics Shop


### Cluster 1

In [35]:
# Rows labelled cluster 1: Borough (position 1) plus every column from position 5 onward
df_merged[df_merged['Cluster Labels'] == 1].iloc[:, [1, *range(5, df_merged.shape[1])]]

Unnamed: 0,Borough,clusters,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
3,Scarborough,2,1,Coffee Shop,Korean Restaurant,Yoga Studio,Curling Ice,Eastern European Restaurant,Dumpling Restaurant,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cuban Restaurant,Empanada Restaurant,Creperie,Coworking Space,Costume Shop
6,Scarborough,2,1,Discount Store,Bus Station,Coffee Shop,Department Store,Cuban Restaurant,Eastern European Restaurant,Dumpling Restaurant,...,Dim Sum Restaurant,Dessert Shop,Deli / Bodega,Dance Studio,Curling Ice,Yoga Studio,Clothing Store,Creperie,Coworking Space,Costume Shop
9,Scarborough,5,1,Skating Rink,College Stadium,General Entertainment,Café,Dim Sum Restaurant,Dance Studio,Deli / Bodega,...,Diner,Cuban Restaurant,Discount Store,Dog Run,Drugstore,Dumpling Restaurant,Curling Ice,Coworking Space,Creperie,Electronics Store
11,Scarborough,5,1,Breakfast Spot,Bakery,Middle Eastern Restaurant,Sandwich Place,Auto Garage,Yoga Studio,Electronics Store,...,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Cuban Restaurant,Creperie
12,Scarborough,2,1,Lounge,Skating Rink,Breakfast Spot,Sandwich Place,Curling Ice,Dumpling Restaurant,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cuban Restaurant,Clothing Store,Creperie,Coworking Space,Costume Shop
15,Scarborough,2,1,Fast Food Restaurant,Chinese Restaurant,Gym,Breakfast Spot,Grocery Store,Electronics Store,Nail Salon,...,Sandwich Place,Pizza Place,American Restaurant,Arcade,Deli / Bodega,Airport Food Court,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run
18,North York,5,1,Clothing Store,Coffee Shop,Liquor Store,Bank,Japanese Restaurant,Electronics Store,Smoothie Shop,...,Fast Food Restaurant,Department Store,Restaurant,Burger Joint,Cosmetics Shop,Candy Store,Movie Theater,Bakery,Pharmacy,American Restaurant
19,North York,3,1,Chinese Restaurant,Bank,Café,Japanese Restaurant,Yoga Studio,Dance Studio,Eastern European Restaurant,...,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Cuban Restaurant,Curling Ice,Empanada Restaurant,Creperie
22,North York,3,1,Ramen Restaurant,Coffee Shop,Sandwich Place,Restaurant,Café,Arts & Crafts Store,Lounge,...,Electronics Store,Indonesian Restaurant,Steakhouse,Ice Cream Shop,Sushi Restaurant,Middle Eastern Restaurant,Fast Food Restaurant,Grocery Store,Pizza Place,Vietnamese Restaurant
24,North York,3,1,Grocery Store,Pizza Place,Coffee Shop,Pharmacy,Gluten-free Restaurant,Gift Shop,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Cuban Restaurant,Creperie,Coworking Space,Costume Shop


### Cluster 2

In [36]:
# Rows labelled cluster 2: Borough (position 1) plus every column from position 5 onward
df_merged[df_merged['Cluster Labels'] == 2].iloc[:, [1, *range(5, df_merged.shape[1])]]

Unnamed: 0,Borough,clusters,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
90,Etobicoke,4,2,River,Yoga Studio,Cuban Restaurant,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Creperie,Clothing Store,Coworking Space,Costume Shop,Cosmetics Shop


### Cluster 3

In [37]:
# Rows labelled cluster 3: Borough (position 1) plus every column from position 5 onward
df_merged[df_merged['Cluster Labels'] == 3].iloc[:, [1, *range(5, df_merged.shape[1])]]

Unnamed: 0,Borough,clusters,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
91,Etobicoke,4,3,Baseball Field,Construction & Landscaping,Yoga Studio,Curling Ice,Eastern European Restaurant,Dumpling Restaurant,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cuban Restaurant,Empanada Restaurant,Creperie,Coworking Space,Costume Shop
97,North York,1,3,Baseball Field,Yoga Studio,Empanada Restaurant,Eastern European Restaurant,Dumpling Restaurant,Drugstore,Dog Run,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Cuban Restaurant,Creperie,Coworking Space,Costume Shop,Cosmetics Shop


### Cluster 4

In [38]:
# Rows labelled cluster 4: Borough (position 1) plus every column from position 5 onward
df_merged[df_merged['Cluster Labels'] == 4].iloc[:, [1, *range(5, df_merged.shape[1])]]

Unnamed: 0,Borough,clusters,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,...,11th Most Common Venue,12th Most Common Venue,13th Most Common Venue,14th Most Common Venue,15th Most Common Venue,16th Most Common Venue,17th Most Common Venue,18th Most Common Venue,19th Most Common Venue,20th Most Common Venue
14,Scarborough,2,4,Asian Restaurant,Playground,Park,Dessert Shop,Cuban Restaurant,Curling Ice,Dance Studio,...,Dim Sum Restaurant,Creperie,Discount Store,Dog Run,Drugstore,Dumpling Restaurant,Diner,Coworking Space,Electronics Store,Costume Shop
23,North York,3,4,Park,Bank,Yoga Studio,Curling Ice,Eastern European Restaurant,Dumpling Restaurant,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cuban Restaurant,Empanada Restaurant,Creperie,Coworking Space,Costume Shop
25,North York,5,4,Park,Fast Food Restaurant,Food & Drink Shop,Yoga Studio,Cuban Restaurant,Dumpling Restaurant,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Creperie,Electronics Store,Coworking Space,Costume Shop
30,North York,3,4,Bus Stop,Playground,Airport,Park,Dim Sum Restaurant,Curling Ice,Dance Studio,...,Yoga Studio,Diner,Cuban Restaurant,Dog Run,Drugstore,Dumpling Restaurant,Discount Store,Coworking Space,Creperie,Electronics Store
40,East York,5,4,Park,Metro Station,Convenience Store,Yoga Studio,Cuban Restaurant,Dumpling Restaurant,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Creperie,Greek Restaurant,Coworking Space,Costume Shop
44,Central Toronto,3,4,Bus Line,Park,Swim School,Yoga Studio,Dumpling Restaurant,Drugstore,Dog Run,...,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Cuban Restaurant,Creperie,Electronics Store,Coworking Space,Costume Shop
48,Central Toronto,0,4,Playground,Park,Restaurant,Tennis Court,Yoga Studio,Deli / Bodega,Creperie,...,Dessert Shop,Department Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Coworking Space,Cosmetics Shop,Costume Shop,Dumpling Restaurant
50,Downtown Toronto,0,4,Park,Playground,Trail,Yoga Studio,Dessert Shop,Curling Ice,Dance Studio,...,Dim Sum Restaurant,Creperie,Discount Store,Dog Run,Drugstore,Dumpling Restaurant,Cuban Restaurant,Costume Shop,Coworking Space,Electronics Store
64,Central Toronto,3,4,Jewelry Store,Sushi Restaurant,Park,Trail,Dance Studio,Deli / Bodega,Department Store,...,Cuban Restaurant,Diner,Discount Store,Dog Run,Drugstore,Dumpling Restaurant,Curling Ice,Creperie,Electronics Store,Coworking Space
74,York,3,4,Park,Pharmacy,Women's Store,Fast Food Restaurant,Cuban Restaurant,Dumpling Restaurant,Drugstore,...,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice,Coworking Space,Creperie,Electronics Store,Costume Shop
