# Final analysis of the Neighborhood data

### Below cell is from previous notebooks.

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup 

url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
website_as_html = requests.get(url).text
soup = BeautifulSoup(website_as_html,'lxml')
my_table = soup.find('table',{'class':'wikitable sortable'})
tr_tag_data=my_table.findAll('tr')
data={'PostalCode':[],'Borough':[],'Neighborhood':[]}
for i in tr_tag_data[1:]:
    row=i.text.split('\n')[1:-1]
    if row[1]=='Not assigned':
        continue
    elif row[2]=='Not assigned':
        data['Neighborhood'].append(row[1])
    else:
        data['Neighborhood'].append(row[2])
        
    data['PostalCode'].append(row[0])
    data['Borough'].append(row[1])

df=pd.DataFrame(data)
toronto_data=df.sort_values(['PostalCode']).reset_index(drop=True)
toronto_data=toronto_data.groupby(['PostalCode','Borough'], as_index=False).agg(', '.join)

#========================= From Part 2 ==========================

coodinates=pd.read_csv('Lat_Lng.csv')
toronto=pd.merge(toronto_data, coodinates, on='PostalCode')
toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# 1. Explore Dataset

### Get overview of the area covered by the dataframe.

In [2]:
import folium

Toronto_lat,Toronto_lon = 43.6532, -79.3832  # Coordinates for Toronto

map_toronto = folium.Map(location=[Toronto_lat,Toronto_lon], zoom_start=11)

# add markers to map
for lat, lng, borough, code in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['PostalCode']):
    label = '{}, {}'.format(code, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

## Pefect!!

##### Now, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.


In [3]:
# @hidden_cell

CLIENT_ID = 'P3WUWUCIYBA3A0L5QGFD2CNXFSIWOQS3H4PCKTRPBDWVHPG2' # your Foursquare ID
CLIENT_SECRET = '2Y53R40CG5M4NWUCL10ZAEEA1TA0C5MSJSMANG4V4UJFC11E' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

#### Let's explore the first PostalCode in our dataframe.

In [4]:
code=toronto.loc[0,'PostalCode']
Nhood=toronto.loc[0,'Neighborhood']
lat,lng=toronto.loc[0,'Latitude'],toronto.loc[0,'Longitude']
print('Postal Code : {}\nName        : {}'.format(code,Nhood))

Postal Code : M1B
Name        : Rouge, Malvern


#### Now, let's get the top 100 venues that are in The Beaches within a radius of 500 meters.

In [5]:
base_url='https://api.foursquare.com/v2/'
group='venues/'
endpoint='explore?'
credentials='client_id={}&client_secret={}&'.format(CLIENT_ID, CLIENT_SECRET)
lat_lon='ll={},{}&'.format(lat, lng)
version='v={}&'.format(VERSION)
radius='radius=500&'
limit='limit=100'

url = base_url+group+endpoint+credentials+lat_lon+version+radius+limit

##### Make the request and get the json result

In [6]:
results = requests.get(url).json()

#### function that extracts the category of the venue

In [7]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

###### Clean the json and structure it into a pandas dataframe.

In [8]:
from pandas.io.json import json_normalize

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Wendy's,Fast Food Restaurant,43.807448,-79.199056
1,Interprovincial Group,Print Shop,43.80563,-79.200378


# 2. Explore Neighborhoods in Toronto

In [9]:
def getNearbyVenues(codes, latitudes, longitudes):
    
    venues_list=[]
    for code, lat, lng in zip(codes, latitudes, longitudes):
        #print(code)
            
        # create the API request URL
        url = base_url+group+endpoint+credentials+version+'ll={},{}&radius=500&limit=100'.format(lat, lng)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            code, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['PostalCode', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [10]:
toronto_venues = getNearbyVenues(codes=toronto['PostalCode'],
                                   latitudes=toronto['Latitude'],
                                   longitudes=toronto['Longitude']
                                  )

toronto_venues.head(15)

Unnamed: 0,PostalCode,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,M1B,43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,M1C,43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
3,M1C,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,M1E,43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
5,M1E,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
6,M1E,43.763573,-79.188711,Marina Spa,43.766,-79.191,Spa
7,M1E,43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
8,M1E,43.763573,-79.188711,chatr Mobile,43.765917,-79.191672,Tech Startup
9,M1E,43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location


In [11]:
toronto_venues['PostalCode'].value_counts()

M5T    100
M5L    100
M5B    100
M5H    100
M5X    100
M5C    100
M5J    100
M5K    100
M5W     94
M5G     88
M4Y     87
M2J     66
M6J     62
M5E     55
M5A     48
M4X     46
M4K     42
M7A     39
M4M     38
M4S     38
M6S     38
M2N     37
M4G     35
M5S     34
M5M     25
M6P     23
M3C     23
M5R     23
M6K     22
M6H     20
      ... 
M3M      4
M2H      4
M6C      4
M1G      4
M9R      4
M6N      4
M1N      4
M4A      4
M6M      4
M6B      4
M9W      3
M3K      3
M1V      3
M3A      3
M4J      3
M8X      3
M9L      3
M3L      3
M4N      3
M1M      3
M4T      2
M5N      2
M9N      2
M1C      2
M1B      2
M1J      1
M2L      1
M8Y      1
M9M      1
M9B      1
Name: PostalCode, Length: 100, dtype: int64

In [12]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 280 uniques categories.


# 3. Analyze Each Neighborhood

In [13]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['PostalCode'] = toronto_venues['PostalCode'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,PostalCode,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1C,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1C,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
toronto_grouped = toronto_onehot.groupby('PostalCode').mean().reset_index()
toronto_grouped.head(15)

Unnamed: 0,PostalCode,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M1B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M1C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M1E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M1G,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,M1H,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M1J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M1K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M1L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M1M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M1N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
num_top_venues = 5

for code in toronto_grouped['PostalCode']:
    print("----"+code+"----")
    temp = toronto_grouped[toronto_grouped['PostalCode'] == code].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M1B----
                       venue  freq
0                 Print Shop   0.5
1       Fast Food Restaurant   0.5
2  Middle Eastern Restaurant   0.0
3                      Motel   0.0
4        Monument / Landmark   0.0


----M1C----
                        venue  freq
0  Construction & Landscaping   0.5
1                         Bar   0.5
2           Accessories Store   0.0
3          Miscellaneous Shop   0.0
4                       Motel   0.0


----M1E----
                venue  freq
0                 Spa  0.11
1        Intersection  0.11
2      Breakfast Spot  0.11
3      Medical Center  0.11
4  Mexican Restaurant  0.11


----M1G----
               venue  freq
0        Coffee Shop  0.50
1  Convenience Store  0.25
2  Korean Restaurant  0.25
3  Accessories Store  0.00
4  Mobile Phone Shop  0.00


----M1H----
                 venue  freq
0  Fried Chicken Joint  0.12
1     Hakka Restaurant  0.12
2                 Bank  0.12
3      Thai Restaurant  0.12
4   Athletics & Sports  0.12




                 venue  freq
0  Japanese Restaurant  0.07
1          Coffee Shop  0.07
2     Sushi Restaurant  0.06
3              Gay Bar  0.03
4           Restaurant  0.03


----M5A----
            venue  freq
0     Coffee Shop  0.17
1             Pub  0.06
2          Bakery  0.06
3            Park  0.06
4  Breakfast Spot  0.04


----M5B----
                       venue  freq
0                Coffee Shop  0.11
1             Clothing Store  0.06
2             Cosmetics Shop  0.04
3                       Café  0.03
4  Middle Eastern Restaurant  0.03


----M5C----
                venue  freq
0                Café  0.06
1         Coffee Shop  0.06
2               Hotel  0.05
3          Restaurant  0.05
4  Italian Restaurant  0.03


----M5E----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.04
3         Cheese Shop  0.04
4      Farmers Market  0.04


----M5G----
                venue  freq
0         Coffee Shop  0.16
1  Italian

In [16]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

##### Now let us create the new dataframe and display the top 10 venues for each neighborhood.

In [17]:
import numpy as np

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['PostalCode']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,PostalCode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Fast Food Restaurant,Print Shop,Department Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,M1C,Construction & Landscaping,Bar,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
2,M1E,Mexican Restaurant,Electronics Store,Spa,Intersection,Pizza Place,Breakfast Spot,Tech Startup,Medical Center,Rental Car Location,Yoga Studio
3,M1G,Coffee Shop,Korean Restaurant,Convenience Store,Yoga Studio,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
4,M1H,Hakka Restaurant,Thai Restaurant,Bank,Bakery,Fried Chicken Joint,Athletics & Sports,Caribbean Restaurant,Lounge,Coworking Space,Concert Hall


# 4. Cluster Neighborhoods

In [18]:
from sklearn.cluster import KMeans

kclusters = 8

toronto_grouped_clustering = toronto_grouped.drop('PostalCode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([4, 6, 6, 0, 6, 7, 5, 6, 6, 6, 6, 6, 5, 5, 4, 5, 6, 6, 6, 2, 6, 0,
       5, 4, 6, 6, 6, 6, 4, 5, 6, 5, 6, 5, 6, 6, 6, 5, 0, 6, 6, 6, 4, 5,
       6, 6, 7, 6, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 6, 6, 6,
       6, 6, 6, 6, 4, 6, 4, 6, 6, 6, 6, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 5,
       4, 3, 5, 1, 5, 5, 3, 0, 5, 4, 5, 6], dtype=int32)

In [19]:
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted['Cluster Labels']=kmeans.labels_
toronto_merged = toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('PostalCode'), on='PostalCode')

# to handle NA values
toronto_merged.dropna(inplace=True)

# to convert dtype of 'Cluster Labels'
toronto_merged = toronto_merged.astype({'Cluster Labels': int})
toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,Fast Food Restaurant,Print Shop,Department Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,4
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek",43.784535,-79.160497,Construction & Landscaping,Bar,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,6
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Mexican Restaurant,Electronics Store,Spa,Intersection,Pizza Place,Breakfast Spot,Tech Startup,Medical Center,Rental Car Location,Yoga Studio,6
3,M1G,Scarborough,Woburn,43.770992,-79.216917,Coffee Shop,Korean Restaurant,Convenience Store,Yoga Studio,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,0
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,Hakka Restaurant,Thai Restaurant,Bank,Bakery,Fried Chicken Joint,Athletics & Sports,Caribbean Restaurant,Lounge,Coworking Space,Concert Hall,6


In [20]:
map_clusters = folium.Map(location=[Toronto_lat,Toronto_lon], zoom_start=11)

# set color scheme for the clusters
rainbow = ['#ecc86f','#386df9','#12c8e6','#5af8c8','#a4f89f','#8000ff','#ff0000','#ff6d38']

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)],
        fill=True,
        fill_color=rainbow[int(cluster)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# 5. Examine Clusters

In [21]:
toronto_merged['Cluster Labels'].value_counts()

6    60
5    18
4    12
0     4
7     2
3     2
2     1
1     1
Name: Cluster Labels, dtype: int64

### OBSERVATION 1:  Cluster 6 is most populated cluster followed by cluster 5 and cluster 4.

### Cluster 6

In [22]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 6, toronto_merged.columns[[0,1] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
1,M1C,Scarborough,Bar,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,6
2,M1E,Scarborough,Electronics Store,Spa,Intersection,Pizza Place,Breakfast Spot,Tech Startup,Medical Center,Rental Car Location,Yoga Studio,6
4,M1H,Scarborough,Thai Restaurant,Bank,Bakery,Fried Chicken Joint,Athletics & Sports,Caribbean Restaurant,Lounge,Coworking Space,Concert Hall,6
7,M1L,Scarborough,Bakery,Fast Food Restaurant,Intersection,Bus Station,Metro Station,Soccer Field,Park,Gift Shop,Ethiopian Restaurant,6
8,M1M,Scarborough,American Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio,6
9,M1N,Scarborough,General Entertainment,Skating Rink,Café,Comic Shop,Concert Hall,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,6
10,M1P,Scarborough,Vietnamese Restaurant,Latin American Restaurant,Chinese Restaurant,Pet Store,Comic Shop,Concert Hall,Event Space,Ethiopian Restaurant,Empanada Restaurant,6
11,M1R,Scarborough,Auto Garage,Breakfast Spot,Bakery,Shopping Mall,Sandwich Place,Donut Shop,Discount Store,Dog Run,Doner Restaurant,6
17,M2H,North York,Mediterranean Restaurant,Golf Course,Pool,Yoga Studio,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Doner Restaurant,6
18,M2J,North York,Fast Food Restaurant,Coffee Shop,Restaurant,Kids Store,Metro Station,Bakery,Tea Room,Japanese Restaurant,Toy / Game Store,6


### OBSERVATION 2: Cluster 6 mostly consists of Coffee Shops alongwith Restaurants & Bars.

### Cluster 5

In [23]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 5, toronto_merged.columns[[0,1] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
6,M1K,Scarborough,Coffee Shop,Discount Store,Department Store,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Dog Run,Doner Restaurant,5
12,M1S,Scarborough,Breakfast Spot,Sandwich Place,Chinese Restaurant,Yoga Studio,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,5
13,M1T,Scarborough,Fast Food Restaurant,Noodle House,Fried Chicken Joint,Bank,Thai Restaurant,Chinese Restaurant,Italian Restaurant,Pharmacy,Drugstore,5
15,M1W,Scarborough,Fast Food Restaurant,Breakfast Spot,Grocery Store,Pharmacy,Pizza Place,Coffee Shop,American Restaurant,Thrift / Vintage Store,Sandwich Place,5
24,M2R,North York,Pizza Place,Grocery Store,Coffee Shop,Discount Store,Drugstore,Dim Sum Restaurant,Diner,Dog Run,Doner Restaurant,5
31,M3L,North York,Bank,Shopping Mall,Yoga Studio,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,5
33,M3N,North York,Grocery Store,Athletics & Sports,Liquor Store,Discount Store,Yoga Studio,Drugstore,Diner,Dog Run,Doner Restaurant,5
35,M4B,East York,Pizza Place,Pet Store,Athletics & Sports,Gastropub,Intersection,Pharmacy,Café,Breakfast Spot,Bank,5
39,M4H,East York,Yoga Studio,Bank,Gym,Housing Development,Intersection,Liquor Store,Discount Store,Park,Coffee Shop,5
45,M4P,Central Toronto,Grocery Store,Park,Breakfast Spot,Sandwich Place,Clothing Store,Food & Drink Shop,Hotel,Yoga Studio,Doner Restaurant,5


### OBSERVATION 3: Cluster 5 mostly consists of Pizza Place alongwith Restaurants & different kind of Stores.

### Cluster 4

In [24]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[0,1] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,M1B,Scarborough,Print Shop,Department Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,4
14,M1V,Scarborough,Playground,Asian Restaurant,Yoga Studio,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,4
25,M3A,North York,Park,Food & Drink Shop,Ethiopian Restaurant,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Department Store,4
30,M3K,North York,Airport,Other Repair Shop,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,4
44,M4N,Central Toronto,Bus Line,Swim School,Yoga Studio,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,4
50,M4W,Downtown Toronto,Playground,Building,Trail,Yoga Studio,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,4
64,M5P,Central Toronto,Park,Sushi Restaurant,Jewelry Store,Yoga Studio,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,4
72,M6B,North York,Pizza Place,Japanese Restaurant,Pub,Yoga Studio,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run,4
74,M6E,York,Fast Food Restaurant,Market,Pharmacy,Women's Store,Grocery Store,Donut Shop,Dessert Shop,Dim Sum Restaurant,Diner,4
79,M6L,North York,Deli / Bodega,Bakery,Construction & Landscaping,Basketball Court,Yoga Studio,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,4


### OBSERVATION 4: Cluster 4 mostly consists of Park alongwith other physical activity venues.

### Cluster 0

In [25]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[0,1] + list(range(6, toronto_merged.shape[1]))]]

Unnamed: 0,PostalCode,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
3,M1G,Scarborough,Korean Restaurant,Convenience Store,Yoga Studio,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,0
23,M2P,North York,Bank,Convenience Store,Bar,Yoga Studio,Drugstore,Discount Store,Dog Run,Doner Restaurant,Donut Shop,0
40,M4J,East York,Coffee Shop,Convenience Store,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,0
98,M9N,York,Yoga Studio,Drugstore,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,0


### OBSERVATION 5: Cluster 0 mostly consists of Park alongwith Convenience and Discount Stores