# Toronto neighborhoods - How safe are they from Covid-19?
#### Build a dataframe of Covid-19 cases and neighborhoods in Toronto by web scraping the data from Wikipedia page and Canada Covid-19 data website
#### Get the geographical coordinates of the neighborhoods
#### Obtain the venue data for the neighborhoods from Foursquare API
#### Explore and cluster the neighborhoods
#### Select the safest neighborhood to explore

#### 1. Import libraries

In [3]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

#### 2.Importing Covid-19 data

In [4]:
#Reading Covi-19 data as an excel file from https://www.toronto.ca/home/covid-19/covid-19-latest-city-of-toronto-news/covid-19-status-of-cases-in-toronto/
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0


body = client_fc94b669a6f14b43ad8b13f90e749295.get_object(Bucket='couseracapstoneproject-donotdelete-pr-onz12bad7nwubq',Key='City_Toronto_COVID-19_neighborhood_june 11.xlsx')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_tor = pd.read_excel(body)
df_tor.head()


Unnamed: 0,Neighbourhood Name,Cases,Unnamed: 2,Postal Code
0,Agincourt,57,,M1B
1,Agincourt South-Malvern West,48,,M1C
2,Alderwood,32,,M1E
3,Annex,76,,M1G
4,Banbury-Don Mills,29,,M1H


#### 3. Cleaning Data 

In [5]:
#Removing unnecessary columns
df_tor.drop(df_tor.columns[[2]], axis=1, inplace=True)
df_tor.rename(columns={'Neighbourhood Name':'Neighborhood'},inplace=True)
df_tor.head()

Unnamed: 0,Neighborhood,Cases,Postal Code
0,Agincourt,57,M1B
1,Agincourt South-Malvern West,48,M1C
2,Alderwood,32,M1E
3,Annex,76,M1G
4,Banbury-Don Mills,29,M1H


#### 4. Get the geographical coordinates

In [6]:
#Reading latitude and longitude
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Kuala Lumpur, Malaysia'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords
df_lat = [ get_latlng(neighborhood) for neighborhood in kl_df["Neighborhood"]]

In [7]:
#Merging them on Postal Code
df_tor = pd.merge(df_tor,df_lat, on ="Postal Code")
df_tor.head()

Unnamed: 0,Neighborhood,Cases,Postal Code,Latitude,Longitude
0,Agincourt,57,M1B,43.806686,-79.194353
1,Agincourt South-Malvern West,48,M1C,43.784535,-79.160497
2,Alderwood,32,M1E,43.763573,-79.188711
3,Annex,76,M1G,43.770992,-79.216917
4,Banbury-Don Mills,29,M1H,43.773136,-79.239476


#### 5. Toronto map with size of markers depicting cases

In [8]:
# get the coordinates of Kuala Lumpur
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, cases, neighborhood in zip(df_tor['Latitude'], df_tor['Longitude'], df_tor['Cases'], df_tor['Neighborhood']):
    label = '{}, {}'.format(neighborhood, cases)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=cases/20,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### 6. Use the Foursquare API to explore the neighborhoods

In [9]:
#foursquare credentials
CLIENT_ID = 'EG4WQUKYXRNU1RZLTCLKYSKCROTXC41BMQX13I0B452HDPDT' # your Foursquare ID
CLIENT_SECRET = 'QKVUXUUVP3VXWVCWHRNAQKW3UKKL1EFKYBGW04RLDZXY440W' # your Foursquare Secret
VERSION = '20180605'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EG4WQUKYXRNU1RZLTCLKYSKCROTXC41BMQX13I0B452HDPDT
CLIENT_SECRET:QKVUXUUVP3VXWVCWHRNAQKW3UKKL1EFKYBGW04RLDZXY440W


#### 7.Top 30 venues that are within a radius of 500 meters.

In [10]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [11]:
Toronto_venues = getNearbyVenues(names=df_tor['Neighborhood'],
                                   latitudes=df_tor['Latitude'],
                                   longitudes=df_tor['Longitude']
                                  )

Agincourt 
Agincourt South-Malvern West
Alderwood
Annex
Banbury-Don Mills
Bathurst Manor
Bay Street Corridor
Bayview Village
Bayview Woods-Steeles
Bedford Park-Nortown
Beechborough-Greenbrook
Bendale
Birchcliffe-Cliffside
Black Creek
Blake-Jones
Briar Hill - Belgravia
Bridle Path-Sunnybrook-York Mills
Broadview North
Brookhaven-Amesbury
Cabbagetown-South St. James Town
Caledonia-Fairbank
Casa Loma
Centennial Scarborough
Church-Yonge Corridor
Clairlea-Birchmount
Clanton Park
Cliffcrest
Corso Italia-Davenport
Danforth
Danforth-East York
Don Valley Village
Dorset Park
Dovercourt-Wallace Emerson-Junction
Downsview-Roding-CFB
Dufferin Grove
East End-Danforth
Edenbridge-Humber Valley
Eglinton East
Elms-Old Rexdale
Englemount-Lawrence
Eringate-Centennial-West Deane
Etobicoke West Mall
Flemingdon Park
Forest Hill North
Forest Hill South
Glenfield-Jane Heights
Greenwood-Coxwell
Guildwood
Henry Farm
High Park North
High Park-Swansea
Highland Creek
Hillcrest Village
Humber Heights-Westmount
Humbe

In [12]:
print(Toronto_venues.shape)
Toronto_venues.head()

(1285, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Agincourt,43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,Agincourt South-Malvern West,43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
2,Agincourt South-Malvern West,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,Alderwood,43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
4,Alderwood,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [13]:
#Grouping the venues by neighborhoods 
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,1,1,1,1,1,1
Agincourt South-Malvern West,2,2,2,2,2,2
Alderwood,7,7,7,7,7,7
Annex,3,3,3,3,3,3
Banbury-Don Mills,8,8,8,8,8,8
Bathurst Manor,2,2,2,2,2,2
Bay Street Corridor,5,5,5,5,5,5
Bayview Village,10,10,10,10,10,10
Bayview Woods-Steeles,2,2,2,2,2,2
Bedford Park-Nortown,4,4,4,4,4,4


#### 8. Analyzing each neighborhood and calculating the weight of each venue

In [14]:
# one hot encoding
tor_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
tor_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [tor_onehot.columns[-1]] + list(tor_onehot.columns[:-1])
tor_onehot = tor_onehot[fixed_columns]

tor_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [15]:
tor_grouped = tor_onehot.groupby('Neighborhood').mean().reset_index()
tor_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Agincourt,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
1,Agincourt South-Malvern West,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
2,Alderwood,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
3,Annex,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
4,Banbury-Don Mills,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
5,Bathurst Manor,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
6,Bay Street Corridor,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
7,Bayview Village,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
8,Bayview Woods-Steeles,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.500000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00
9,Bedford Park-Nortown,0.000000,0.000000,0.00,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0000,0.00


#### 9.Returning only the top 5 venues for each neighborhood

In [16]:
num_top_venues = 5

for hood in tor_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = tor_grouped[tor_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt ----
                  venue  freq
0  Fast Food Restaurant   1.0
1           Yoga Studio   0.0
2     Mobile Phone Shop   0.0
3    Mac & Cheese Joint   0.0
4      Malay Restaurant   0.0


----Agincourt South-Malvern West----
                        venue  freq
0  Construction & Landscaping   0.5
1                         Bar   0.5
2                 Yoga Studio   0.0
3           Mobile Phone Shop   0.0
4            Malay Restaurant   0.0


----Alderwood----
                 venue  freq
0       Breakfast Spot  0.14
1       Medical Center  0.14
2    Electronics Store  0.14
3                 Bank  0.14
4  Rental Car Location  0.14


----Annex----
                        venue  freq
0                 Coffee Shop  0.67
1           Korean Restaurant  0.33
2                 Yoga Studio  0.00
3  Modern European Restaurant  0.00
4                      Market  0.00


----Banbury-Don Mills----
                 venue  freq
0      Thai Restaurant  0.12
1  Fried Chicken Joint  0.12
2    

             venue  freq
0             Café  0.13
1      Coffee Shop  0.10
2           Bakery  0.07
3      Yoga Studio  0.03
4  Coworking Space  0.03


----Forest Hill South----
                venue  freq
0                Park  0.33
1         Swim School  0.33
2            Bus Line  0.33
3         Yoga Studio  0.00
4  Miscellaneous Shop  0.00


----Glenfield-Jane Heights----
               venue  freq
0                Gym  0.11
1     Breakfast Spot  0.11
2              Hotel  0.11
3  Food & Drink Shop  0.11
4   Department Store  0.11


----Greenwood-Coxwell----
                 venue  freq
0       Clothing Store  0.16
1          Coffee Shop  0.11
2          Yoga Studio  0.05
3  Sporting Goods Shop  0.05
4                Diner  0.05


----Guildwood----
          venue  freq
0  Dessert Shop  0.10
1           Gym  0.07
2   Pizza Place  0.07
3   Coffee Shop  0.07
4          Café  0.07


----Henry Farm----
                venue  freq
0                 Gym   0.5
1                Park   0.5


            venue  freq
0     Yoga Studio  0.06
1      Restaurant  0.06
2  Farmers Market  0.06
3   Burrito Place  0.06
4      Skate Park  0.06


----Newtonbrook West----
         venue  freq
0          Gym  0.08
1       Bakery  0.08
2  Pizza Place  0.08
3  Coffee Shop  0.08
4   Restaurant  0.08


----Niagara----
                venue  freq
0         Pizza Place  0.22
1                 Gym  0.11
2  Athletics & Sports  0.11
3         Coffee Shop  0.11
4                 Pub  0.11


----North Riverdale----
                venue  freq
0                Park   0.5
1               River   0.5
2         Yoga Studio   0.0
3   Mobile Phone Shop   0.0
4  Mac & Cheese Joint   0.0


----North St. James Town----
              venue  freq
0    Baseball Field   1.0
1       Yoga Studio   0.0
2            Lounge   0.0
3  Malay Restaurant   0.0
4            Market   0.0


----O'Connor-Parkview----
                  venue  freq
0                   Gym  0.06
1         Tanning Salon  0.06
2         Grocery 

In [17]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Getting the Most 10 common venues by neighborhood

In [18]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted1 = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted1['Neighborhood'] = tor_grouped['Neighborhood']

for ind in np.arange(tor_grouped.shape[0]):
    neighborhoods_venues_sorted1.iloc[ind, 1:] = return_most_common_venues(tor_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted1.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Fast Food Restaurant,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center
1,Agincourt South-Malvern West,Construction & Landscaping,Bar,Women's Store,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop
2,Alderwood,Mexican Restaurant,Electronics Store,Breakfast Spot,Bank,Intersection,Medical Center,Rental Car Location,Dessert Shop,Eastern European Restaurant,Drugstore
3,Annex,Coffee Shop,Korean Restaurant,Women's Store,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
4,Banbury-Don Mills,Athletics & Sports,Gas Station,Thai Restaurant,Bakery,Caribbean Restaurant,Bank,Fried Chicken Joint,Hakka Restaurant,Donut Shop,Dog Run


#### Cluster Neighborhoods

In [19]:
kclusters = 3

tor_grouped_clustering = tor_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(tor_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

#### Cluster output modification to answer our questions


In [20]:
#neighborhoods_venues_sorted1.drop('Cluster Labels', axis=1, inplace=True)

In [21]:
# add clustering labels
neighborhoods_venues_sorted1.insert(0, 'Cluster Labels', kmeans.labels_)

tor_merged = df_tor

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
tor_merged = tor_merged.join(neighborhoods_venues_sorted1.set_index('Neighborhood'), on='Neighborhood')

In [22]:
tor_merged = tor_merged.dropna()

#### Plotting Toronto Map for different clusters, size of marker indicates cases

In [23]:
latitude=43.658
longitude=-79.388
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster,cases in zip(tor_merged['Latitude'], tor_merged['Longitude'], tor_merged['Neighborhood'], tor_merged['Cluster Labels'], tor_merged['Cases']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=cases/15,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [24]:
tor_merged['Cluster Labels'].value_counts()

1.0    82
0.0    13
2.0     2
Name: Cluster Labels, dtype: int64

#### What each  Clusters Mean?

In [25]:
#Cluster 0 
tor_merged.loc[tor_merged['Cluster Labels'] == 0, tor_merged.columns[[2] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,M1V,0.0,Park,Playground,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center
23,M2P,0.0,Park,Convenience Store,Women's Store,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
25,M3A,0.0,Park,Fireworks Store,Food & Drink Shop,Women's Store,Department Store,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
30,M3K,0.0,Park,Snack Place,Airport,Business Service,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop
31,M3L,0.0,Grocery Store,Bank,Shopping Mall,Park,Gift Shop,Ethiopian Restaurant,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
40,M4J,0.0,Park,Convenience Store,Intersection,Metro Station,Women's Store,Department Store,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop
44,M4N,0.0,Park,Swim School,Bus Line,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
48,M4T,0.0,Gym,Park,Women's Store,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
50,M4W,0.0,Park,Playground,Trail,Dance Studio,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center
74,M6E,0.0,Park,Pool,Women's Store,Gluten-free Restaurant,Ethiopian Restaurant,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center


In [26]:
#cluster 1
tor_merged.loc[tor_merged['Cluster Labels'] == 1, tor_merged.columns[[2] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,1.0,Fast Food Restaurant,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center
1,M1C,1.0,Construction & Landscaping,Bar,Women's Store,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop
2,M1E,1.0,Mexican Restaurant,Electronics Store,Breakfast Spot,Bank,Intersection,Medical Center,Rental Car Location,Dessert Shop,Eastern European Restaurant,Drugstore
3,M1G,1.0,Coffee Shop,Korean Restaurant,Women's Store,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
4,M1H,1.0,Athletics & Sports,Gas Station,Thai Restaurant,Bakery,Caribbean Restaurant,Bank,Fried Chicken Joint,Hakka Restaurant,Donut Shop,Dog Run
5,M1J,1.0,Playground,Construction & Landscaping,Women's Store,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
6,M1K,1.0,Department Store,Coffee Shop,Convenience Store,Discount Store,Bus Station,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
7,M1L,1.0,Bakery,Bus Line,Park,Ice Cream Shop,Intersection,Bus Station,Metro Station,Soccer Field,Cosmetics Shop,Electronics Store
8,M1M,1.0,American Restaurant,Motel,Women's Store,Deli / Bodega,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
9,M1N,1.0,College Stadium,Skating Rink,General Entertainment,Café,Deli / Bodega,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run


In [27]:
#cluster 2
tor_merged.loc[tor_merged['Cluster Labels'] == 2, tor_merged.columns[[2] + list(range(5, tor_merged.shape[1]))]]

Unnamed: 0,Postal Code,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
89,M8Y,2.0,Baseball Field,Women's Store,Farmers Market,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
95,M9M,2.0,Baseball Field,Food Service,Women's Store,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop


#### Cluster Observation

##### As we can see from the above dataframes that Cluster 0 is parks dominant, Cluster 1 is restaurants dominant  and Cluster 2 is Sports field dominant.