# Applied Data Science Capstone Week 3
# Segmenting and Clustering Toronto Neighborhoods
## Richard C. Anderson

In [113]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests
from bs4 import BeautifulSoup 

#### Scrape postal code data from Wikipedia page:

In [2]:
req = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(req.content,'lxml')
table = soup.find_all('table')[0]
df_list = pd.read_html(str(table))

#### Create dataframe from scraped data:

In [3]:
df_hoods=pd.DataFrame(df_list[0])
df_hoods.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [4]:
print('Shape of raw dataframe:',df_hoods.shape)

Shape of raw dataframe: (180, 3)


#### Drop rows with no Borough assignment from dataframe:

In [5]:
# Delete rows with no Borough assignment and reset df index
indices = df_hoods[ df_hoods["Borough"] == "Not assigned" ].index
df_hoods.drop(indices, inplace=True)
df_hoods.reset_index(inplace=True)
df_hoods.drop(['index'], axis=1, inplace=True)
df_hoods.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [6]:
print('Shape of filtered dataframe:',df_hoods.shape)

Shape of filtered dataframe: (103, 3)


### Add Geo-location to the Toronto neighborhood data

In [7]:
!pip install geocoder
import geocoder as gc

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 16.3MB/s ta 0:00:01
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [8]:
def get_geoloc_for_postalcode(pcode):
    print('PCode:',pcode)
    # initialize your variable to None
    lat_lng_found = None

    # loop until coordinates are returned
    while(lat_lng_found is None):
        g = gc.google('{}, Toronto, Ontario'.format(pcode))
        print(g)
        lat_lng_found = g.latlng
        print('Found:',lat_lng_found)

    lat = lat_lng_coords[0]
    long = lat_lng_coords[1]
    return lat,long

#### Unfortunately, the geocoder call would never return a value, always a [Request Denied] message

In [9]:
#test_lat,test_long = get_geoloc_for_postalcode('M5A')
#print('Lat:',test_lat,' Long:',test_long)

#### Importing the csv file instead...

In [10]:
df_geoloc = pd.read_csv('https://cocl.us/Geospatial_data')
df_geoloc.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merging the geolocation dataframe with the neighborhood dataframe

In [11]:
df_merged = pd.merge(left=df_hoods, right=df_geoloc, left_on='Postal Code', right_on='Postal Code')
df_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [12]:
print('Shape of merged dataframe:',df_merged.shape)

Shape of merged dataframe: (103, 5)


In [33]:
df_toronto = df_merged[df_merged['Borough'].str.contains("Toronto")]
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [14]:
#!conda install -c conda-forge geopy --yes # uncomment if you need geopy installed
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
!conda install -c conda-forge folium=0.5.0 --yes # uncomment if you need folium installed
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ------------------------------------------------------------
                       

In [15]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="yyz_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('Toronto geograpical coordinates are {}, {}.'.format(latitude, longitude))

Toronto geograpical coordinates are 43.6534817, -79.3839347.


In [16]:
# create map of Totonto using latitude and longitude values
#map_toronto_blank = folium.Map(location=[latitude, longitude], zoom_start=10)
#map_toronto_blank

In [17]:
# create map of Toronto boroughs using latitude and longitude values showing postal code centers
map_toronto_boroughs = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, pcode in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Postal Code']):
    label = '{}, {}'.format(pcode, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_boroughs)  
    
map_toronto_boroughs

#### Define Foursquare Credentials and Version

In [18]:
CLIENT_ID = 'NAE2KEGTXHYPOXDP23T5PUOJHXKUPTSSZV4AWVZ5YUE0KWPW' # your Foursquare ID
CLIENT_SECRET = 'CYT3A5YVLCSMMGYOKAQZMYJDSG2WGWSKRN12PJ4PZPCAGU43' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: NAE2KEGTXHYPOXDP23T5PUOJHXKUPTSSZV4AWVZ5YUE0KWPW
CLIENT_SECRET:CYT3A5YVLCSMMGYOKAQZMYJDSG2WGWSKRN12PJ4PZPCAGU43


## Explore Postal Codes in Toronto

I decided to examine the Toronto data from the perspective of the postal codes. I decided this due to the observation that both relationship between postal codes, to boroughs/neighborhoods was many-to-many and the lack of geolocation data borough and neighborhood granularity.

#### After several iterations, I chose the exploration radius at 400 meters as this seem to give the most distinct definitions to the postal code areas.

In [175]:
radius = 400
LIMIT = 100

In [176]:
def getNearbyVenues(names, boroughs, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, borough, lat, lng in zip(names, boroughs, latitudes, longitudes):
#        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name,
            borough,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postal Code',
                  'Borough',
                  'PCode Latitude', 
                  'PCode Longitude', 
                  'Venue', 
                  'V Latitude', 
                  'V Longitude', 
                  'V Category']
    
    return(nearby_venues)

In [177]:
df_t_venues = getNearbyVenues(names=df_toronto['Postal Code'], boroughs=df_toronto['Borough'],
                                   latitudes=df_toronto['Latitude'], longitudes=df_toronto['Longitude'] )
print('Toronto venues shape: ',df_t_venues.shape)
df_t_venues.head()

Toronto venues shape:  (1623, 8)


Unnamed: 0,Postal Code,Borough,PCode Latitude,PCode Longitude,Venue,V Latitude,V Longitude,V Category
0,M5A,Downtown Toronto,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,M5A,Downtown Toronto,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,M5A,Downtown Toronto,43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,M5A,Downtown Toronto,43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,M5A,Downtown Toronto,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [178]:
df_t_venues.groupby('Postal Code').count()

Unnamed: 0_level_0,Borough,PCode Latitude,PCode Longitude,Venue,V Latitude,V Longitude,V Category
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
M4E,4,4,4,4,4,4,4
M4K,42,42,42,42,42,42,42
M4L,23,23,23,23,23,23,23
M4M,40,40,40,40,40,40,40
M4N,3,3,3,3,3,3,3
M4P,9,9,9,9,9,9,9
M4R,22,22,22,22,22,22,22
M4S,34,34,34,34,34,34,34
M4T,2,2,2,2,2,2,2
M4V,16,16,16,16,16,16,16


In [179]:
print('There are {} uniques categories.'.format(len(df_t_venues['V Category'].unique())))

There are 242 uniques categories.


## Analyzing Each Postal Code

In [180]:
# one hot encoding
toronto_onehot = pd.get_dummies(df_t_venues[['V Category']], prefix="", prefix_sep="")

# add Postal Code and Borough columns back to dataframe
toronto_onehot['Borough'] = df_t_venues['Borough'] 
toronto_onehot['Postal Code'] = df_t_venues['Postal Code'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Postal Code,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Borough
0,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
1,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
2,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
3,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto
4,M5A,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Downtown Toronto


In [181]:
print('Toronto Onehot df shape: ',toronto_onehot.shape)

Toronto Onehot df shape:  (1623, 244)


In [182]:
toronto_grouped = toronto_onehot.groupby('Postal Code').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Postal Code,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,M4E,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,...,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381
2,M4L,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.025
4,M4N,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [183]:
print('Toronto grouped df shape:', toronto_grouped.shape)

Toronto grouped df shape: (39, 243)


In [184]:
num_top_venues = 3

for code in toronto_grouped['Postal Code']:
    print("----"+code+"----")
    temp = toronto_grouped[toronto_grouped['Postal Code'] == code].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----M4E----
               venue  freq
0  Health Food Store  0.25
1                Pub  0.25
2              Trail  0.25


----M4K----
                venue  freq
0    Greek Restaurant  0.21
1  Italian Restaurant  0.07
2         Coffee Shop  0.07


----M4L----
                  venue  freq
0                  Park  0.09
1        Sandwich Place  0.09
2  Fast Food Restaurant  0.09


----M4M----
         venue  freq
0         Café  0.10
1  Coffee Shop  0.08
2      Brewery  0.05


----M4N----
         venue  freq
0         Park  0.33
1     Bus Line  0.33
2  Swim School  0.33


----M4P----
               venue  freq
0            Dog Run  0.11
1   Department Store  0.11
2  Convenience Store  0.11


----M4R----
                     venue  freq
0           Clothing Store  0.18
1              Coffee Shop  0.09
2  Health & Beauty Service  0.05


----M4S----
            venue  freq
0    Dessert Shop  0.09
1  Sandwich Place  0.09
2            Café  0.06


----M4T----
               venue  freq
0    

Venue descending sorter:

In [185]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [186]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postal Code']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Venue'.format(ind+1))

# create a new dataframe
pcodes_venues_sorted = pd.DataFrame(columns=columns)
pcodes_venues_sorted['Postal Code'] = toronto_grouped['Postal Code']

for ind in np.arange(toronto_grouped.shape[0]):
    pcodes_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

pcodes_venues_sorted.head()

Unnamed: 0,Postal Code,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
0,M4E,Trail,Health Food Store,Neighborhood,Pub,Doner Restaurant
1,M4K,Greek Restaurant,Coffee Shop,Italian Restaurant,Restaurant,Ice Cream Shop
2,M4L,Park,Sandwich Place,Fast Food Restaurant,Pub,Liquor Store
3,M4M,Café,Coffee Shop,Bakery,Gastropub,American Restaurant
4,M4N,Park,Bus Line,Swim School,Yoga Studio,Dessert Shop


## Cluster Postal Codes

#### After several iterations, I chose the number of clusters to be 8. Any number lower than 8 and the results come back as one big cluster with the remaining clusters have membership of only one or two postal codes. At 8, the one big cluster finally split into two smaller clusters.

In [187]:
# set number of clusters
kclusters = 8

t_grp_clustering = toronto_grouped.drop('Postal Code', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(t_grp_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:40] 

array([0, 6, 6, 1, 4, 6, 6, 6, 2, 6, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       7, 5, 1, 1, 1, 6, 6, 6, 1, 1, 6, 1, 1, 6, 6, 6, 6], dtype=int32)

In [188]:
# add clustering labels
pcodes_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_venues_merged = df_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each postal code
toronto_venues_merged = toronto_venues_merged.join(pcodes_venues_sorted.set_index('Postal Code'), on='Postal Code')

toronto_venues_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,6,Coffee Shop,Bakery,Pub,Park,Breakfast Spot
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,6,Coffee Shop,Sushi Restaurant,Yoga Studio,Bank,Beer Bar
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,6,Clothing Store,Coffee Shop,Café,Bubble Tea Shop,Restaurant
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,6,Coffee Shop,Café,Cocktail Bar,American Restaurant,Gastropub
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Health Food Store,Neighborhood,Pub,Doner Restaurant


In [198]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_venues_merged['Latitude'], toronto_venues_merged['Longitude'], toronto_venues_merged['Postal Code'], toronto_venues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine the Postal Code Clusters

#### The Toronto postal codes resolved into two main clusters, where Cluster 2's dominant venue is Cafés and Cluster 7's dominant venue is Coffee Shops. All other clusters are essentially outliers that are probably mostly residential, as their top venues tended to be residential amenities. I was a bit surprised that these residential outliers did not show enough similiarity to create a third distinct cluster.

Cluster 1:

In [190]:
toronto_venues_merged.loc[toronto_venues_merged['Cluster Labels'] == 0, toronto_venues_merged.columns[[0] + [1] + list(range(5, toronto_venues_merged.shape[1]))]]

Unnamed: 0,Postal Code,Borough,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
19,M4E,East Toronto,0,Trail,Health Food Store,Neighborhood,Pub,Doner Restaurant


Cluster 2: Cafés

In [191]:
toronto_venues_merged.loc[toronto_venues_merged['Cluster Labels'] == 1, toronto_venues_merged.columns[[0] + [1] + list(range(5, toronto_venues_merged.shape[1]))]]

Unnamed: 0,Postal Code,Borough,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
25,M6G,Downtown Toronto,1,Grocery Store,Café,Park,Baby Store,Nightclub
31,M6H,West Toronto,1,Bakery,Pharmacy,Brewery,Bank,Bar
43,M6K,West Toronto,1,Café,Coffee Shop,Breakfast Spot,Pet Store,Stadium
54,M4M,East Toronto,1,Café,Coffee Shop,Bakery,Gastropub,American Restaurant
69,M6P,West Toronto,1,Mexican Restaurant,Café,Thai Restaurant,Fried Chicken Joint,Italian Restaurant
74,M5R,Central Toronto,1,Sandwich Place,Café,Coffee Shop,BBQ Joint,Pizza Place
80,M5S,Downtown Toronto,1,Café,Bar,Italian Restaurant,Japanese Restaurant,Bookstore
84,M5T,Downtown Toronto,1,Café,Mexican Restaurant,Vietnamese Restaurant,Coffee Shop,Bakery


Cluster 3:

In [192]:
toronto_venues_merged.loc[toronto_venues_merged['Cluster Labels'] == 2, toronto_venues_merged.columns[[0] + [1] + list(range(5, toronto_venues_merged.shape[1]))]]

Unnamed: 0,Postal Code,Borough,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
83,M4T,Central Toronto,2,Playground,Trail,Yoga Studio,Department Store,Falafel Restaurant


Cluster 4:

In [193]:
toronto_venues_merged.loc[toronto_venues_merged['Cluster Labels'] == 3, toronto_venues_merged.columns[[0] + [1] + list(range(5, toronto_venues_merged.shape[1]))]]

Unnamed: 0,Postal Code,Borough,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
91,M4W,Downtown Toronto,3,Park,Playground,Trail,Yoga Studio,Doner Restaurant


Cluster 5:

In [194]:
toronto_venues_merged.loc[toronto_venues_merged['Cluster Labels'] == 4, toronto_venues_merged.columns[[0] + [1] + list(range(5, toronto_venues_merged.shape[1]))]]

Unnamed: 0,Postal Code,Borough,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
61,M4N,Central Toronto,4,Park,Bus Line,Swim School,Yoga Studio,Dessert Shop


Cluster 6:

In [195]:
toronto_venues_merged.loc[toronto_venues_merged['Cluster Labels'] == 5, toronto_venues_merged.columns[[0] + [1] + list(range(5, toronto_venues_merged.shape[1]))]]

Unnamed: 0,Postal Code,Borough,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
68,M5P,Central Toronto,5,Park,Trail,Sushi Restaurant,Bus Line,Jewelry Store


Cluster 7: Coffee Shops

In [196]:
toronto_venues_merged.loc[toronto_venues_merged['Cluster Labels'] == 6, toronto_venues_merged.columns[[0] + [1] + list(range(5, toronto_venues_merged.shape[1]))]]

Unnamed: 0,Postal Code,Borough,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
2,M5A,Downtown Toronto,6,Coffee Shop,Bakery,Pub,Park,Breakfast Spot
4,M7A,Downtown Toronto,6,Coffee Shop,Sushi Restaurant,Yoga Studio,Bank,Beer Bar
9,M5B,Downtown Toronto,6,Clothing Store,Coffee Shop,Café,Bubble Tea Shop,Restaurant
15,M5C,Downtown Toronto,6,Coffee Shop,Café,Cocktail Bar,American Restaurant,Gastropub
20,M5E,Downtown Toronto,6,Coffee Shop,Cocktail Bar,Bakery,Cheese Shop,Café
24,M5G,Downtown Toronto,6,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Bar
30,M5H,Downtown Toronto,6,Coffee Shop,Café,Restaurant,Deli / Bodega,Gym
36,M5J,Downtown Toronto,6,Coffee Shop,Aquarium,Hotel,Café,Fried Chicken Joint
37,M6J,West Toronto,6,Bar,Asian Restaurant,Restaurant,Café,Vegetarian / Vegan Restaurant
41,M4K,East Toronto,6,Greek Restaurant,Coffee Shop,Italian Restaurant,Restaurant,Ice Cream Shop


Cluster 8:

In [197]:
toronto_venues_merged.loc[toronto_venues_merged['Cluster Labels'] == 7, toronto_venues_merged.columns[[0] + [1] + list(range(5, toronto_venues_merged.shape[1]))]]

Unnamed: 0,Postal Code,Borough,Cluster Labels,1st Venue,2nd Venue,3rd Venue,4th Venue,5th Venue
62,M5N,Central Toronto,7,Garden,Health & Beauty Service,Home Service,Donut Shop,Diner
