<h1 align=center><font size = 5>Segmenting and Clustering Neighborhoods in Toronto</font></h1>

### Install Beautiful Soup package

In [1]:
!pip install bs4
!pip install geopy



### Import libraries

In [33]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

### Download the wikipedia url

In [3]:
!wget https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M -O toronto_postalcodes.html

--2021-06-28 04:20:13--  https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
Resolving en.wikipedia.org (en.wikipedia.org)... 198.35.26.96, 2620:0:863:ed1a::1
Connecting to en.wikipedia.org (en.wikipedia.org)|198.35.26.96|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 83445 (81K) [text/html]
Saving to: ‘toronto_postalcodes.html’


2021-06-28 04:20:14 (456 KB/s) - ‘toronto_postalcodes.html’ saved [83445/83445]



### Read the wikipedia html data

In [4]:
html = ''.join(open("toronto_postalcodes.html").readlines())

### Process wikipedea data into dataframe

In [5]:
soup = BeautifulSoup(html, 'html.parser')
table_contents=[]
table = soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = row.span.text.split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

In [6]:
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

In [7]:
df.shape

(103, 3)

### Read geospatial coordinates data

In [8]:
coords = pd.read_csv('./Geospatial_Coordinates.csv')

In [9]:
coords.columns = ['PostalCode', 'Latitude', 'Longitude']
neighborhoods = df.merge(coords)

In [10]:
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


### Create a map of Toronto 

In [11]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [12]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Get all Neighborhoods below median latitude 

In [13]:
median_latitude = neighborhoods['Latitude'].median()
southern_neighborhoods = neighborhoods[neighborhoods['Latitude']<median_latitude]
southern_neighborhoods['Latitude'].max()

43.696319

### Create map of the southern neighborhoods

In [14]:
map_neighborhoods = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(southern_neighborhoods['Latitude'], southern_neighborhoods['Longitude'], southern_neighborhoods['Borough'], southern_neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_neighborhoods)  
    
map_neighborhoods

#### Define Foursquare Credentials and Version


In [15]:
CLIENT_ID = 'W3PK514DCWEWJUBPJJIS5E20VGTHQ5GOJHLFMHBM40Z2AS0N' # your Foursquare ID
CLIENT_SECRET = 'SOPT4QIDP0NWQQGG51214QAETBK0TEYBICJEPD4UDLK0IPJV' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: W3PK514DCWEWJUBPJJIS5E20VGTHQ5GOJHLFMHBM40Z2AS0N
CLIENT_SECRET:SOPT4QIDP0NWQQGG51214QAETBK0TEYBICJEPD4UDLK0IPJV


#### Create a function to get venues in all the sourthern neighborhoods

In [16]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [17]:
# type your answer here
southern_venues = getNearbyVenues(names=southern_neighborhoods['Neighborhood'],
                                   latitudes=southern_neighborhoods['Latitude'],
                                   longitudes=southern_neighborhoods['Longitude']
                                  )

Regent Park, Harbourfront
Ontario Provincial Government
Islington Avenue
Garden District, Ryerson
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
The Beaches
Berczy Park
Caledonia-Fairbanks
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Del Ray, Mount Dennis, Keelsdale and Silverthorn
Birch Cliff, Cliffside West
Runnymede, The Junction North
High Park, The Junction South
Westmount
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Enclave of L4W
Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens
University o

#### Let's check the size of the resulting dataframe

In [18]:
print(southern_venues.shape)
southern_venues.head()

(1564, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


#### Let's check how many venues were returned for each neighborhood


In [19]:
southern_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood, Long Branch",6,6,6,6,6,6
Berczy Park,46,46,46,46,46,46
"Birch Cliff, Cliffside West",4,4,4,4,4,4
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",15,15,15,15,15,15
Caledonia-Fairbanks,4,4,4,4,4,4
Central Bay Street,62,62,62,62,62,62
Christie,14,14,14,14,14,14
Church and Wellesley,69,69,69,69,69,69
"Commerce Court, Victoria Hotel",100,100,100,100,100,100


#### Let's find out how many unique categories can be curated from all the returned venues


In [20]:
print('There are {} uniques categories.'.format(len(southern_venues['Venue Category'].unique())))

There are 226 uniques categories.


#### Analyze Each Neighborhood


In [21]:
# one hot encoding
southern_onehot = pd.get_dummies(southern_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
southern_onehot['Neighborhood'] = southern_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [southern_onehot.columns[-1]] + list(southern_onehot.columns[:-1])
southern_onehot = southern_onehot[fixed_columns]

southern_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Examine the new dataframe size.


In [22]:
southern_onehot.shape

(1564, 226)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [23]:
southern_grouped = southern_onehot.groupby('Neighborhood').mean().reset_index()
southern_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0
2,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Brockton, Parkdale Village, Exhibition Place",0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.066667,0.133333,0.133333,0.133333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25
6,Central Bay Street,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.016129,0.0,0.0,0.016129,0.0,0.0,0.0
7,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Church and Wellesley,0.014493,0.014493,0.014493,0.0,0.0,0.0,0.0,0.014493,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0
9,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0


Confirm the new size

In [24]:
southern_grouped.shape

(50, 226)

#### Let's print each neighborhood along with the top 5 most common venues


In [25]:
num_top_venues = 5

for hood in southern_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = southern_grouped[southern_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.33
1  Sandwich Place  0.17
2             Pub  0.17
3     Coffee Shop  0.17
4    Dance Studio  0.17


----Berczy Park----
                venue  freq
0        Cocktail Bar  0.09
1      Sandwich Place  0.07
2         Coffee Shop  0.07
3              Bakery  0.07
4  Seafood Restaurant  0.04


----Birch Cliff, Cliffside West----
                     venue  freq
0          College Stadium  0.25
1             Skating Rink  0.25
2    General Entertainment  0.25
3                     Café  0.25
4  New American Restaurant  0.00


----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0  Sandwich Place  0.09
1  Breakfast Spot  0.09
2            Café  0.09
3     Coffee Shop  0.09
4       Nightclub  0.05


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0    Airport Lounge  0.13
1   Airport Service  0.13
2  Ai

#### Let's put that into a _pandas_ dataframe


First, let's write a function to sort the venues in descending order.


In [26]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [27]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = southern_grouped['Neighborhood']

for ind in np.arange(southern_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(southern_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood, Long Branch",Pizza Place,Dance Studio,Coffee Shop,Pub,Sandwich Place,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
1,Berczy Park,Cocktail Bar,Coffee Shop,Bakery,Sandwich Place,Seafood Restaurant,Vegetarian / Vegan Restaurant,Beer Bar,Farmers Market,Bistro,Basketball Stadium
2,"Birch Cliff, Cliffside West",College Stadium,Café,Skating Rink,General Entertainment,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run
3,"Brockton, Parkdale Village, Exhibition Place",Sandwich Place,Café,Coffee Shop,Breakfast Spot,Yoga Studio,Climbing Gym,Restaurant,Stadium,Japanese Restaurant,Italian Restaurant
4,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport Terminal,Plane,Bar,Coffee Shop,Rental Car Location,Sculpture Garden,Boutique,Boat or Ferry


## Cluster Neighborhoods


Run _k_-means to cluster the neighborhood into 5 clusters.


In [28]:
# set number of clusters
kclusters = 5

southern_grouped_clustering = southern_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(southern_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.


In [29]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

southern_merged = southern_neighborhoods

# merge southernn_grouped with southern_data to add latitude/longitude for each neighborhood
southern_merged = southern_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

southern_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0.0,Coffee Shop,Pub,Park,Bakery,Café,Mexican Restaurant,Dessert Shop,Chocolate Shop,Discount Store,Distribution Center
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,0.0,Coffee Shop,Sushi Restaurant,Burrito Place,Yoga Studio,Mexican Restaurant,Bank,Japanese Restaurant,Café,Restaurant,Diner
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,,,,,,,,,,,
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0.0,Coffee Shop,Clothing Store,Sandwich Place,Café,Bank,Pizza Place,Japanese Restaurant,Cosmetics Shop,Hotel,Bookstore
11,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,3.0,Filipino Restaurant,Middle Eastern Restaurant,Women's Store,Event Space,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


### Remove NaN data

In [74]:
southern_merged.dropna(axis='rows', inplace=True)

Finally, let's visualize the resulting clusters


In [75]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(southern_merged['Latitude'], southern_merged['Longitude'], southern_merged['Neighborhood'], southern_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters


Cluster 1

It appears as though cluster 1 consists of neighborhoods that's most common venues are Coffe Shops and Cafés

In [78]:
southern_merged.loc[southern_merged['Cluster Labels'] == 0, southern_merged.columns[[1] + list(range(5, southern_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0.0,Coffee Shop,Pub,Park,Bakery,Café,Mexican Restaurant,Dessert Shop,Chocolate Shop,Discount Store,Distribution Center
4,Queen's Park,0.0,Coffee Shop,Sushi Restaurant,Burrito Place,Yoga Studio,Mexican Restaurant,Bank,Japanese Restaurant,Café,Restaurant,Diner
9,Downtown Toronto,0.0,Coffee Shop,Clothing Store,Sandwich Place,Café,Bank,Pizza Place,Japanese Restaurant,Cosmetics Shop,Hotel,Bookstore
14,East York,0.0,Spa,Dance Studio,Beer Store,Skating Rink,Intersection,Concert Hall,Discount Store,Ethiopian Restaurant,Colombian Restaurant,Comfort Food Restaurant
15,Downtown Toronto,0.0,Coffee Shop,Café,Cocktail Bar,Italian Restaurant,Clothing Store,Beer Bar,Restaurant,Moroccan Restaurant,Cosmetics Shop,Department Store
16,York,0.0,Tennis Court,Field,Hockey Arena,Trail,Deli / Bodega,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
17,Etobicoke,0.0,Liquor Store,Beer Store,Park,Coffee Shop,Convenience Store,Café,Shopping Plaza,Pet Store,Pharmacy,Garden Center
19,East Toronto,0.0,Health Food Store,Pub,Trail,Women's Store,Deli / Bodega,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
20,Downtown Toronto,0.0,Cocktail Bar,Coffee Shop,Bakery,Sandwich Place,Seafood Restaurant,Vegetarian / Vegan Restaurant,Beer Bar,Farmers Market,Bistro,Basketball Stadium
24,Downtown Toronto,0.0,Coffee Shop,Sandwich Place,Sushi Restaurant,Italian Restaurant,Japanese Restaurant,Café,Salad Place,Pizza Place,Bank,Restaurant


Cluster 2

For this cluster it appears as though the park is the most popular venue among these southern neighborhoods

In [79]:
southern_merged.loc[southern_merged['Cluster Labels'] == 1, southern_merged.columns[[1] + list(range(5, southern_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,York,1.0,Park,Women's Store,Pool,Dance Studio,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
35,East York/East Toronto,1.0,Intersection,Park,Convenience Store,Deli / Bodega,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run
91,Downtown Toronto,1.0,Park,Playground,Trail,Women's Store,Dance Studio,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


Cluster 3

In [80]:
southern_merged.loc[southern_merged['Cluster Labels'] == 2, southern_merged.columns[[1] + list(range(5, southern_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
98,Etobicoke,2.0,River,Women's Store,Deli / Bodega,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center


Cluster 4

In [81]:
southern_merged.loc[southern_merged['Cluster Labels'] == 3, southern_merged.columns[[1] + list(range(5, southern_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Etobicoke,3.0,Filipino Restaurant,Middle Eastern Restaurant,Women's Store,Event Space,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run


Cluster 5


In [82]:
southern_merged.loc[southern_merged['Cluster Labels'] == 4, southern_merged.columns[[1] + list(range(5, southern_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
83,Central Toronto,4.0,Lawyer,Restaurant,Women's Store,Deli / Bodega,Escape Room,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run
