# 3rd week assignment: Clustering Toronto

### by Arturo López

In [1]:
import pandas as pd

### Get data from wikipedia and generate a data frame

In [2]:
# Parse every HTML table on the Wikipedia page; the first one holds the postal codes.
# The 'Neighbourhood' header is renamed to the US spelling used in the rest of the notebook.
dfs = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
df = dfs[0].rename(columns={'Neighbourhood': 'Neighborhood'})
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### The table contains rows lacking data. If a row has no borough, drop it. If no neighborhood is assigned, use its borough as the neighborhood.

In [3]:
# clean the data frame
# 1) drop the rows that have no borough (the original index labels are kept)
df = df[df['Borough'] != 'Not assigned'].copy()

# 2) a neighborhood listed as 'Not assigned' takes the name of its borough.
#    The assignment is written back into df; the previous
#    set_index(...).replace(...) call returned a new frame that was discarded.
unnamed = df['Neighborhood'] == 'Not assigned'
df.loc[unnamed, 'Neighborhood'] = df.loc[unnamed, 'Borough']

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [4]:
# (rows, columns) left after cleaning
df.shape

(103, 3)

### Getting geographic data for each postal code.

In [5]:
# Download the geospatial CSV; it supplies the Latitude and Longitude columns
# that appear once it is merged with df.
locations=pd.read_csv('https://cocl.us/Geospatial_data')

In [6]:
# Inner-join the cleaned table with the coordinates on the columns the two frames share.
df_loc = df.merge(locations)
df_loc.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [7]:
# The row count should equal df's if every postal code found its coordinates.
df_loc.shape

(103, 5)

### Finally, segmenting and clustering neighborhoods in Toronto

In [8]:
#import required libraries
import numpy as np
from geopy.geocoders import Nominatim 
import requests
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
#!pip install folium
!conda install -c conda-forge folium=0.12.0 --yes
import folium

print('libraries ready!')

Collecting folium
  Downloading folium-0.12.0-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 6.0 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.0
libraries ready!


In [9]:
# Work on a copy so later changes to neighborhoods do not touch df_loc.
neighborhoods=df_loc.copy()
neighborhoods.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [10]:
# Let's get the coordinates of Toronto
address = 'Toronto, TO'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.65238435, -79.38356765.


#### Let's create a map of Toronto with its neighborhoods superimposed.

In [11]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of `neighborhoods`, with a "neighborhood, borough" popup.
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in neighborhoods[marker_columns].itertuples(index=False, name=None):
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=popup,
        color='purple',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.5,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

#### The analysis will focus on boroughs that contain "Toronto" in their names. This reduces the total number of neighborhoods, but makes the results easier to inspect.

In [12]:
# Keep the rows whose borough name includes 'Toronto' and renumber them from 0.
toronto_data = (
    neighborhoods
    .loc[neighborhoods['Borough'].str.contains('Toronto')]
    .reset_index(drop=True)
)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


#### As said, let's reduce the markers to the Toronto boroughs and create a new map. It also helps to see how many boroughs we are missing by restricting the information to the Toronto boroughs.

In [13]:
# Map of the Toronto-only boroughs, centred on the geocoded Toronto coordinates.
map_onlytoronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One circle marker per row of `toronto_data`; the popup shows the neighborhood name.
marker_columns = ['Latitude', 'Longitude', 'Neighborhood']
for lat, lng, neighborhood in toronto_data[marker_columns].itertuples(index=False, name=None):
    popup = folium.Popup(neighborhood, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_onlytoronto)

map_onlytoronto

#### Let's get Foursquare ready to use:

In [14]:
# The code was removed by Watson Studio for sharing.

#### Now let's add a limit to venues results and a maximum radius where foursquare should look for each neighborhood.

In [15]:
# LIMIT is read by getNearbyVenues and sent as the `limit` value of each API request.
LIMIT=100
# NOTE(review): getNearbyVenues has its own radius=500 default and the call below
# does not pass this variable, so it appears to be unused. Confirm before relying on it.
radius=500

#### I will use the function defined in the Lab, which looks for all the venues within the given radius of every neighborhood.

In [16]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect the venues Foursquare's explore endpoint returns around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed in parallel.
    radius : int, optional
        Value sent as the ``radius`` query parameter of every request.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string; the timeout keeps a
        # stalled connection from hanging the notebook
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()

        # a location may come back without any recommendation group
        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # some venues carry no category; record them as missing
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works for zero rows
    return pd.DataFrame(rows, columns=columns)

#### Now we use the function:

In [17]:
# Query the venues around every Toronto neighborhood (one API request per row).
toronto_venues = getNearbyVenues(
    toronto_data['Neighborhood'],
    toronto_data['Latitude'],
    toronto_data['Longitude'],
)

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

In [18]:
# Number of venues returned for each neighborhood (non-null count per column).
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,57,57,57,57,57,57
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",16,16,16,16,16,16
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",15,15,15,15,15,15
Central Bay Street,61,61,61,61,61,61
Christie,16,16,16,16,16,16
Church and Wellesley,79,79,79,79,79,79
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,33,33,33,33,33,33
Davisville North,9,9,9,9,9,9


In [19]:
# Report how many distinct venue categories were returned overall.
print('There are {} uniques categories.'.format(toronto_venues['Venue Category'].unique().size))

There are 234 uniques categories.


# Neighborhood analysis.

In [20]:
# One indicator column per venue category. The '_' prefix keeps a category that
# happens to be called 'Neighborhood' from clashing with the column added next.
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix='', prefix_sep='_')

# Put the neighborhood of each venue in front of the indicator columns.
toronto_onehot.insert(0, 'Neighborhood', toronto_venues['Neighborhood'])

toronto_onehot.head()

Unnamed: 0,Neighborhood,_Airport,_Airport Food Court,_Airport Lounge,_Airport Service,_Airport Terminal,_American Restaurant,_Antique Shop,_Aquarium,_Art Gallery,...,_Tibetan Restaurant,_Toy / Game Store,_Trail,_Train Station,_Vegetarian / Vegan Restaurant,_Video Game Store,_Vietnamese Restaurant,_Wine Bar,_Wine Shop,_Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# (venues, 1 neighborhood column + one column per category)
toronto_onehot.shape

(1615, 235)

#### Group rows by neighborhood, taking the mean of the frequency of occurrence of each category.

In [22]:
# The mean of each indicator column per neighborhood is the share of that
# neighborhood's venues falling in the category; 'Neighborhood' stays a column.
toronto_grouped = toronto_onehot.groupby('Neighborhood', as_index=False).mean()
toronto_grouped

Unnamed: 0,Neighborhood,_Airport,_Airport Food Court,_Airport Lounge,_Airport Service,_Airport Terminal,_American Restaurant,_Antique Shop,_Aquarium,_Art Gallery,...,_Tibetan Restaurant,_Toy / Game Store,_Trail,_Train Station,_Vegetarian / Vegan Restaurant,_Video Game Store,_Vietnamese Restaurant,_Wine Bar,_Wine Shop,_Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.066667,0.066667,0.133333,0.2,0.133333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.016393,0.0,0.016393
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.012658,0.0,0.0,0.025316
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,...,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
# (neighborhoods with at least one venue, 1 neighborhood column + one column per category)
toronto_grouped.shape

(39, 235)

Which are the 5 most common venues for each neighborhood?

In [24]:
num_top_venues = 5

# Print, for every neighborhood, its most frequent venue categories.
for _, grouped_row in toronto_grouped.iterrows():
    hood = grouped_row['Neighborhood']
    print("----"+hood+"----")

    # everything after the first column is a category frequency
    freqs = grouped_row.iloc[1:].astype(float).round(2)
    temp = freqs.rename_axis('venue').reset_index(name='freq')

    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Berczy Park----
                 venue  freq
0         _Coffee Shop  0.09
1        _Cocktail Bar  0.05
2            _Beer Bar  0.04
3  _Seafood Restaurant  0.04
4         _Cheese Shop  0.04


----Brockton, Parkdale Village, Exhibition Place----
                 venue  freq
0                _Café  0.12
1      _Breakfast Spot  0.08
2           _Nightclub  0.08
3         _Coffee Shop  0.08
4  _Italian Restaurant  0.04


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                   venue  freq
0    _Light Rail Station  0.12
1           _Pizza Place  0.06
2         _Auto Workshop  0.06
3  _Fast Food Restaurant  0.06
4         _Burrito Place  0.06


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
               venue  freq
0   _Airport Service  0.20
1    _Airport Lounge  0.13
2  _Airport Terminal  0.13
3           _Airport  0.07
4     _Boat or Ferry  0.07


----Central Bay

The 5 most popular venues of each neighborhood fitted into a dataframe

In [25]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest values in `row`, ignoring its first entry.

    The first entry of `row` is skipped (it holds the neighborhood name in the
    grouped table); the rest is ordered from highest to lowest and the index
    labels of the leading `num_top_venues` entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [26]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column names: 'Neighborhood', then '1st', '2nd', '3rd', '4th', ... Most Common Venue.
# The first three ranks take their suffix from `indicators`; every later rank uses 'th'.
# This is an explicit bounds check rather than a bare `except` around the lookup.
columns = ['Neighborhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Rank the categories of every neighborhood, then build the frame in one step
# instead of assigning row by row into an empty DataFrame.
top_venues = [
    return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)
    for ind in range(toronto_grouped.shape[0])
]
neighborhoods_venues_sorted = pd.DataFrame(top_venues, columns=columns[1:], index=toronto_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighborhood', toronto_grouped['Neighborhood'])

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,_Coffee Shop,_Cocktail Bar,_Beer Bar,_Bakery,_Restaurant,_Cheese Shop,_Farmers Market,_Seafood Restaurant,_Concert Hall,_Park
1,"Brockton, Parkdale Village, Exhibition Place",_Café,_Breakfast Spot,_Nightclub,_Coffee Shop,_Performing Arts Venue,_Furniture / Home Store,_Burrito Place,_Restaurant,_Italian Restaurant,_Stadium
2,"Business reply mail Processing Centre, South C...",_Light Rail Station,_Gym / Fitness Center,_Garden,_Brewery,_Spa,_Farmers Market,_Fast Food Restaurant,_Burrito Place,_Restaurant,_Auto Workshop
3,"CN Tower, King and Spadina, Railway Lands, Har...",_Airport Service,_Airport Lounge,_Airport Terminal,_Airport,_Boat or Ferry,_Plane,_Rental Car Location,_Sculpture Garden,_Boutique,_Harbor / Marina
4,Central Bay Street,_Coffee Shop,_Sandwich Place,_Café,_Italian Restaurant,_Burger Joint,_Bubble Tea Shop,_Salad Place,_Thai Restaurant,_Restaurant,_Portuguese Restaurant


# Let's cluster the neighborhoods

In [27]:
# set number of clusters
kclusters = 6

# the features are the category frequencies only; `columns=` replaces the
# positional axis argument, which pandas 2.x no longer accepts
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering; random_state and n_init are fixed so that a re-run
# gives the same labels regardless of the installed scikit-learn defaults
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 3, 3, 3, 0, 3, 3, 3, 3, 3], dtype=int32)

Create a dataframe with the 10 most common venues and its cluster

In [28]:
# add clustering labels; a column left over from a previous run of this cell
# is dropped first so the cell can be executed more than once
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# add each neighborhood's cluster label and top venues to its postal code,
# borough and coordinates (join returns a new frame; toronto_data is not modified)
toronto_merged = toronto_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# a neighborhood missing from the venue table gets no label from the left join;
# drop those rows so the labels remain integers
toronto_merged = toronto_merged.dropna(subset=['Cluster Labels'])
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype(kmeans.labels_.dtype)

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,_Coffee Shop,_Café,_Park,_Bakery,_Breakfast Spot,_Pub,_Theater,_Farmers Market,_French Restaurant,_Wine Shop
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,_Coffee Shop,_Sushi Restaurant,_Yoga Studio,_Fried Chicken Joint,_Beer Bar,_Japanese Restaurant,_Fast Food Restaurant,_Mexican Restaurant,_Portuguese Restaurant,_Bar
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3,_Coffee Shop,_Clothing Store,_Japanese Restaurant,_Café,_Cosmetics Shop,_Middle Eastern Restaurant,_Hotel,_Bubble Tea Shop,_Bookstore,_Ramen Restaurant
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3,_Coffee Shop,_Café,_Gastropub,_American Restaurant,_Cocktail Bar,_Park,_Hotel,_Seafood Restaurant,_Cosmetics Shop,_Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,_Neighborhood,_Health Food Store,_Pub,_Trail,_Yoga Studio,_Dog Run,_Diner,_Discount Store,_Distribution Center,_Donut Shop


Now let's check the resulting clusters

In [29]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# one evenly spaced rainbow color per cluster label
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

# add one marker per neighborhood, colored by its cluster
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # index the palette by the label itself (labels run from 0 to kclusters - 1)
    # rather than by cluster - 1, which only worked for label 0 through wrap-around
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

Starting with 5 clusters showed that most of the neighborhoods fall into the same cluster. Maybe KMeans needs a higher number of clusters to improve the fit, so in the second round I used 6 clusters.

# We look into the clusters and see their common venues.

Cluster 1

In [30]:
# Rows with cluster label 0: borough (column 1) plus every column from position 5 onward.
toronto_merged[toronto_merged['Cluster Labels'] == 0].iloc[:, [1] + list(range(5, len(toronto_merged.columns)))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,_Coffee Shop,_Café,_Park,_Bakery,_Breakfast Spot,_Pub,_Theater,_Farmers Market,_French Restaurant,_Wine Shop
1,Downtown Toronto,0,_Coffee Shop,_Sushi Restaurant,_Yoga Studio,_Fried Chicken Joint,_Beer Bar,_Japanese Restaurant,_Fast Food Restaurant,_Mexican Restaurant,_Portuguese Restaurant,_Bar
6,Downtown Toronto,0,_Coffee Shop,_Sandwich Place,_Café,_Italian Restaurant,_Burger Joint,_Bubble Tea Shop,_Salad Place,_Thai Restaurant,_Restaurant,_Portuguese Restaurant
24,Central Toronto,0,_Café,_Sandwich Place,_Coffee Shop,_Liquor Store,_Indian Restaurant,_Pub,_BBQ Joint,_History Museum,_Middle Eastern Restaurant,_Pizza Place
28,West Toronto,0,_Café,_Coffee Shop,_Sushi Restaurant,_Pub,_Italian Restaurant,_Yoga Studio,_Dessert Shop,_Sandwich Place,_Burrito Place,_Restaurant
31,Central Toronto,0,_Coffee Shop,_Bagel Shop,_Fried Chicken Joint,_Liquor Store,_Sandwich Place,_Restaurant,_Pub,_Bank,_Supermarket,_Sushi Restaurant


Cluster 2

In [31]:
# Rows with cluster label 1: borough (column 1) plus every column from position 5 onward.
toronto_merged[toronto_merged['Cluster Labels'] == 1].iloc[:, [1] + list(range(5, len(toronto_merged.columns)))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
29,Central Toronto,1,_Summer Camp,_Yoga Studio,_Farmers Market,_Event Space,_Ethiopian Restaurant,_Escape Room,_Electronics Store,_Eastern European Restaurant,_Donut Shop,_Doner Restaurant


Cluster 3

In [32]:
# Rows with cluster label 2: borough (column 1) plus every column from position 5 onward.
toronto_merged[toronto_merged['Cluster Labels'] == 2].iloc[:, [1] + list(range(5, len(toronto_merged.columns)))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
18,Central Toronto,2,_Park,_Bus Line,_Business Service,_Swim School,_Falafel Restaurant,_Ethiopian Restaurant,_Escape Room,_Electronics Store,_Eastern European Restaurant,_Donut Shop
33,Downtown Toronto,2,_Park,_Playground,_Trail,_Yoga Studio,_Deli / Bodega,_Escape Room,_Electronics Store,_Eastern European Restaurant,_Donut Shop,_Doner Restaurant


Cluster 4

In [33]:
# Rows with cluster label 3: borough (column 1) plus every column from position 5 onward.
toronto_merged[toronto_merged['Cluster Labels'] == 3].iloc[:, [1] + list(range(5, len(toronto_merged.columns)))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,3,_Coffee Shop,_Clothing Store,_Japanese Restaurant,_Café,_Cosmetics Shop,_Middle Eastern Restaurant,_Hotel,_Bubble Tea Shop,_Bookstore,_Ramen Restaurant
3,Downtown Toronto,3,_Coffee Shop,_Café,_Gastropub,_American Restaurant,_Cocktail Bar,_Park,_Hotel,_Seafood Restaurant,_Cosmetics Shop,_Restaurant
5,Downtown Toronto,3,_Coffee Shop,_Cocktail Bar,_Beer Bar,_Bakery,_Restaurant,_Cheese Shop,_Farmers Market,_Seafood Restaurant,_Concert Hall,_Park
7,Downtown Toronto,3,_Grocery Store,_Café,_Park,_Nightclub,_Italian Restaurant,_Restaurant,_Candy Store,_Athletics & Sports,_Baby Store,_Coffee Shop
8,Downtown Toronto,3,_Coffee Shop,_Café,_Restaurant,_Gym,_Clothing Store,_Hotel,_Thai Restaurant,_Deli / Bodega,_Sushi Restaurant,_Bakery
9,West Toronto,3,_Pharmacy,_Bakery,_Grocery Store,_Pool,_Café,_Middle Eastern Restaurant,_Bar,_Supermarket,_Bank,_Athletics & Sports
10,Downtown Toronto,3,_Coffee Shop,_Aquarium,_Hotel,_Café,_Restaurant,_Fried Chicken Joint,_Brewery,_Italian Restaurant,_Scenic Lookout,_Baseball Stadium
11,West Toronto,3,_Bar,_Coffee Shop,_Asian Restaurant,_Vietnamese Restaurant,_Restaurant,_Vegetarian / Vegan Restaurant,_Café,_Men's Store,_Brewery,_Record Shop
12,East Toronto,3,_Greek Restaurant,_Coffee Shop,_Italian Restaurant,_Bookstore,_Ice Cream Shop,_Furniture / Home Store,_Liquor Store,_Spa,_Japanese Restaurant,_Juice Bar
13,Downtown Toronto,3,_Coffee Shop,_Hotel,_Café,_Salad Place,_American Restaurant,_Japanese Restaurant,_Italian Restaurant,_Seafood Restaurant,_Restaurant,_Pizza Place


Cluster 5

In [34]:
# Rows with cluster label 4: borough (column 1) plus every column from position 5 onward.
toronto_merged[toronto_merged['Cluster Labels'] == 4].iloc[:, [1] + list(range(5, len(toronto_merged.columns)))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,East Toronto,4,_Neighborhood,_Health Food Store,_Pub,_Trail,_Yoga Studio,_Dog Run,_Diner,_Discount Store,_Distribution Center,_Donut Shop


Cluster 6

In [35]:
# Rows with cluster label 5: borough (column 1) plus every column from position 5 onward.
toronto_merged[toronto_merged['Cluster Labels'] == 5].iloc[:, [1] + list(range(5, len(toronto_merged.columns)))]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,Central Toronto,5,_Jewelry Store,_Trail,_Mexican Restaurant,_Sushi Restaurant,_Yoga Studio,_Dessert Shop,_Ethiopian Restaurant,_Escape Room,_Electronics Store,_Eastern European Restaurant


So here we can see that two of the clusters take the majority of neighborhoods, while the other 4 clusters are very sparsely populated. According to this analysis, there is not a big variety of neighborhoods in Toronto.