# Segmenting and Clustering neighborhoods in Toronto

<p align="right">Antoine Driot</p>

<p align='center'><img src='https://upload.wikimedia.org/wikipedia/commons/thumb/5/50/Montage_of_Toronto_7.jpg/290px-Montage_of_Toronto_7.jpg'></img></p>

In [1]:
import os
from io import StringIO

import numpy as np
import pandas as pd

# Foursquare credentials are read from the environment so that real keys are
# never saved with the notebook; the masked placeholders remain the fallback.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '********************') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '********************') # your Foursquare Secret

## Part I

### Get Data

In [2]:
# Wikipedia page listing the Canadian postal codes starting with "M"
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# name of the local file the page content is read back from
filename = 'wikipedia.html'

In [3]:
# download html page into a file

!wget -q -O 'wikipedia.html' https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
print('Data downloaded!')

Data downloaded!


In [4]:
# Fill a string variable with the file content (i.e. the raw html).
# The encoding is stated explicitly instead of relying on the platform default,
# and the `with` block closes the file even if reading fails.

import codecs
with codecs.open(filename, 'r', encoding='utf-8') as f:
    html_full = f.read()

### Parse Data

In [5]:
# Extract the html table from the full html response.
# The closing tag is searched starting at the opening tag, so a table that ends
# earlier in the page cannot be matched, and a missing marker raises an error
# instead of silently producing a wrong slice (str.find returns -1 on no match).

balise1='<table class="wikitable sortable">'  # opening tag of the wanted table
balise2='</tbody></table>'                    # closing tags of that table

pos1 = html_full.find(balise1)
if pos1 == -1:
    raise ValueError('opening table tag not found in the downloaded page')

pos2 = html_full.find(balise2, pos1)
if pos2 == -1:
    raise ValueError('closing table tag not found in the downloaded page')

html_extract = html_full[pos1:pos2+len(balise2)]

In [6]:
# Use pandas.read_html to turn the html table directly into a dataframe.
# An html parser such as lxml must be installed for this (pip install lxml).
# The markup is wrapped in StringIO because passing literal html as a plain
# string to read_html is deprecated in recent pandas releases.

dfs = pd.read_html(StringIO(html_extract))
df = dfs[0]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
283,M8Z,Etobicoke,Mimico NW
284,M8Z,Etobicoke,The Queensway West
285,M8Z,Etobicoke,Royal York South West
286,M8Z,Etobicoke,South of Bloor


### Clean Data

In [7]:
# keep only the rows whose Borough is something other than 'Not assigned'

assigned_rows = df['Borough'] != 'Not assigned'
df = df.loc[assigned_rows]
df.shape

(211, 3)

In [8]:
# one row per (Postcode, Borough): the neighbourhood names are joined with commas

neighbourhoods_by_code = df.groupby(['Postcode','Borough'])['Neighbourhood']
df = neighbourhoods_by_code.apply(','.join).reset_index()
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie..."
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam..."


In [9]:
# list the rows whose Neighbourhood is still 'Not assigned'

df[df['Neighbourhood'] == 'Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood
85,M7A,Queen's Park,Not assigned


In [10]:
# Replace a 'Not assigned' neighbourhood with the name of its borough.
# Writing to the rows yielded by iterrows() is not guaranteed to reach the
# dataframe (the rows can be copies), so the update uses a boolean mask on .loc.

not_assigned = df['Neighbourhood'] == 'Not assigned'
df.loc[not_assigned, 'Neighbourhood'] = df.loc[not_assigned, 'Borough']
        

In [11]:
# the same filter should now return no row at all

df[df['Neighbourhood'] == 'Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood


### Conclusion

In [12]:
# (rows, columns) of the dataframe after cleaning
df.shape

(103, 3)

# .

## Part II

In [13]:
# add two coordinate columns to the dataframe, initialised to empty strings

for coord_column in ('Latitude', 'Longitude'):
    df[coord_column] = ''

### Try to get coordinates from geocoder

In [14]:
#!conda install -c conda-forge geocoder --yes  
import geocoder # import geocoder

def GetCoordToronto(postal_code, max_attempts=10) :
    """Return (latitude, longitude) for a Toronto postal code using geocoder.google.

    Parameters
    ----------
    postal_code : str
        Postal code, inserted into the query '<postal_code>, Toronto, Ontario'.
    max_attempts : int, optional
        Number of requests made before giving up (default 10). The previous
        version retried without any limit and never returned when the service
        kept answering without coordinates.

    Returns
    -------
    tuple
        (latitude, longitude), the two first items of the `latlng` result.

    Raises
    ------
    RuntimeError
        If none of the attempts returned coordinates.
    """
    for _ in range(max_attempts):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
        if lat_lng_coords is not None:
            return (lat_lng_coords[0], lat_lng_coords[1])

    raise RuntimeError(
        'no coordinates returned for {!r} after {} attempts'.format(postal_code, max_attempts))

In [15]:
#GetCoordToronto('M6A')

That doesn't work: the call runs forever without ever returning coordinates.  
### Instead, let's load the proposed csv file into a dataframe

In [16]:
# download the geospatial coordinates CSV into the local file 'codepostaux.csv'
!wget -q -O 'codepostaux.csv' https://cocl.us/Geospatial_data/Geospatial_Coordinates.csv

In [17]:
# load the downloaded CSV; its columns are 'Postal Code', 'Latitude' and 'Longitude'
cp = pd.read_csv("codepostaux.csv") 
cp

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


### Let's create another function to get the coords

Note: a better approach may be to merge the two dataframes on the postal code instead of looking the codes up one by one.

In [18]:
def GetCoordToronto2(postal_code) :
    """Look up the coordinates of a postal code in the `cp` dataframe.

    Parameters
    ----------
    postal_code : str
        Value searched in the 'Postal Code' column of `cp`.

    Returns
    -------
    tuple
        (latitude, longitude) taken from the first matching row.

    Raises
    ------
    IndexError
        If `cp` has no row for `postal_code`. The exception type is the one the
        positional lookup raised before; the message now names the code.
    """
    matches = cp.loc[cp['Postal Code']==postal_code,['Latitude','Longitude']]
    if matches.empty:
        raise IndexError(
            'postal code {!r} not found in the coordinates table'.format(postal_code))

    # read the first matching row once instead of once per column
    first_match = matches.iloc[0]
    return (first_match['Latitude'], first_match['Longitude'])

### Then loop on the dataframe and get coordinates one by one with our function

In [19]:
# Look up every postcode with GetCoordToronto2 and assign the two columns as a whole.
# Writing to the rows yielded by iterrows() is not guaranteed to reach the
# dataframe (the rows can be copies), so the values are collected first.
coords = [GetCoordToronto2(postcode) for postcode in df['Postcode']]
df['Latitude'] = [lat for lat, _ in coords]
df['Longitude'] = [lng for _, lng in coords]
df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.8067,-79.1944
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.7845,-79.1605
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7636,-79.1887
3,M1G,Scarborough,Woburn,43.771,-79.2169
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395
...,...,...,...,...,...
98,M9N,York,Weston,43.7069,-79.5182
99,M9P,Etobicoke,Westmount,43.6963,-79.5322
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",43.6889,-79.5547
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",43.7394,-79.5884


In [20]:
# no row should be left with an empty-string latitude

df[df['Latitude'] == '']

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude


# .

# Part III

### Get the coordinates for Toronto City

In [21]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; stop with a clear message
# rather than failing on the attribute access below
if location is None:
    raise ValueError('no geocoding result for {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate are {}, {}.'.format(latitude, longitude))

The geograpical coordinate are 43.653963, -79.387207.


### Display the map and the neighborhoods

In [22]:
import folium  
# coordinates the map is centred on
latitude = 43.653963
longitude = -79.387207

# base map
mymap = folium.Map(location=[latitude, longitude], zoom_start=10)

# one blue circle per row of df, with a "neighbourhood, borough" popup
marker_rows = zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood'])
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(mymap)

mymap

### Let's get the nearby venues for each neighbourhood from Foursquare

In [23]:
import requests # library to handle requests

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "venues/explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Read in parallel: one label and one coordinate pair per location.
    radius : int, optional
        Value sent as the `radius` query parameter (default 500).

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT are read from the notebook's
    global namespace at call time.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # a timeout keeps one stalled request from blocking the whole loop,
        # and an error status is reported instead of failing later on a missing key
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups', [])
        items = groups[0].get('items', []) if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without any category gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # giving the columns to the constructor also works for an empty result
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return(nearby_venues)

In [26]:
# request settings read by getNearbyVenues when it runs
VERSION = '20180605' # Foursquare API version
LIMIT = 100

# fetch the venues around every row of df
my_venues = getNearbyVenues(df['Neighbourhood'], df['Latitude'], df['Longitude'])


Rouge,Malvern
Highland Creek,Rouge Hill,Port Union
Guildwood,Morningside,West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park,Ionview,Kennedy Park
Clairlea,Golden Mile,Oakridge
Cliffcrest,Cliffside,Scarborough Village West
Birch Cliff,Cliffside West
Dorset Park,Scarborough Town Centre,Wexford Heights
Maryvale,Wexford
Agincourt
Clarks Corners,Sullivan,Tam O'Shanter
Agincourt North,L'Amoreaux East,Milliken,Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview,Henry Farm,Oriole
Bayview Village
Silver Hills,York Mills
Newtonbrook,Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park,Don Mills South
Bathurst Manor,Downsview North,Wilson Heights
Northwood Park,York University
CFB Toronto,Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens,Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West,Riverdale
The Beaches West,Indi

In [27]:
# save the collected venues to a CSV file
my_venues.to_csv('4square02.csv')

In [28]:
# (number of venue rows, number of columns)
my_venues.shape

(2264, 7)

In [29]:
# number of non-null values in each column, per neighbourhood
my_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Agincourt,4,4,4,4,4,4
"Agincourt North,L'Amoreaux East,Milliken,Steeles East",3,3,3,3,3,3
"Albion Gardens,Beaumond Heights,Humbergate,Jamestown,Mount Olive,Silverstone,South Steeles,Thistletown",11,11,11,11,11,11
"Alderwood,Long Branch",10,10,10,10,10,10
...,...,...,...,...,...,...
Willowdale West,5,5,5,5,5,5
Woburn,3,3,3,3,3,3
"Woodbine Gardens,Parkview Hill",11,11,11,11,11,11
Woodbine Heights,10,10,10,10,10,10


In [30]:
# count the distinct venue categories
n_categories = len(my_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(n_categories))

There are 274 uniques categories.


<b><font color='green' size=5>Apparently, there already is a column with the name 'Neighborhood' in our categories<br>
    so we will rename it, to be able to continue</font></b>  
<i>We can check it with the code : my_onehot.columns.get_loc("Neighborhood")</i>

In [31]:
# one hot encoding: one indicator column per venue category
my_onehot = pd.get_dummies(my_venues[['Venue Category']], prefix="", prefix_sep="")

# a category is itself named 'Neighborhood'; rename that column so it does not clash with ours
my_onehot = my_onehot.rename(columns={'Neighborhood': 'Neighborhood_z'})

# put the neighbourhood of each venue in front, as the first column
my_onehot.insert(0, 'Neighborhood', my_venues['Neighborhood'])

my_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge,Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Highland Creek,Rouge Hill,Port Union",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
# one row per neighbourhood: the mean of every indicator column over its venues
my_grouped = my_onehot.groupby('Neighborhood').mean().reset_index()
my_grouped

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0
4,"Alderwood,Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,Willowdale West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0
97,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0
98,"Woodbine Gardens,Parkview Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,...,0.0,0.00,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0
99,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,...,0.0,0.00,0.0,0.1,0.0,0.0,0.00,0.0,0.0,0.0


In [33]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the `num_top_venues` largest values of `row`.

    The first entry of `row` is left out of the ranking; the remaining entries
    are sorted in descending order and the index labels of the leading ones are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [34]:
num_top_venues = 10

def _ordinal(n):
    """Return n followed by its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st...)."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)

# create columns according to number of top venues
# (the suffix is computed explicitly rather than through a bare `except`,
# which hid every kind of error and produced e.g. '21th')
columns = ['Neighborhood']
for rank in range(1, num_top_venues + 1):
    columns.append('{} Most Common Venue'.format(_ordinal(rank)))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = my_grouped['Neighborhood']

# fill the venue columns of each row with that neighbourhood's top categories
for ind in np.arange(my_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(my_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Bar,Steakhouse,Cosmetics Shop,Hotel,Restaurant,Burger Joint,American Restaurant,Thai Restaurant
1,Agincourt,Lounge,Breakfast Spot,Skating Rink,Clothing Store,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Park,Playground,Coffee Shop,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Grocery Store,Pharmacy,Coffee Shop,Japanese Restaurant,Discount Store,Sandwich Place,Beer Store,Pizza Place,Fast Food Restaurant,Fried Chicken Joint
4,"Alderwood,Long Branch",Pizza Place,Gym,Skating Rink,Pharmacy,Coffee Shop,Pub,Athletics & Sports,Sandwich Place,Pool,Yoga Studio


## Let's run the unsupervised algorithm

In [35]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

# keep only the frequency columns; `columns=` replaces the positional axis
# argument, which pandas 2.0 no longer accepts
my_grouped_clustering = my_grouped.drop(columns='Neighborhood')

# run k-means clustering (n_init is pinned because the scikit-learn default
# changed between releases, which would change the resulting labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(my_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 1, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

In [36]:
# add clustering labels; a column left by a previous run of this cell is
# dropped first, so the cell can be executed again without an error
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# attach the cluster label and the top venues to every row of df, matching on
# the neighbourhood name; a neighbourhood that is absent from
# neighborhoods_venues_sorted gets NaN in the added columns
my_merged = df.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

my_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",43.8067,-79.1944,2.0,Fast Food Restaurant,Print Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.7845,-79.1605,2.0,Moving Target,History Museum,Bar,Yoga Studio,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.7636,-79.1887,2.0,Pizza Place,Breakfast Spot,Rental Car Location,Electronics Store,Medical Center,Intersection,Mexican Restaurant,Yoga Studio,Doner Restaurant,Diner
3,M1G,Scarborough,Woburn,43.771,-79.2169,2.0,Coffee Shop,Korean Restaurant,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
4,M1H,Scarborough,Cedarbrae,43.7731,-79.2395,2.0,Hakka Restaurant,Thai Restaurant,Fried Chicken Joint,Bank,Bakery,Athletics & Sports,Caribbean Restaurant,Lounge,Cuban Restaurant,Cupcake Shop


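<font color='green'>I have a small problem: 4 rows don't have a cluster label.  
This is most likely because no venue was returned for those neighbourhoods, so they are missing from the grouped table and the join leaves NaN; I have not investigated further.  
As a workaround, I simply drop those rows, as shown below.</font>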

In [37]:
# Drop the rows that have no cluster label.
# The results are assigned back instead of using inplace=True: an inplace
# replace on a selected column is chained assignment and may never reach
# my_merged.
my_merged['Cluster Labels'] = my_merged['Cluster Labels'].replace('', np.nan)
my_merged = my_merged.dropna(subset=['Cluster Labels'])

In [38]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one hex colour per cluster, evenly
# spaced on the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(my_merged['Latitude'], my_merged['Longitude'], my_merged['Neighbourhood'], my_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # labels start at 0, so they index the colour list directly (the former
    # `- 1` only worked for label 0 through negative-index wrap-around)
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Now we can visualize thanks to the map and the details of clusters
## We could try to give them names

### Cluster 1

In [39]:
# rows labelled 0: the Borough column plus every column from 'Cluster Labels' onwards
in_cluster = my_merged['Cluster Labels'] == 0
shown_columns = [1] + list(range(5, my_merged.shape[1]))
my_merged[in_cluster].iloc[:, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Scarborough,0.0,American Restaurant,Motel,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dim Sum Restaurant


### Cluster 2

In [40]:
# rows labelled 1: the Borough column plus every column from 'Cluster Labels' onwards
in_cluster = my_merged['Cluster Labels'] == 1
shown_columns = [1] + list(range(5, my_merged.shape[1]))
my_merged[in_cluster].iloc[:, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,1.0,Park,Playground,Coffee Shop,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
23,North York,1.0,Park,Bank,Convenience Store,Bar,Yoga Studio,Eastern European Restaurant,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
25,North York,1.0,Food & Drink Shop,Park,Yoga Studio,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
30,North York,1.0,Airport,Park,Yoga Studio,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
40,East York,1.0,Intersection,Park,Coffee Shop,Convenience Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
44,Central Toronto,1.0,Park,Swim School,Bus Line,Lawyer,Yoga Studio,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
50,Downtown Toronto,1.0,Park,Trail,Playground,Building,Yoga Studio,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
74,York,1.0,Park,Women's Store,Fast Food Restaurant,Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Dim Sum Restaurant
79,North York,1.0,Bakery,Park,Construction & Landscaping,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
90,Etobicoke,1.0,River,Park,Yoga Studio,Drugstore,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant


### Cluster 3

In [41]:
# rows labelled 2: the Borough column plus every column from 'Cluster Labels' onwards
in_cluster = my_merged['Cluster Labels'] == 2
shown_columns = [1] + list(range(5, my_merged.shape[1]))
my_merged[in_cluster].iloc[:, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,2.0,Fast Food Restaurant,Print Shop,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
1,Scarborough,2.0,Moving Target,History Museum,Bar,Yoga Studio,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
2,Scarborough,2.0,Pizza Place,Breakfast Spot,Rental Car Location,Electronics Store,Medical Center,Intersection,Mexican Restaurant,Yoga Studio,Doner Restaurant,Diner
3,Scarborough,2.0,Coffee Shop,Korean Restaurant,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
4,Scarborough,2.0,Hakka Restaurant,Thai Restaurant,Fried Chicken Joint,Bank,Bakery,Athletics & Sports,Caribbean Restaurant,Lounge,Cuban Restaurant,Cupcake Shop
...,...,...,...,...,...,...,...,...,...,...,...,...
95,Etobicoke,2.0,Pharmacy,Liquor Store,Café,Coffee Shop,Convenience Store,Beer Store,Pizza Place,Pet Store,Ethiopian Restaurant,Empanada Restaurant
96,North York,2.0,Pizza Place,Empanada Restaurant,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
99,Etobicoke,2.0,Pizza Place,Intersection,Discount Store,Sandwich Place,Coffee Shop,Chinese Restaurant,Middle Eastern Restaurant,Donut Shop,Diner,Dog Run
101,Etobicoke,2.0,Grocery Store,Pharmacy,Coffee Shop,Japanese Restaurant,Discount Store,Sandwich Place,Beer Store,Pizza Place,Fast Food Restaurant,Fried Chicken Joint


### and so on ...

## See ya !