# Segmenting and Clustering Neighborhoods in Toronto

## Installing an Importing necessary libraries for pulling data from web

In [1]:
# ! pip install beautifulsoup4
# ! pip install requests
# ! pip install geocoder
! pip install folium



In [22]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

## Importing and wrangling data

In [3]:
page = requests.get(r"https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(page.content, 'html.parser')
table = soup.find(class_='wikitable sortable').get_text()
table1 = table.split('\n\n')
table2 = table1[1:len(table1)+1]

l = [table2[i:i+3] for i in range(0,len(table2),3)]



In [4]:
columns = l[0]
rows = l[1:len(l)+1]
data = pd.DataFrame(columns=columns, data=rows)
data['Postal Code'] = data['Postal Code'].str.strip('\n')
data =  data[data['Borough'] != 'Not assigned']
data.reset_index(drop=True, inplace=True)
data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
print(data.shape)

(103, 3)


### Getting coordinates for Postal Codes

In [6]:
# import geocoder

In [7]:
# lat_long_coords = None

# postal_code = data['Postal Code'][0:5]

# while (lat_long_coords is None):
#     g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#     lat_long_coords = g.latlng

# latitude = lat_long_coords[0]
# longitude = lat_long_coords[1]

# print(latitude,longitude)

In [8]:
coords = pd.read_csv('Geospatial_Coordinates.csv')

In [9]:
data_coords = data.merge(coords,on='Postal Code', how='left')
data_coords.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Visualizing Neighborhoods

In [10]:
import folium

In [11]:
map_toronto = folium.Map(location=[data_coords['Latitude'].median(),\
                                   data_coords['Longitude'].median()],zoom_start=11)
for lat,long,label in zip (data_coords['Latitude'],data_coords['Longitude'],data_coords['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat,long],
    radius = 5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)
map_toronto

## Extracting Data for Neighboorhoods

#### credintials of foursquare API

In [12]:
import json

In [13]:
CLIENT_ID = 'KS3UQEDTPRM1WNBDQCHKV1EXTOIHJHQ1F3GLMHT0LUMDQJLX'
CLIENT_SECRET = 'B5KHTFCAODOUB4OUPVI000C0QYGSBT0ZFXAJXKYHZCBDOZDQ'
VERSION = '20180605'

In [14]:
LIMIT = 100
radius = 500
neighborhood_latitude = data_coords['Latitude'][0]                               
neighborhood_longitude = data_coords['Longitude'][0]

In [15]:
def getNearbyVenues(names, latitude, longitude, radius=500, LIMIT=100):
    
    venues_list =[]
    for name, lat, lng in zip(names,latitude,longitude):
        

        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},\
                {}&radius={}&limit={}'.format(
                                        CLIENT_ID, 
                                        CLIENT_SECRET, 
                                        VERSION, 
                                        lat, 
                                        lng, 
                                        radius, 
                                        LIMIT)
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
        venues_list.append([(
        name,
        lat,
        lng,
        v['venue']['name'],
        v['venue']['location']['lat'],
        v['venue']['location']['lng'],
        v['venue']['categories'][0]['name']) for v in results])
    
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood',
                             'Neighborhood Latitude',
                             'Neighborhood Longitude',
                             'Venue',
                             'Venue Latitude',
                             'Venue Longitude',
                             'Venue Category']
    return (nearby_venues)

In [16]:
toronto_venues = getNearbyVenues(names=data_coords['Neighborhood'],
                                 latitude = data_coords['Latitude'],
                                 longitude =data_coords['Longitude']
                                )
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Sun Life,43.75476,-79.332783,Construction & Landscaping
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


## Onehot Encoding of venues data

In [17]:
data_coords_onehot = pd.get_dummies(toronto_venues[['Venue Category']])
data_coords_onehot['Neighborhood']=data_coords['Neighborhood']
data_coords_onehot

Unnamed: 0,Venue Category_Accessories Store,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,Venue Category_Antique Shop,...,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Video Store,Venue Category_Vietnamese Restaurant,Venue Category_Warehouse Store,Venue Category_Wine Bar,Venue Category_Wings Joint,Venue Category_Women's Store,Venue Category_Yoga Studio,Neighborhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Parkwoods
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Victoria Village
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Regent Park, Harbourfront"
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Lawrence Manor, Lawrence Heights"
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Queen's Park, Ontario Provincial Government"
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Islington Avenue
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Malvern, Rouge"
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Don Mills
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Parkview Hill, Woodbine Gardens"
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Garden District, Ryerson"


In [18]:
fixed_columns = [data_coords_onehot.columns[-1]] + list(data_coords_onehot.columns[:-1])
data_coords_onehot = data_coords_onehot[fixed_columns]
data_coords_onehot.head()

Unnamed: 0,Neighborhood,Venue Category_Accessories Store,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,...,Venue Category_Train Station,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Video Store,Venue Category_Vietnamese Restaurant,Venue Category_Warehouse Store,Venue Category_Wine Bar,Venue Category_Wings Joint,Venue Category_Women's Store,Venue Category_Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Lawrence Manor, Lawrence Heights",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Queen's Park, Ontario Provincial Government",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
toronto_grouped = data_coords_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Venue Category_Accessories Store,Venue Category_Afghan Restaurant,Venue Category_Airport,Venue Category_Airport Food Court,Venue Category_Airport Gate,Venue Category_Airport Lounge,Venue Category_Airport Service,Venue Category_Airport Terminal,Venue Category_American Restaurant,...,Venue Category_Train Station,Venue Category_Vegetarian / Vegan Restaurant,Venue Category_Video Game Store,Venue Category_Video Store,Venue Category_Vietnamese Restaurant,Venue Category_Warehouse Store,Venue Category_Wine Bar,Venue Category_Wings Joint,Venue Category_Women's Store,Venue Category_Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Business reply mail Processing Centre,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                                            venue  freq
0                          Venue Category_Theater   1.0
1                Venue Category_Accessories Store   0.0
2               Venue Category_Mexican Restaurant   0.0
3  Venue Category_Molecular Gastronomy Restaurant   0.0
4       Venue Category_Modern European Restaurant   0.0


----Alderwood, Long Branch----
                                      venue  freq
0                Venue Category_Coffee Shop   1.0
1          Venue Category_Accessories Store   0.0
2  Venue Category_Middle Eastern Restaurant   0.0
3        Venue Category_Moroccan Restaurant   0.0
4        Venue Category_Monument / Landmark   0.0


----Bathurst Manor, Wilson Heights, Downsview North----
                                            venue  freq
0                Venue Category_French Restaurant   1.0
1                Venue Category_Accessories Store   0.0
2        Venue Category_Middle Eastern Restaurant   0.0
3              Venue Category_M

                                            venue  freq
0               Venue Category_Italian Restaurant   1.0
1                Venue Category_Accessories Store   0.0
2        Venue Category_Middle Eastern Restaurant   0.0
3              Venue Category_Monument / Landmark   0.0
4  Venue Category_Molecular Gastronomy Restaurant   0.0


----Downsview----
                              venue  freq
0                Venue Category_Pub  0.25
1               Venue Category_Bank  0.25
2         Venue Category_Playground  0.25
3        Venue Category_Event Space  0.25
4  Venue Category_Accessories Store  0.00


----Dufferin, Dovercourt Village----
                              venue  freq
0        Venue Category_Yoga Studio   1.0
1              Venue Category_Motel   0.0
2  Venue Category_Martial Arts Dojo   0.0
3     Venue Category_Massage Studio   0.0
4     Venue Category_Medical Center   0.0


----East Toronto----
                                            venue  freq
0                   Ve

                                            venue  freq
0                        Venue Category_Gastropub   1.0
1                Venue Category_Accessories Store   0.0
2        Venue Category_Middle Eastern Restaurant   0.0
3              Venue Category_Monument / Landmark   0.0
4  Venue Category_Molecular Gastronomy Restaurant   0.0


----Moore Park, Summerhill East----
                                      venue  freq
0                Venue Category_Coffee Shop   1.0
1          Venue Category_Accessories Store   0.0
2  Venue Category_Middle Eastern Restaurant   0.0
3        Venue Category_Moroccan Restaurant   0.0
4        Venue Category_Monument / Landmark   0.0


----New Toronto, Mimico South, Humber Bay Shores----
                                     venue  freq
0                      Venue Category_Café   1.0
1         Venue Category_Accessories Store   0.0
2            Venue Category_Massage Studio   0.0
3            Venue Category_Medical Center   0.0
4  Venue Category_Mediterr

                                venue  freq
0       Venue Category_Sandwich Place   1.0
1    Venue Category_Accessories Store   0.0
2  Venue Category_Moroccan Restaurant   0.0
3    Venue Category_Martial Arts Dojo   0.0
4       Venue Category_Massage Studio   0.0


----The Annex, North Midtown, Yorkville----
                              venue  freq
0      Venue Category_Burrito Place   1.0
1  Venue Category_Accessories Store   0.0
2              Venue Category_Motel   0.0
3     Venue Category_Massage Studio   0.0
4     Venue Category_Medical Center   0.0


----The Beaches----
                                      venue  freq
0                Venue Category_Coffee Shop   1.0
1          Venue Category_Accessories Store   0.0
2  Venue Category_Middle Eastern Restaurant   0.0
3        Venue Category_Moroccan Restaurant   0.0
4        Venue Category_Monument / Landmark   0.0


----The Danforth West, Riverdale----
                                            venue  freq
0                    

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Venue Category_Theater,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore,Venue Category_Dessert Shop
1,"Alderwood, Long Branch",Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Venue Category_French Restaurant,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore,Venue Category_Field
3,Bayview Village,Venue Category_Bakery,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant,Venue Category_Filipino Restaurant
4,"Bedford Park, Lawrence Manor East",Venue Category_Furniture / Home Store,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Yoga Studio,Venue Category_Field


## CLustering Neighbothoods

In [29]:
from sklearn.cluster import KMeans

In [99]:
k = 5
toronto_clustering = toronto_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=k, random_state=1).fit(toronto_clustering)
kmeans.labels_
print(toronto_clustering.shape)
print(toronto_grouped.shape)
print(data_coords.shape)
print(coords.shape)

(98, 268)
(98, 269)
(103, 5)
(103, 3)


In [109]:
# neighborhoods_venues_sorted.insert(0,'Cluster Labels', kmeans.labels_)

toronto_merged = data_coords

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
toronto_merged

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1,Venue Category_Park,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,Venue Category_Construction & Landscaping,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,0,Venue Category_Food & Drink Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Venue Category_Hockey Arena,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Yoga Studio,Venue Category_Dessert Shop
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,0,Venue Category_Portuguese Restaurant,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Yoga Studio,Venue Category_Department Store
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0,Venue Category_Pizza Place,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore
7,M3B,North York,Don Mills,43.745906,-79.352188,1,Venue Category_Park,Venue Category_Bakery,Venue Category_Donut Shop,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore,Venue Category_Field
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Venue Category_Breakfast Spot,Venue Category_Yoga Studio,Venue Category_Dim Sum Restaurant,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore


In [110]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [111]:
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

## Visualizing data based on clusters

In [112]:
map_toronto_clusters = folium.Map(location=[data_coords['Latitude'].median(),\
                                   data_coords['Longitude'].median()],zoom_start=11)


for lat,long,poi,cluster in zip (toronto_merged['Latitude'],toronto_merged['Longitude'],toronto_merged['Neighborhood'],toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
    [lat,long],
    radius = 5,
    popup=label,
    color=rainbow[cluster-1],
    fill=True,
    fill_color=rainbow[cluster-1],
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto_clusters)
map_toronto_clusters

## Examining each cluster

In [113]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,0,Venue Category_Construction & Landscaping,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
2,Downtown Toronto,0,Venue Category_Food & Drink Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
3,North York,0,Venue Category_Hockey Arena,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Yoga Studio,Venue Category_Dessert Shop
5,Etobicoke,0,Venue Category_Portuguese Restaurant,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Yoga Studio,Venue Category_Department Store
6,Scarborough,0,Venue Category_Pizza Place,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore
9,Downtown Toronto,0,Venue Category_Breakfast Spot,Venue Category_Yoga Studio,Venue Category_Dim Sum Restaurant,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore
10,North York,0,Venue Category_Distribution Center,Venue Category_Yoga Studio,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore
11,Etobicoke,0,Venue Category_Spa,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
12,Scarborough,0,Venue Category_Restaurant,Venue Category_Yoga Studio,Venue Category_Doner Restaurant,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Donut Shop,Venue Category_Department Store
14,East York,0,Venue Category_Gym / Fitness Center,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore,Venue Category_Donut Shop


In [114]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1,Venue Category_Park,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore
7,North York,1,Venue Category_Park,Venue Category_Bakery,Venue Category_Donut Shop,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore,Venue Category_Field
13,North York,1,Venue Category_Park,Venue Category_Bakery,Venue Category_Donut Shop,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore,Venue Category_Field
24,Downtown Toronto,1,Venue Category_Park,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore
30,Downtown Toronto,1,Venue Category_Park,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore
64,York,1,Venue Category_Park,Venue Category_Yoga Studio,Venue Category_Donut Shop,Venue Category_Dim Sum Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Drugstore


In [115]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Downtown Toronto,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
8,East York,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
19,East Toronto,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
20,Downtown Toronto,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
22,Scarborough,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
26,Scarborough,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
29,East York,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
48,Downtown Toronto,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
51,Scarborough,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant
58,Scarborough,2,Venue Category_Coffee Shop,Venue Category_Yoga Studio,Venue Category_Drugstore,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run,Venue Category_Doner Restaurant,Venue Category_Donut Shop,Venue Category_Eastern European Restaurant


In [116]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Scarborough,3,Venue Category_Café,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore,Venue Category_Donut Shop
38,Scarborough,3,Venue Category_Café,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore,Venue Category_Donut Shop
88,Etobicoke,3,Venue Category_Café,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore,Venue Category_Donut Shop
98,Etobicoke,3,Venue Category_Café,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Event Space,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore,Venue Category_Donut Shop


In [117]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
34,North York,4,Venue Category_Event Space,Venue Category_Yoga Studio,Venue Category_Dessert Shop,Venue Category_Farmers Market,Venue Category_Falafel Restaurant,Venue Category_Ethiopian Restaurant,Venue Category_Electronics Store,Venue Category_Eastern European Restaurant,Venue Category_Drugstore,Venue Category_Donut Shop
40,North York,4,Venue Category_Event Space,Venue Category_Bank,Venue Category_Playground,Venue Category_Pub,Venue Category_Yoga Studio,Venue Category_Doner Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run
46,North York,4,Venue Category_Event Space,Venue Category_Bank,Venue Category_Playground,Venue Category_Pub,Venue Category_Yoga Studio,Venue Category_Doner Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run
53,North York,4,Venue Category_Event Space,Venue Category_Bank,Venue Category_Playground,Venue Category_Pub,Venue Category_Yoga Studio,Venue Category_Doner Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run
60,North York,4,Venue Category_Event Space,Venue Category_Bank,Venue Category_Playground,Venue Category_Pub,Venue Category_Yoga Studio,Venue Category_Doner Restaurant,Venue Category_Diner,Venue Category_Discount Store,Venue Category_Distribution Center,Venue Category_Dog Run
