# Segmenting and Clustering Neighborhoods in Toronto

## Web Scraping: load Wikipedia table into `pandas` dataframe

In [1]:
import os # library to read configuration from environment variables
import warnings # library to emit non-fatal warnings

import pandas as pd # library for data analysis
import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML documents

In [2]:
# Using BeautifulSoup to get the table
wikiurl = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# A timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(wikiurl, timeout=30)
# Stop here on an HTTP error instead of parsing an error page
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# The postal code data is taken from the first <table> element of the page
table = soup.find('table')
if table is None:
    raise ValueError('No <table> element found at {}'.format(wikiurl))

In [3]:
# Retrieve the content in the necessary format
# Every <td> cell of the table becomes one record: the first three characters
# of its <p> text are used as the postal code, and its <span> text is split at
# the first "(" into borough and neighborhood parts
# (the span text appears to have the form "Borough(Neighborhood / ...)").
table_contents = []
for row in table.find_all('td'):
    span_text = row.span.text
    # Cells marked as not assigned are left out of the result
    if span_text == 'Not assigned':
        continue
    cell = {}
    cell['PostalCode'] = row.p.text[:3]
    cell['Borough'] = span_text.split('(')[0]
    # Strip the closing parenthesis and turn " /" separators into commas
    cell['Neighborhood'] = span_text.split('(')[1].strip(')').replace(' /', ',').replace(')', ' ').strip(' ')
    table_contents.append(cell)

In [4]:
# Turn the list of per-cell records into a pandas dataframe (one row per record)
df = pd.DataFrame(data=table_contents)

In [5]:
# Apply suggested cleaning by the Hints
# Maps the scraped borough strings that need fixing to their cleaned-up names
borough_fixes = {
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
}
df['Borough'] = df['Borough'].replace(borough_fixes)

In [6]:
# Preview the first rows of the dataframe after the borough cleanup
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [7]:
# Size of the dataframe as (rows, columns)
df.shape

(103, 3)

## Find Latitude and Longitude Coordinates

In [8]:
!pip install geocoder
import geocoder



In [9]:
# THIS API DID NOT WORK (ERROR: [REQUEST_DENIED] Google - Geocode [empty])
# (USING GOOGLE COLABORATORY), HENCE I USE THE PREDEFINED CSV IN THE NEXT CELL

# latitude = []
# longitude = []

# for postal_code in df["PostalCode"].to_list():
#   print(postal_code + "...")
#   lat_lng_coords = None
#   # loop until you get the coordinates
#   while(lat_lng_coords is None):
#     g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#     lat_lng_coords = g.latlng
#   latitude.append(lat_lng_coords[0])
#   longitude.append(lat_lng_coords[1])

# df["Latitude"] = latitude
# df["Longitude"] = longitude

In [10]:
# Load the latitude/longitude of every postal code from the preset csv file
coordinates_csv_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv"
postal_df = pd.read_csv(coordinates_csv_url)
postal_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
# Join the lat-long coordinates to the original dataframe.
# Coordinate columns left over from a previous run of this cell are dropped
# first, so re-running the cell does not fail on overlapping column names.
coordinates = postal_df.set_index("Postal Code")
df = df.drop(columns=coordinates.columns, errors="ignore").join(coordinates, on="PostalCode")
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


## Explore Clusters

In [12]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

### Basic Visualizations

In [13]:
# Get the center point of the city Toronto
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# Fail with a clear message when the geocoder returns no match for the address
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the center of Toronto is ({}, {}).'.format(latitude, longitude))

The geograpical coordinate of the center of Toronto is (43.6534817, -79.3839347).


In [14]:
# Create map of Toronto
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one marker per neighbourhood. Rows without coordinates are skipped,
# since a marker needs a concrete latitude/longitude pair.
located = df.dropna(subset=['Latitude', 'Longitude'])
for lat, lng, borough, neighborhood in zip(located['Latitude'], located['Longitude'], located['Borough'], located['Neighborhood']):
    # Popup text shown for the marker: "<neighborhood>, <borough>"
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='gray',
        fill_opacity=0.7).add_to(map_toronto)

In [15]:
# Show the map (the bare last expression of the cell is what gets displayed)
map_toronto

In [16]:
# We restrict our dataset to the boroughs whose names contain the word "Toronto".
# na=False treats a missing borough as "no match" so the mask stays boolean.
toronto_data = df[df['Borough'].str.contains("Toronto", na=False)].reset_index(drop=True)
print("Boroughs are restricted to " + ", ".join(toronto_data['Borough'].unique().tolist()))

Boroughs are restricted to Downtown Toronto, East Toronto, West Toronto, East York/East Toronto, Central Toronto, Downtown Toronto Stn A, East Toronto Business


In [17]:
# Create map of the restricted part of Toronto with neighbourhood markers
map_toronto2 = folium.Map(location=[latitude, longitude], zoom_start=12)

# Rows without coordinates are skipped, since a marker needs a concrete
# latitude/longitude pair.
located_toronto = toronto_data.dropna(subset=['Latitude', 'Longitude'])
for lat, lng, borough, neighborhood in zip(located_toronto['Latitude'], located_toronto['Longitude'], located_toronto['Borough'], located_toronto['Neighborhood']):
    # Popup text shown for the marker: "<neighborhood>, <borough>"
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='gray',
        fill_opacity=0.7).add_to(map_toronto2)

map_toronto2

### Get venue data via Foursquare API

In [18]:
# Set the necessary parameters to initialize an API call.
# The Foursquare credentials are read from the environment instead of being
# hard-coded, so they are never stored in the saved notebook.
# NOTE: credentials that were previously committed in this cell should be
# regenerated, since they must be considered leaked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    warnings.warn('FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
                  'Foursquare API calls will fail.')
VERSION = '20180605'  # sent as the `v` request parameter
LIMIT = 100  # sent as the `limit` request parameter

In [19]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Parameters
    ----------
    row : mapping-like
        Record holding the category list under the key 'categories' or,
        when that key is absent, under 'venue.categories'.

    Returns
    -------
    The 'name' entry of the first category, or None when the category list
    is missing (None) or empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [20]:
# Create a function that returns with all the nearby venues of an area
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=30):
    """Query the Foursquare "explore" endpoint around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Consumed in parallel; each (name, latitude, longitude) triple
        describes one location to explore.
    radius : number, default 500
        Forwarded unchanged as the `radius` request parameter.
    timeout : number, default 30
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=timeout)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        for v in results:
            venue = v['venue']
            # A venue may come without any category; record None in that case
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names here keeps the schema stable for empty results
    return pd.DataFrame(rows, columns=column_names)

In [21]:
# Fetch the venues around every neighborhood of the restricted Toronto data
toronto_venues = getNearbyVenues(
    toronto_data['Neighborhood'],
    toronto_data['Latitude'],
    toronto_data['Longitude'],
)

In [22]:
# Preview the first rows of the venue table
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [23]:
# Report the number of venues and of distinct neighborhoods they belong to
print("We have {} venues in {} neighborhoods.".format(toronto_venues.shape[0], len(toronto_venues['Neighborhood'].unique())))

We have 1581 venues in 39 boroughs.


In [24]:
# Count the venues per neighborhood (one count for each distinct 'Neighborhood' value)
toronto_venues['Neighborhood'].value_counts()

First Canadian Place, Underground city                                                                        100
Garden District, Ryerson                                                                                      100
Commerce Court, Victoria Hotel                                                                                100
Harbourfront East, Union Station, Toronto Islands                                                             100
Toronto Dominion Centre, Design Exchange                                                                      100
Enclave of M5E                                                                                                100
Richmond, Adelaide, King                                                                                       92
St. James Town                                                                                                 82
Church and Wellesley                                                                    

In [25]:
# Number of distinct venue categories (a missing value counts as one category)
n_categories = toronto_venues['Venue Category'].nunique(dropna=False)
print('There are {} uniques categories.'.format(n_categories))

There are 231 uniques categories.


### Analyze each neighborhood

In [26]:
# One-hot encode the venue categories: one indicator column per category
venue_dummies = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
# Put the neighborhood of each venue in front, under the name 'Neighbourhood'
venue_dummies.insert(0, 'Neighbourhood', toronto_venues['Neighborhood'])
toronto_onehot = venue_dummies
toronto_onehot.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,...,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Stadium,Stationery Store,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
# Rows are venues; every column except the leading 'Neighbourhood' one is a category
n_onehot_rows, n_onehot_cols = toronto_onehot.shape
print("We have {} venues in {} categories.".format(n_onehot_rows, n_onehot_cols - 1))

We have 1581 venues in 231 categories.


In [28]:
# Average the indicator columns per neighbourhood, i.e. the share of each category
category_means = toronto_onehot.groupby('Neighbourhood').mean()
# Move 'Neighbourhood' from the index back into a regular first column
toronto_grouped = category_means.reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,...,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soup Place,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Stadium,Stationery Store,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.05,0.0,0.0,0.0,0.016667,0.016667,0.0,0.033333,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667
2,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,...,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032787,0.032787,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.016393,0.0,0.016393
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
# Shape of the grouped frame as (rows, columns)
print(toronto_grouped.shape)

(39, 232)


In [30]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of `row`, ignoring its first entry.

    The first entry of `row` is skipped, the remaining values are sorted in
    descending order, and the index labels of the first `num_top_venues`
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [31]:
num_top_venues = 10

# Column layout: the neighbourhood name followed by one column per rank
columns = ['Neighbourhood'] + [
    'Most Common Venue #{}'.format(rank) for rank in range(1, num_top_venues + 1)
]

# Start from an empty frame with that layout and copy the neighbourhood names in
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

# Fill the rank columns of each row with that neighbourhood's top categories
for position in range(toronto_grouped.shape[0]):
    grouped_row = toronto_grouped.iloc[position, :]
    neighborhoods_venues_sorted.iloc[position, 1:] = return_most_common_venues(grouped_row, num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,Most Common Venue #1,Most Common Venue #2,Most Common Venue #3,Most Common Venue #4,Most Common Venue #5,Most Common Venue #6,Most Common Venue #7,Most Common Venue #8,Most Common Venue #9,Most Common Venue #10
0,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Restaurant,Cheese Shop,Pharmacy,Seafood Restaurant,Beer Bar,Farmers Market,Shopping Mall
1,"Brockton, Parkdale Village, Exhibition Place",Café,Bakery,Breakfast Spot,Coffee Shop,Yoga Studio,Stadium,Burrito Place,Restaurant,Climbing Gym,Pet Store
2,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Terminal,Airport Lounge,Boat or Ferry,Rental Car Location,Coffee Shop,Sculpture Garden,Harbor / Marina,Bar,Airport Gate
3,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Japanese Restaurant,Thai Restaurant,Salad Place,Burger Joint,Bubble Tea Shop,Poke Place
4,Christie,Grocery Store,Café,Park,Candy Store,Baby Store,Italian Restaurant,Athletics & Sports,Coffee Shop,Nightclub,Restaurant


### Clustering the neighborhoods by distance

In this section we cluster the neighborhoods using only their positions on the map. As a result, we expect the clusters to reflect the geographical separation of the neighborhoods. This is just a test to ensure that the clustering algorithm works.

In [32]:
# Number of clusters to look for
kclusters = 4
# Fit k-means on the latitude/longitude of the neighborhoods only
neighborhood_coordinates = toronto_data[["Latitude", "Longitude"]]
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(neighborhood_coordinates)
# Attach the resulting labels as the first column of a copy of the original data
toronto_merged = toronto_data.copy()
toronto_merged.insert(loc=0, column='Cluster Labels', value=kmeans.labels_)
toronto_merged.head()

Unnamed: 0,Cluster Labels,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,3,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,3,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,0,M4E,East Toronto,The Beaches,43.676357,-79.293031
4,3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [33]:
# Create map of Toronto
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# Set color scheme for the clusters: one hex colour per cluster, evenly
# spaced over the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map, coloured by cluster label
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # Cluster label k uses the k-th colour of the palette
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

We can see that the clusters are laid out as expected for a classical clustering by geometric distance.

### Clustering the neighborhoods by top venues

In this section we cluster the neighborhoods considering their top venues. Neighborhoods with similar types of venues should be contained in the same clusters.

In [34]:
# Set number of clusters
kclusters = 4
# Cluster the neighborhoods on their per-category venue frequencies;
# the name column is not a feature, so it is removed by keyword
# (the positional axis argument of drop() is not accepted by pandas 2.x)
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighbourhood')
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

In [35]:
# Add cluster labels to the original data.
# A label column left by a previous run of this cell is removed first, so the
# cell can be re-executed without insert() failing on an existing column.
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
# Look up the labels and top venues of each neighborhood by its name
toronto_merged = toronto_data.copy()
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighborhood')

In [36]:
# Create map of Toronto
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# Set color scheme for the clusters: one hex colour per cluster, evenly
# spaced over the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add markers to the map. Rows without a cluster label (no match in the join
# that produced 'Cluster Labels') cannot be coloured and are skipped.
labelled = toronto_merged.dropna(subset=['Cluster Labels'])
for lat, lon, poi, cluster in zip(labelled['Latitude'], labelled['Longitude'], labelled['Neighborhood'], labelled['Cluster Labels']):
    # Labels may be stored as floats after the join; a list index must be an int
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # Cluster label k uses the k-th colour of the palette
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

In [37]:
# Show, for every cluster, the borough and the top-venue columns of its members
# (column 1 plus every column from position 6 onwards)
summary_columns = toronto_merged.columns[[1] + list(range(6, toronto_merged.shape[1]))]
for k in range(kclusters):
    print("Cluster #{}".format(k))
    in_cluster = toronto_merged['Cluster Labels'] == k
    display(toronto_merged.loc[in_cluster, summary_columns])

Cluster #0


Unnamed: 0,Borough,Most Common Venue #1,Most Common Venue #2,Most Common Venue #3,Most Common Venue #4,Most Common Venue #5,Most Common Venue #6,Most Common Venue #7,Most Common Venue #8,Most Common Venue #9,Most Common Venue #10
19,Central Toronto,Ice Cream Shop,Garden,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


Cluster #1


Unnamed: 0,Borough,Most Common Venue #1,Most Common Venue #2,Most Common Venue #3,Most Common Venue #4,Most Common Venue #5,Most Common Venue #6,Most Common Venue #7,Most Common Venue #8,Most Common Venue #9,Most Common Venue #10
9,East York/East Toronto,Park,Convenience Store,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
29,Central Toronto,Park,Playground,Trail,Department Store,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
33,Downtown Toronto,Park,Playground,Trail,Department Store,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


Cluster #2


Unnamed: 0,Borough,Most Common Venue #1,Most Common Venue #2,Most Common Venue #3,Most Common Venue #4,Most Common Venue #5,Most Common Venue #6,Most Common Venue #7,Most Common Venue #8,Most Common Venue #9,Most Common Venue #10
0,Downtown Toronto,Coffee Shop,Park,Bakery,Breakfast Spot,Theater,Pub,Café,French Restaurant,Wine Shop,Health Food Store
1,Downtown Toronto,Coffee Shop,Clothing Store,Japanese Restaurant,Hotel,Bubble Tea Shop,Middle Eastern Restaurant,Café,Italian Restaurant,Cosmetics Shop,Fast Food Restaurant
2,Downtown Toronto,Coffee Shop,Café,Gastropub,Cocktail Bar,Creperie,American Restaurant,Park,Lingerie Store,Seafood Restaurant,Clothing Store
3,East Toronto,Asian Restaurant,Neighborhood,Health Food Store,Trail,Pub,Yoga Studio,Dog Run,Diner,Discount Store,Distribution Center
4,Downtown Toronto,Coffee Shop,Cocktail Bar,Bakery,Restaurant,Cheese Shop,Pharmacy,Seafood Restaurant,Beer Bar,Farmers Market,Shopping Mall
5,Downtown Toronto,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Japanese Restaurant,Thai Restaurant,Salad Place,Burger Joint,Bubble Tea Shop,Poke Place
6,Downtown Toronto,Grocery Store,Café,Park,Candy Store,Baby Store,Italian Restaurant,Athletics & Sports,Coffee Shop,Nightclub,Restaurant
7,Downtown Toronto,Coffee Shop,Café,Restaurant,Gym,Hotel,Deli / Bodega,Clothing Store,Thai Restaurant,Steakhouse,Concert Hall
8,West Toronto,Bakery,Pharmacy,Pet Store,Music Venue,Brewery,Bar,Bank,Café,Supermarket,Middle Eastern Restaurant
10,Downtown Toronto,Coffee Shop,Aquarium,Café,Hotel,Fried Chicken Joint,Italian Restaurant,Scenic Lookout,Restaurant,Brewery,Pizza Place


Cluster #3


Unnamed: 0,Borough,Most Common Venue #1,Most Common Venue #2,Most Common Venue #3,Most Common Venue #4,Most Common Venue #5,Most Common Venue #6,Most Common Venue #7,Most Common Venue #8,Most Common Venue #9,Most Common Venue #10
18,Central Toronto,Park,Swim School,Bus Line,Yoga Studio,Diner,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
