# Coursera-IBM Data Science Capstone
## Segmenting And Clustering Neighborhoods In Toronto
### Part 3
Bofan Chen  
20210107

# Retrieving Data

Standard imports.

In [1]:
# Numpy for array functions.
try:
    import numpy as np
except:
    !pip install numpy
    import numpy as np
# Pandas for data storage and analysis.
try:
    import pandas as pd
except:
    !pip install pandas
    import pandas as pd

Retrieve the geographical coordinates of the city of Toronto.

In [2]:
try:
    from geopy.geocoders import Nominatim
except:
    !pip install geopy
    from geopy.geocoders import Nominatim
address = "Toronto, Ontario"
geo = Nominatim(user_agent = "toronto_capstone")
location = geo.geocode(address)
latitude = location.latitude
longitude = location.longitude
print("The geograpical coordinates of Toronto are ({}, {})."\
  .format(latitude, longitude))

The geograpical coordinates of Toronto are (43.6534817, -79.3839347).


Use Folium to visualize the Toronto area and its postal code locations.

In [3]:
# Import the Folium library.
try:
    import folium
except:
    !pip install folium
    import folium

# Create an interactive Folium map of Toronto.
toronto_map = folium.Map(location = [latitude, longitude], zoom_start = 10)

# Add markers to the map to indicate each postal code in the city.
toronto = pd.read_csv("toronto.csv")
for index, row in toronto.iterrows():
    label = "{} ({} - {})".format(row["Neighbourhood"], row["Borough"], row["Postal Code"])
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
        [row["Latitude"], row["Longitude"]],
        radius = 5,
        popup = label,
        color = "blue",
        fill = True,
        fill_color = "#3186cc",
        fill_opacity = 0.7,
        parse_html = False).add_to(toronto_map)

toronto_map

Use the Foursquare API to retrieve the venues near each postal code.

In [4]:
# Foursquare account codes. They are read from the environment so that the
# secrets are never stored in (or shared with) the notebook itself. Set
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting Jupyter.
import os

CLIENT_ID = os.environ.get("FOURSQUARE_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("FOURSQUARE_CLIENT_SECRET", "")
if not (CLIENT_ID and CLIENT_SECRET):
    # Warn early: without credentials the venue requests further down will fail.
    print("Warning: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; "
          "Foursquare requests will be rejected.")
# The version of the Foursquare API to be used.
VERSION = "20180605"
# A default Foursquare API limit value.
LIMIT = 100

In [5]:
# A function that uses FourSquare to retrieve venue info within a certain radius of a coordinate.
def getNearbyVenues(names, latitudes, longitudes, radius = 500):
    """Collect the Foursquare venues found around each given location.

    Relies on the notebook-level names ``requests``, ``CLIENT_ID``,
    ``CLIENT_SECRET``, ``VERSION`` and ``LIMIT`` being defined before the
    function is called.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; they are zipped, so one request is made per
        (name, latitude, longitude) triple.
    radius : number, default 500
        Search radius forwarded to the API (presumably metres - confirm
        against the Foursquare documentation).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns "Neighbourhood",
        "Neighbourhood Latitude", "Neighbourhood Longitude", "Venue",
        "Venue Latitude", "Venue Longitude" and "Venue Category". The frame
        has these columns even when no venue was found at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ["Neighbourhood", \
      "Neighbourhood Latitude", \
      "Neighbourhood Longitude", \
      "Venue", \
      "Venue Latitude", \
      "Venue Longitude", \
      "Venue Category"]

    rows = []

    for name, lat, lng in zip(names, latitudes, longitudes):

        # Let requests build and escape the query string instead of
        # formatting the URL by hand; the timeout keeps a stalled connection
        # from blocking the notebook forever.
        response = requests.get(
            "https://api.foursquare.com/v2/venues/explore",
            params = {
                "client_id": CLIENT_ID,
                "client_secret": CLIENT_SECRET,
                "v": VERSION,
                "ll": "{},{}".format(lat, lng),
                "radius": radius,
                "limit": LIMIT},
            timeout = 30)
        # Surface HTTP errors (bad credentials, quota, ...) directly rather
        # than as a KeyError on the missing "groups" entry below.
        response.raise_for_status()
        results = response.json()["response"]["groups"][0]["items"]

        # Keep the relevant info for each nearby venue.
        for v in results:
            venue = v["venue"]
            # A venue without any category gets None instead of raising
            # an IndexError.
            categories = venue.get("categories") or []
            rows.append((
              name,
              lat,
              lng,
              venue["name"],
              venue["location"]["lat"],
              venue["location"]["lng"],
              categories[0]["name"] if categories else None))

    # Passing the column names to the constructor also works for zero rows,
    # whereas assigning 7 names to an empty frame raises a ValueError.
    return pd.DataFrame(rows, columns = columns)

In [6]:
# The HTTP client used inside getNearbyVenues; it has to be imported before
# the function is called.
import requests

# Query the venues around every postal code listed in the Toronto table.
toronto_venues = getNearbyVenues(
    toronto["Neighbourhood"],
    toronto["Latitude"],
    toronto["Longitude"])

In [7]:
# Report the size of the venues table, then preview its first rows.
nums = toronto_venues.shape
print("The venues table has {} rows and {} columns.".format(*nums))
toronto_venues.head(20)

The venues table has 2121 rows and 7 columns.


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.753259,-79.329656,Bella Vita Catering & Private Chef Service,43.756651,-79.331524,BBQ Joint
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
5,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
6,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant
7,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
8,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
9,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center


In [8]:
# Lift the row limit so that complete tables are displayed from here on.
pd.set_option("display.max_rows", None)

# Count the venues retrieved for each neighbourhood.
venue_counts = toronto_venues.groupby("Neighbourhood")[["Venue"]].count()
venue_counts.rename(columns = {"Venue" : "Number Of Venues"})

Unnamed: 0_level_0,Number Of Venues
Neighbourhood,Unnamed: 1_level_1
Agincourt,4
"Alderwood, Long Branch",8
"Bathurst Manor, Wilson Heights, Downsview North",21
Bayview Village,4
"Bedford Park, Lawrence Manor East",22
Berczy Park,58
"Birch Cliff, Cliffside West",4
"Brockton, Parkdale Village, Exhibition Place",22
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",17
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",17


In [9]:
# Count the distinct venue categories across all neighbourhoods.
category_names = pd.unique(toronto_venues["Venue Category"])
print("There are {} unique categories of venues.".format(len(category_names)))

There are 272 unique categories of venues.


# Preparing Neighborhood Venue Data

In [10]:
# One-hot encode the category of every venue: one indicator column per category.
toronto_onehot = pd.get_dummies(
    toronto_venues[["Venue Category"]],
    prefix = "",
    prefix_sep = "")

# Append the "Neighbourhood" column of the venues table to the encoded frame.
toronto_onehot["Neighbourhood"] = toronto_venues["Neighbourhood"]

# Rotate the column order so that the last (just appended) column comes first.
last_column = toronto_onehot.columns[-1:].tolist()
fixed_columns = last_column + toronto_onehot.columns[:-1].tolist()
toronto_onehot = toronto_onehot.loc[:, fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# Average the one-hot rows within each neighbourhood; every value is then the
# share of that neighbourhood's venues falling into the given category.
toronto_grouped = toronto_onehot.groupby("Neighbourhood", as_index = False).mean()
toronto_grouped.head(20)

Unnamed: 0,Neighbourhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# A function to determine the most common venue categories for given postal codes.
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of ``row`` is skipped (the callers in this notebook keep
    the neighbourhood name there); the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues``
    of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending = False)
    return ranked.index[:num_top_venues].values

In [13]:
# Limit the new dataframe to the top 10 venue categories per postal code.
num_top_venues = 10

indicators = ["st", "nd", "rd"]

# Create columns according to the number of top venues. Ranks 1-3 take their
# own suffix from `indicators`; every later rank uses "th". The choice is made
# explicitly rather than through a bare `except`, which would also hide
# unrelated errors.
columns = ["Neighbourhood"]
for rank in range(1, num_top_venues + 1):
    suffix = indicators[rank - 1] if rank <= len(indicators) else "th"
    columns.append("{}{} Most Common Venue".format(rank, suffix))

# Collect one record per neighbourhood and build the dataframe in a single
# step instead of filling an empty frame row by row.
rows = [
    [neighbourhood,
     *return_most_common_venues(toronto_grouped.iloc[i, :], num_top_venues)]
    for i, neighbourhood in enumerate(toronto_grouped["Neighbourhood"])]
neighborhoods_venues_sorted = pd.DataFrame(
    rows, columns = columns, index = toronto_grouped.index)

neighborhoods_venues_sorted.head(20)

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Breakfast Spot,Latin American Restaurant,Skating Rink,Yoga Studio,Eastern European Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,"Alderwood, Long Branch",Pizza Place,Sandwich Place,Coffee Shop,Pub,Dance Studio,Pharmacy,Gym,Golf Course,Greek Restaurant,Eastern European Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Pet Store,Fried Chicken Joint,Shopping Mall,Sandwich Place,Diner,Supermarket,Middle Eastern Restaurant,Sushi Restaurant
3,Bayview Village,Japanese Restaurant,Café,Bank,Chinese Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
4,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Greek Restaurant,Toy / Game Store,Liquor Store,Comfort Food Restaurant,Juice Bar,Butcher,Café
5,Berczy Park,Coffee Shop,Cocktail Bar,Beer Bar,Seafood Restaurant,Farmers Market,Restaurant,Cheese Shop,Bakery,Shopping Mall,Juice Bar
6,"Birch Cliff, Cliffside West",College Stadium,Skating Rink,General Entertainment,Café,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Event Space,Dim Sum Restaurant
7,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Coffee Shop,Climbing Gym,Burrito Place,Stadium,Restaurant,Bar,Italian Restaurant,Bakery
8,"Business reply mail Processing Centre, South C...",Light Rail Station,Pizza Place,Garden,Comic Shop,Gym / Fitness Center,Restaurant,Butcher,Burrito Place,Skate Park,Smoke Shop
9,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Rental Car Location,Bar,Coffee Shop,Sculpture Garden,Plane,Boat or Ferry


# Using $k$-Means Clustering To Group The Postal Codes

In [14]:
# Cluster on the category frequencies only. The column is dropped by keyword:
# passing the axis positionally (`drop("Neighbourhood", 1)`) is no longer
# accepted by current pandas releases.
toronto_grouped_clustering = toronto_grouped.drop(columns = "Neighbourhood")

# Run k-means clustering
from sklearn.cluster import KMeans
k_clusters = 5
# n_init is pinned to 10 (the historical scikit-learn default) so that the
# clustering does not change with the library version.
kmeans = KMeans(n_clusters = k_clusters, n_init = 10, random_state = 0)\
  .fit(toronto_grouped_clustering)

# Check the cluster labels generated for each row in the dataframe.
kmeans.labels_[0:10]

array([4, 0, 4, 4, 4, 4, 4, 4, 4, 4], dtype=int32)

In [15]:
# Add cluster labels as the first column. DataFrame.insert raises when the
# column already exists, so labels left over from an earlier run of this cell
# are removed first; the cell can then be re-run safely.
if "Cluster Labels" in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted = \
      neighborhoods_venues_sorted.drop(columns = "Cluster Labels")
neighborhoods_venues_sorted.insert(0, "Cluster Labels", kmeans.labels_)

In [16]:
# Attach the cluster label and ranked venue categories of every neighbourhood
#   to the postal code table, which holds the latitude/longitude values.
venue_rankings = neighborhoods_venues_sorted.set_index("Neighbourhood")
toronto_merged = toronto.join(venue_rankings, on = "Neighbourhood")\
  .drop(columns = ["Unnamed: 0"])
toronto_merged.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,Park,Food & Drink Shop,BBQ Joint,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,M4A,North York,Victoria Village,43.725882,-79.315572,4.0,French Restaurant,Portuguese Restaurant,Hockey Arena,Coffee Shop,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,4.0,Coffee Shop,Bakery,Café,Park,Pub,Theater,Breakfast Spot,Yoga Studio,Beer Store,Dessert Shop
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,4.0,Clothing Store,Accessories Store,Coffee Shop,Event Space,Furniture / Home Store,Athletics & Sports,Boutique,Vietnamese Restaurant,Coworking Space,Discount Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,4.0,Coffee Shop,Sushi Restaurant,Yoga Studio,Discount Store,Beer Bar,Japanese Restaurant,Italian Restaurant,Diner,Distribution Center,Café
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242,0.0,Pizza Place,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant,Harbor / Marina
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,4.0,Fast Food Restaurant,Print Shop,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store
7,M3B,North York,Don Mills,43.745906,-79.352188,4.0,Gym,Beer Store,Japanese Restaurant,Coffee Shop,Restaurant,Athletics & Sports,Sporting Goods Shop,Sandwich Place,Bike Shop,Supermarket
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,4.0,Pizza Place,Gym / Fitness Center,Gastropub,Café,Breakfast Spot,Flea Market,Bank,Intersection,Athletics & Sports,Pharmacy
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,4.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Bubble Tea Shop,Cosmetics Shop,Japanese Restaurant,Hotel,Café,Diner,Lingerie Store


In [17]:
# Create a map with different color labels for each postal code cluster.
map_clusters = folium.Map(location = [latitude, longitude], zoom_start = 10)

# Set a color scheme for the clusters: one evenly spaced rainbow color per
# cluster, indexed directly by the 0-based k-means label.
from matplotlib import cm, colors
rainbow = [colors.rgb2hex(c) for c in cm.rainbow(np.linspace(0, 1, k_clusters))]

# Add markers to the map.
for lat, lng, nbh, cluster in zip(toronto_merged['Latitude'], \
  toronto_merged["Longitude"], \
  toronto_merged["Neighbourhood"], \
  toronto_merged["Cluster Labels"]):
    if np.isnan(cluster):
        # Postal codes without a cluster label get a gray marker. They are
        # captioned as unclustered so they cannot be confused with the real
        # cluster 0.
        marker_color = "gray"
        caption = nbh + " (No cluster)"
    else:
        marker_color = rainbow[int(cluster)]
        caption = nbh + " (Cluster " + str(int(cluster)) + ")"
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = folium.Popup(caption, parse_html = True),
        color = marker_color,
        fill = True,
        fill_color = marker_color,
        fill_opacity = 0.7).add_to(map_clusters)

map_clusters

# Examining The Postal Code Clusters

In [18]:
# Rows of cluster 0, reduced to the borough (column 1) and everything from the
# cluster label (column 5) onwards.
in_cluster = toronto_merged["Cluster Labels"].eq(0)
shown = [1, *range(5, toronto_merged.shape[1])]
toronto_merged.loc[in_cluster].iloc[:, shown]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Etobicoke,0.0,Pizza Place,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant,Harbor / Marina
50,North York,0.0,Intersection,Furniture / Home Store,Pizza Place,Comic Shop,Concert Hall,Falafel Restaurant,Fabric Shop,Event Space,Ethiopian Restaurant,Escape Room
63,York,0.0,Pizza Place,Bus Line,Convenience Store,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
70,Etobicoke,0.0,Pizza Place,Coffee Shop,Sandwich Place,Discount Store,Chinese Restaurant,Middle Eastern Restaurant,Intersection,Eastern European Restaurant,Electronics Store,Drugstore
72,North York,0.0,Pharmacy,Grocery Store,Coffee Shop,Pizza Place,Comfort Food Restaurant,Dim Sum Restaurant,Fabric Shop,Event Space,Ethiopian Restaurant,Escape Room
93,Etobicoke,0.0,Pizza Place,Sandwich Place,Coffee Shop,Pub,Dance Studio,Pharmacy,Gym,Golf Course,Greek Restaurant,Eastern European Restaurant


Postal codes in cluster 0 all have pizza places in their top four venue categories. There are also a good number of foreign restaurants and coffee shops. These neighborhoods are located in the western half of the city.

In [19]:
# Rows of cluster 1, reduced to the borough (column 1) and everything from the
# cluster label (column 5) onwards.
in_cluster = toronto_merged["Cluster Labels"].eq(1)
shown = [1, *range(5, toronto_merged.shape[1])]
toronto_merged.loc[in_cluster].iloc[:, shown]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1.0,Park,Food & Drink Shop,BBQ Joint,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
21,York,1.0,Women's Store,Park,Pool,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
35,East York,1.0,Intersection,Park,Convenience Store,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
61,Central Toronto,1.0,Swim School,Park,Business Service,Bus Line,Yoga Studio,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
66,North York,1.0,Park,Convenience Store,Yoga Studio,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
77,Etobicoke,1.0,Sandwich Place,Park,Mobile Phone Shop,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Yoga Studio
85,Scarborough,1.0,Intersection,Playground,Park,Arts & Crafts Store,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
91,Downtown Toronto,1.0,Park,Playground,Trail,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant


Postal codes in cluster 1 prominently feature parks, playgrounds, and other outdoor public spaces.

In [20]:
# Rows of cluster 2, reduced to the borough (column 1) and everything from the
# cluster label (column 5) onwards.
in_cluster = toronto_merged["Cluster Labels"].eq(2)
shown = [1, *range(5, toronto_merged.shape[1])]
toronto_merged.loc[in_cluster].iloc[:, shown]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Etobicoke,2.0,Filipino Restaurant,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store,Fast Food Restaurant


The lone postal code in cluster 2 contains foreign restaurants that feature Filipino and Eastern European cuisine.

In [21]:
# Rows of cluster 3, reduced to the borough (column 1) and everything from the
# cluster label (column 5) onwards.
in_cluster = toronto_merged["Cluster Labels"].eq(3)
shown = [1, *range(5, toronto_merged.shape[1])]
toronto_merged.loc[in_cluster].iloc[:, shown]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,North York,3.0,Park,Yoga Studio,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store
64,York,3.0,Park,Yoga Studio,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store


The two postal codes in cluster 3 appear identical in their top venue categories. However, the table of venue counts per neighborhood above shows that each of them contains only a single venue - a park - so the remaining "most common" categories are merely zero-frequency fillers.

In [22]:
# Rows of cluster 4, reduced to the borough (column 1) and everything from the
# cluster label (column 5) onwards.
in_cluster = toronto_merged["Cluster Labels"].eq(4)
shown = [1, *range(5, toronto_merged.shape[1])]
toronto_merged.loc[in_cluster].iloc[:, shown]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,4.0,French Restaurant,Portuguese Restaurant,Hockey Arena,Coffee Shop,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant
2,Downtown Toronto,4.0,Coffee Shop,Bakery,Café,Park,Pub,Theater,Breakfast Spot,Yoga Studio,Beer Store,Dessert Shop
3,North York,4.0,Clothing Store,Accessories Store,Coffee Shop,Event Space,Furniture / Home Store,Athletics & Sports,Boutique,Vietnamese Restaurant,Coworking Space,Discount Store
4,Downtown Toronto,4.0,Coffee Shop,Sushi Restaurant,Yoga Studio,Discount Store,Beer Bar,Japanese Restaurant,Italian Restaurant,Diner,Distribution Center,Café
6,Scarborough,4.0,Fast Food Restaurant,Print Shop,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store
7,North York,4.0,Gym,Beer Store,Japanese Restaurant,Coffee Shop,Restaurant,Athletics & Sports,Sporting Goods Shop,Sandwich Place,Bike Shop,Supermarket
8,East York,4.0,Pizza Place,Gym / Fitness Center,Gastropub,Café,Breakfast Spot,Flea Market,Bank,Intersection,Athletics & Sports,Pharmacy
9,Downtown Toronto,4.0,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Bubble Tea Shop,Cosmetics Shop,Japanese Restaurant,Hotel,Café,Diner,Lingerie Store
10,North York,4.0,Pizza Place,Pub,Japanese Restaurant,Asian Restaurant,Bakery,Park,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant
12,Scarborough,4.0,Construction & Landscaping,Bar,Yoga Studio,Electronics Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Escape Room


Postal codes in cluster 4, the largest cluster, contain a wide variety of establishments. Coffee shops, cafes, bars, pizza places, and ethnic restaurants are common in many of them.

In [23]:
# Rows that received no cluster label, reduced to the borough (column 1) and
# everything from the cluster label (column 5) onwards.
unclustered = toronto_merged["Cluster Labels"].isna()
shown = [1, *range(5, toronto_merged.shape[1])]
toronto_merged.loc[unclustered].iloc[:, shown]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,North York,,,,,,,,,,,
95,Scarborough,,,,,,,,,,,


The Foursquare requests returned no venues for the above two postal codes, so they could not be included in the clustering and have no cluster label.