In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import numpy as np
# ---- insertion by chakri
!conda install -c conda-forge geopy=2.0 --yes
import geopy
from geopy.geocoders import Nominatim # for converting a location address into latitude and longitude values
print('Geopy installed and imported!')
# ------ end of insertion
#
from pandas.io.json import json_normalize  # for tranforming a JSON file into a pandas dataframe
#
#import folium # Importing map rendering library
# ---- new ----
!conda install -c conda-forge folium=0.5.0 --yes
import folium
print('Folium installed and imported!')
# ---- end of new ---
# importing k-means from clustering stage
from sklearn.cluster import KMeans
#
# Importing Matplotlib and associated plotting modules for plotting needs
import matplotlib.cm as cm
import matplotlib.colors as colors
import requests
#

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Geopy installed and imported!
Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Folium installed and imported!


In [None]:
# Beging to scrape the RAW table for forming data frame needs
#
#source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
#soup = BeautifulSoup(source, 'lxml')
#print('Hello')

In [2]:
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(source, 'lxml')

table = soup.find("table")
table_rows = table.tbody.find_all("tr")

res = []
for tr in table_rows:
    td = tr.find_all("td")
    row = [tr.text for tr in td]
    
    # Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
    if row != [] and row[1] != "Not assigned\n":
       # print(row[1])
       # print('Yes in condition')
        # If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough.
        if "Not assigned\n" in row[2]: 
            row[2] = row[1]
        res.append(row)

# Dataframe with 3 columns
df = pd.DataFrame(res, columns = ["PostalCode", "Borough", "Neighborhood"])
df.head()
#

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A\n,North York\n,Parkwoods\n
1,M4A\n,North York\n,Victoria Village\n
2,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"
3,M6A\n,North York\n,"Lawrence Manor, Lawrence Heights\n"
4,M7A\n,Downtown Toronto\n,"Queen's Park, Ontario Provincial Government\n"


In [3]:

# Removing '\n' at the end of text
df["PostalCode"] = df["PostalCode"].str.replace("\n","")
df["Borough"] = df["Borough"].str.replace("\n","")
df["Neighborhood"] = df["Neighborhood"].str.replace("\n","")
df.head()
#

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [4]:
# Grouping all neighborhoods having the same postal code
df = df.groupby(["PostalCode", "Borough"])["Neighborhood"].apply(", ".join).reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [5]:
# Using the .shape method to print the number of rows of your dataframe.
print("Shape: ", df.shape)
#

Shape:  (103, 3)


In [6]:
# Getting the latitude and the longitude coordinates of each neighborhood into a data frame
df_geo_coordinates = pd.read_csv("http://cocl.us/Geospatial_data")
df_geo_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [7]:
# Creating data frame having neighbourhood details and thier geographic coordinates
df_toronto_data = pd.merge(df, df_geo_coordinates, how='left', left_on = 'PostalCode', right_on = 'Postal Code')
# remove the "Postal Code" column
df_toronto_data.drop("Postal Code", axis=1, inplace=True)
df_toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [28]:
# Exploring and clustering the neighborhoods in Toronto
#
# Getting the longitude and latitude of Toronto
address = "Toronto, ON"

geolocator = Nominatim(user_agent="toronto_explorer")
#location_val = geolocator.geocode(address)
#latitude_val = location_val.latitude
#longitude_val = location_val.longitude
# -----
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('The Geo coordinates of Toronto city are {}, {}.'.format(latitude, longitude))

The Geo coordinates of Toronto city are 43.6534817, -79.3839347.


In [29]:
# Creating map of the Toronto City with neighborhoods superimposed on top of map
map_toronto_city = folium.Map(location=[latitude, longitude], zoom_start=10)
map_toronto_city

In [30]:
# Adding markers to above map 
for lat, lng, borough, neighborhood in zip(
        df_toronto_data['Latitude'], 
        df_toronto_data['Longitude'], 
        df_toronto_data['Borough'], 
        df_toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_city)  

map_toronto_city

In [31]:
# Creating a dataframe having neighbourhoods where borought contains word 'Toronto'
df_toronto_brgh = df_toronto_data[df_toronto_data['Borough'].str.contains("Toronto")].reset_index(drop=True)
df_toronto_brgh.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [32]:
# Plotting map for this restricted region of boroughs having 'Toronto' as part of their name and add required markers 
map_toronto_brgh = folium.Map(location=[latitude_val, longitude_val], zoom_start=12)
for lat, lng, borough, neighborhood in zip(
        df_toronto_brgh['Latitude'], 
        df_toronto_brgh['Longitude'], 
        df_toronto_brgh['Borough'], 
        df_toronto_brgh['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_brgh)  

map_toronto_brgh

In [33]:
# Defining Foursquare Credentials and Version using details of my Foursquare Details
#CLIENT_ID = ''
#CLIENT_SECRET = ''
#VERSION = ''
#
CLIENT_ID = '5YCOLTZBQP1VEBLHVGTD5ZLU5BJEDRZLOI0SPFU0BVO22NAM' # My Foursquare ID
CLIENT_SECRET = 'KJIKRCVUGUYIFYZUII4W5LZKSUMROGB0EALDOAJ3WVJRKZ2M' # My Foursquare Secret
#VERSION = '20180604'
VERSION = '20200604'
#CLIENT_SECRET = 'your-client-secret' # your Foursquare Secret

In [34]:
# Explore the first neighborhood in our data frame "df_toronto_data"
#
first_neighborhood_name = df_toronto_brgh.loc[0, 'Neighborhood']
#
print(f"The first neighborhood's name is '{first_neighborhood_name}'.")
# Getting the neighborhood's latitude and longitude values.
first_neighborhood_latitude = df_toronto_brgh.loc[0, 'Latitude'] # neighborhood latitude value
first_neighborhood_longitude = df_toronto_brgh.loc[0, 'Longitude'] # neighborhood longitude value

print('Latitude and longitude values of {} are {}, {}.'.format(first_neighborhood_name, 
                                                               first_neighborhood_latitude, 
                                                               first_neighborhood_longitude))

The first neighborhood's name is 'The Beaches'.
Latitude and longitude values of The Beaches are 43.67635739999999, -79.2930312.


In [35]:
# Getting the top 100 venues that are in The Beaches within a radius of 500 meters neighbourhoods having 'Toronto' in name.
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    first_neighborhood_latitude, 
    first_neighborhood_longitude, 
    radius, 
    LIMIT)

# abstracting the result to a json file
results = requests.get(url).json()

In [36]:
# Defining a function that gets the category of venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [37]:
# Creating a dataframe by cleansing the json file
venues = results['response']['groups'][0]['items']
#nearby_venues_data = json_normalize(venues) # flatten JSON
nearby_venues_data = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns_data = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues_data =nearby_venues_data.loc[:, filtered_columns_data]

# filter the category for each row
nearby_venues_data['venue.categories'] = nearby_venues_data.apply(get_category_type, axis=1)

# clean columns
nearby_venues_data.columns = [col.split(".")[-1] for col in nearby_venues_data.columns]

nearby_venues_data
# -------------------------------

Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
2,Grover Pub and Grub,Pub,43.679181,-79.297215
3,Upper Beaches,Neighborhood,43.680563,-79.292869


In [38]:
# Exploring neighborhoods in a part of Toronto City, having 'Toronto' in their name
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list=[]
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results_data = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results_data])

    nearby_venues_details = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues_details.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues_details)

In [39]:
# Code to invoke above function on each neighborhood of Toronto and create a new dataframe called toronto_brgh_venues
toronto_brgh_venues = getNearbyVenues(names=df_toronto_brgh['Neighborhood'],
                                   latitudes=df_toronto_brgh['Latitude'],
                                   longitudes=df_toronto_brgh['Longitude']
                                  )
#
toronto_brgh_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


In [41]:
# checking on # of venues that were returned for each neighborhood.
toronto_brgh_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,55,55,55,55,55,55
"Brockton, Parkdale Village, Exhibition Place",25,25,25,25,25,25
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",16,16,16,16,16,16
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",17,17,17,17,17,17
Central Bay Street,68,68,68,68,68,68
Christie,16,16,16,16,16,16
Church and Wellesley,75,75,75,75,75,75
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,34,34,34,34,34,34
Davisville North,9,9,9,9,9,9


In [42]:
# Determining # of unique categories can be curated from all the returned venues
print('There are {} uniques categories of venues.'.format(len(toronto_brgh_venues['Venue Category'].unique())))

There are 240 uniques categories of venues.


In [43]:
# Analyzing each neighbourhood
# one hot encoding
toronto_brgh_onehot = pd.get_dummies(toronto_brgh_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_brgh_onehot['Neighborhood'] = toronto_brgh_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_brgh_onehot.columns[-1]] + list(toronto_brgh_onehot.columns[:-1])
toronto_brgh_onehot = toronto_brgh_onehot[fixed_columns]

toronto_brgh_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Grouping data by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_brgh_grouped = toronto_brgh_onehot.groupby('Neighborhood').mean().reset_index()
toronto_brgh_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.018182,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.058824,0.058824,0.058824,0.117647,0.117647,0.058824,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0


In [44]:
# Identifying the 10 most common venues in each neighborhood
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted_data = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted_data['Neighborhood'] = toronto_brgh_grouped['Neighborhood']

for ind in np.arange(toronto_brgh_grouped.shape[0]):
    neighborhoods_venues_sorted_data.iloc[ind, 1:] = return_most_common_venues(toronto_brgh_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted_data.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Farmers Market,Bakery,Beer Bar,Cocktail Bar,Cheese Shop,Seafood Restaurant,Restaurant,Grocery Store,Pub
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Nightclub,Climbing Gym,Bar,Bookstore,Burrito Place,Restaurant,Playground
2,"Business reply mail Processing Centre, South C...",Park,Recording Studio,Restaurant,Light Rail Station,Auto Workshop,Fast Food Restaurant,Farmers Market,Burrito Place,Pizza Place,Butcher
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Plane,Harbor / Marina,Boutique,Boat or Ferry,Rental Car Location,Bar,Historic Site,Coffee Shop
4,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Department Store,Japanese Restaurant,Thai Restaurant,Burger Joint,Bubble Tea Shop,Salad Place


In [45]:
# Using k-means to cluster the neighborhood into 5 clusters.
#
# set number of clusters
kclusters = 5

toronto_brgh_grouped_clustering = toronto_brgh_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_brgh_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
#

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [46]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood
# add clustering labels
neighborhoods_venues_sorted_data.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_brgh_merged = df_toronto_brgh

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_brgh_merged = toronto_brgh_merged.join(neighborhoods_venues_sorted_data.set_index('Neighborhood'), on='Neighborhood')

toronto_brgh_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,1,Health Food Store,Pub,Trail,Dive Bar,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Women's Store
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Restaurant,Furniture / Home Store,Bookstore,Cosmetics Shop,Ice Cream Shop,Pub,Pizza Place
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,0,Park,Pizza Place,Ice Cream Shop,Pub,Fish & Chips Shop,Steakhouse,Sushi Restaurant,Burrito Place,Restaurant,Italian Restaurant
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Coffee Shop,Bakery,Gastropub,Brewery,Café,American Restaurant,Convenience Store,Seafood Restaurant,Cheese Shop,Clothing Store
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Park,Swim School,Bus Line,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Dive Bar,Distribution Center


In [47]:
# visualizing the resulting clusters
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(
        toronto_brgh_merged['Latitude'], 
        toronto_brgh_merged['Longitude'], 
        toronto_brgh_merged['Neighborhood'], 
        toronto_brgh_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [49]:
# Examining each cluster to determine the discriminating venue categories that distinguish each cluster.
# Cluster 1
toronto_brgh_merged.loc[toronto_brgh_merged['Cluster Labels'] == 0, toronto_brgh_merged.columns[[1] + list(range(5, toronto_brgh_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,0,Greek Restaurant,Coffee Shop,Italian Restaurant,Restaurant,Furniture / Home Store,Bookstore,Cosmetics Shop,Ice Cream Shop,Pub,Pizza Place
2,East Toronto,0,Park,Pizza Place,Ice Cream Shop,Pub,Fish & Chips Shop,Steakhouse,Sushi Restaurant,Burrito Place,Restaurant,Italian Restaurant
3,East Toronto,0,Coffee Shop,Bakery,Gastropub,Brewery,Café,American Restaurant,Convenience Store,Seafood Restaurant,Cheese Shop,Clothing Store
5,Central Toronto,0,Park,Hotel,Breakfast Spot,Dog Run,Sandwich Place,Food & Drink Shop,Department Store,Dance Studio,Gym / Fitness Center,Concert Hall
6,Central Toronto,0,Coffee Shop,Clothing Store,Yoga Studio,Bagel Shop,Furniture / Home Store,Ice Cream Shop,Fast Food Restaurant,Diner,Mexican Restaurant,Chinese Restaurant
7,Central Toronto,0,Pizza Place,Dessert Shop,Sandwich Place,Café,Coffee Shop,Italian Restaurant,Gym,Sushi Restaurant,Park,Farmers Market
9,Central Toronto,0,Coffee Shop,Supermarket,Pub,Bagel Shop,Sushi Restaurant,Bank,Fried Chicken Joint,Restaurant,Pizza Place,American Restaurant
11,Downtown Toronto,0,Coffee Shop,Restaurant,Pizza Place,Café,Chinese Restaurant,Italian Restaurant,Park,Pub,Bakery,Market
12,Downtown Toronto,0,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Yoga Studio,Men's Store,Café,Bubble Tea Shop,Pub
13,Downtown Toronto,0,Coffee Shop,Park,Bakery,Pub,Café,Theater,Breakfast Spot,Shoe Store,Distribution Center,Electronics Store


In [50]:
# Cluster 2
toronto_brgh_merged.loc[toronto_brgh_merged['Cluster Labels'] == 1, toronto_brgh_merged.columns[[1] + list(range(5, toronto_brgh_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,1,Health Food Store,Pub,Trail,Dive Bar,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Women's Store
10,Downtown Toronto,1,Park,Trail,Tennis Court,Playground,Doner Restaurant,Dog Run,Donut Shop,Dive Bar,Dance Studio,Distribution Center
23,Central Toronto,1,Park,Sushi Restaurant,Jewelry Store,Trail,Mexican Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Dive Bar


In [51]:
# Clouster 3
toronto_brgh_merged.loc[toronto_brgh_merged['Cluster Labels'] == 2, toronto_brgh_merged.columns[[1] + list(range(5, toronto_brgh_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Central Toronto,2,Trail,Playground,Women's Store,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Dive Bar


In [52]:
# Cluster 4
toronto_brgh_merged.loc[toronto_brgh_merged['Cluster Labels'] == 3, toronto_brgh_merged.columns[[1] + list(range(5, toronto_brgh_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,3,Park,Swim School,Bus Line,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Dive Bar,Distribution Center


In [53]:
# Cluster 5
toronto_brgh_merged.loc[toronto_brgh_merged['Cluster Labels'] == 4, toronto_brgh_merged.columns[[1] + list(range(5, toronto_brgh_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,4,Dive Bar,Music Venue,Garden,Women's Store,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
