# Assignment - Segmenting Neighborhoods in Toronto

### Prepare Notebook with Postal code, Borough, Neighborhood information

#### Import required libraries

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

#### Create URL and Beautiful Soup object for Webscraping

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Use a timeout so the cell cannot hang indefinitely, and fail on an HTTP error
# status instead of silently parsing an error page in the next cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

# Parse the downloaded HTML with the standard-library parser backend.
soup = BeautifulSoup(data, 'html.parser')

#### Create empty list, loop through table contents to create dictionary & dataframe

In [3]:
# The first <table> on the page holds the postal-code grid; each <td> is one postal code.
table = soup.find('table')
rows = table.find_all('td')

# One {'PostalCode', 'Borough', 'Neighborhood'} record per assigned cell.
table_contents = []
for td in rows:
    span_text = td.span.text

    # Cells marked 'Not assigned' carry no borough/neighborhood: skip them.
    if span_text == 'Not assigned':
        continue

    # The postal code is the first three characters of the cell's <p> text.
    postal_code = td.p.text[:3]

    # The <span> text reads "Borough(Neighborhood / Neighborhood ...)":
    # everything before the first '(' is the borough, the next piece the neighborhoods.
    pieces = span_text.split('(')
    borough = pieces[0]

    # Strip the closing parenthesis, turn ' /' separators into commas, blank out
    # any remaining ')' and trim surrounding spaces.
    neighborhood = pieces[1].strip(')')
    neighborhood = neighborhood.replace(' /', ',')
    neighborhood = neighborhood.replace(')', ' ')
    neighborhood = neighborhood.strip(' ')

    table_contents.append({
        'PostalCode': postal_code,
        'Borough': borough,
        'Neighborhood': neighborhood,
    })

df = pd.DataFrame(table_contents)

# display the scraped table:
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [4]:
# replace contents of long cells in df['Borough']
df['Borough'] = df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

# re-check contents of df
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto Business,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [5]:
df.shape

(103, 3)

### Get Latitude/Longitude of each Neighborhood - Geocoder

#### Load Lat/Long coordinates for each postal code from the course's Geospatial Coordinates CSV (the Geocoder lookup loop was not used)

In [7]:
# The Geocoder lookups did not work properly, so the Geospatial Coordinates CSV
# provided in the course materials is used instead.
# wget flags: -q = quiet, -O = write the download to 'Geospatial_Coordinates.csv'
# in the current working directory.
!wget -q -O 'Geospatial_Coordinates.csv' https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv

# NOTE: this message is printed unconditionally; it does not check that wget succeeded.
print('Data Downloaded')

Data Downloaded


In [8]:
# load geospatial data to a pandas dataframe
geospatial_data = pd.read_csv('Geospatial_Coordinates.csv')
geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
# common column (postal code) between both df and geospatial_data. Join dataframes, on common Postal Code, to get Latitude/Longitude
toronto_data = pd.merge(
                        left = df,
                        right = geospatial_data,
                        how = 'inner',
                        on = None,
                        left_on = 'PostalCode',
                        right_on = 'Postal Code'
)
toronto_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",M6A,43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,M7A,43.662301,-79.389494
...,...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",M8X,43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,M4Y,43.665860,-79.383160
100,M7Y,East Toronto Business,Enclave of M4L,M7Y,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",M8Y,43.636258,-79.498509


In [10]:
# drop redundant Postal Code column:
toronto_data.drop(['Postal Code'], axis = 1, inplace = True)
toronto_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto Business,Enclave of M4L,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [11]:
toronto_data.shape

(103, 5)

### Explore and Cluster Toronto Neighborhoods

#### Create a map of Toronto Neighborhoods w/ Folium

In [12]:
!conda install -c conda-forge folium=0.5.0 --yes
from geopy.geocoders import Nominatim 
import folium

# get geographical location of Toronto:
address = 'Toronto, On'
geolocator = Nominatim(user_agent='to_expolrer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

# create a map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start = 10)

# add markers for each neighborhood:
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}; {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### There are a lot of neighborhoods in Toronto. Let's simplify the map by including only those in Downtown Toronto

In [13]:
# Keep only the rows whose Borough is exactly 'Downtown Toronto' and renumber them from 0.
is_downtown = toronto_data['Borough'] == 'Downtown Toronto'
downtown_data = toronto_data.loc[is_downtown].reset_index(drop=True)
downtown_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [14]:
# create same map, but only for Downtown Toronto
map_dt_toronto = folium.Map(location=[latitude, longitude], zoom_start = 10)

# add markers for each neighborhood:
for lat, lng, borough, neighborhood in zip(downtown_data['Latitude'], downtown_data['Longitude'], downtown_data['Borough'], downtown_data['Neighborhood']):
    label = '{}; {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_dt_toronto)  
    
map_dt_toronto

### Fetch Data for Downtown Toronto Neighborhoods

#### Foursquare API - Credentials and Version

In [15]:
# define client id, client secret, version, limit 
client_id = '5RVW3EIU3YSZYVPJ3SGA5DRLCDYRN5KUI4GR00YK0NDHY0UO'
client_secret = '5J5NGC0VGV0GCWITLMCO3QJHKDWW22O4NFECEPS0EEMFY4MS'
version = '20210901'
limit = 100

print('Credentials:')
print('Client ID: ', client_id)
print('Client Secret: ', client_secret)

Credentials:
Client ID:  [REDACTED]
Client Secret:  [REDACTED]


#### Explore Neighborhoods in Downtown Toronto

In [16]:
# create function to look for venues within 500 m of the centre of each neighborhood:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint once per neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences (consumed together with ``zip``) giving each
        neighborhood's name and the coordinates to search around.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame has these columns even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Reads the module-level ``client_id``, ``client_secret``, ``version`` and
    ``limit`` variables, and prints each neighborhood name as it is processed.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        # Let requests build and encode the query string rather than formatting
        # the credentials into the URL by hand.
        params = {
            'client_id': client_id,
            'client_secret': client_secret,
            'v': version,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }
        # Bounded wait, and an explicit error on a failed request instead of a
        # KeyError while digging through an error payload.
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # A venue with an empty/missing category list is recorded with a
            # None category instead of raising IndexError.
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # Passing `columns=` here (instead of assigning them afterwards) also works
    # when no rows were collected.
    nearby_venues = pd.DataFrame(venue_rows, columns=columns)

    return(nearby_venues)

In [17]:
# code to convert each neighborhood in downtown_data and create a new dataframe called downtown_venues:
downtown_venues = getNearbyVenues(names = downtown_data['Neighborhood'], latitudes = downtown_data['Latitude'], longitudes = downtown_data['Longitude'])

Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


In [18]:
# check shape and first 5 values of dataframe:
print('dataframe shape: ', downtown_venues.shape)
downtown_venues.head()

dataframe shape:  (1113, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [19]:
# check how many venues were returned per neighborhood group:
downtown_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,57,57,57,57,57,57
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",16,16,16,16,16,16
Central Bay Street,67,67,67,67,67,67
Christie,16,16,16,16,16,16
Church and Wellesley,78,78,78,78,78,78
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
"First Canadian Place, Underground city",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"Harbourfront East, Union Station, Toronto Islands",100,100,100,100,100,100
"Kensington Market, Chinatown, Grange Park",66,66,66,66,66,66


In [20]:
# how many unique categories returned via the returned venues
print('There are {} unique categories'.format(len(downtown_venues['Venue Category'].unique())))

There are 209 unique categories


### One-hot Encode/Dummy Variables for each venue type in each Neighborhood

In [21]:
# one hot encode each venue category and append the neighborhood
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix = "", prefix_sep="")
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood']

# rearrange columns so that neighborhood is the first column - pop neighborhood and insert it as first column:
first_column = downtown_onehot.pop('Neighborhood')
downtown_onehot.insert(0, 'Neighborhood', first_column)

downtown_onehot.head()

Unnamed: 0,Neighborhood,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arepa Restaurant,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
# Group rows by Neighborhood - take the mean of the frequency of occurrence of each venue category
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Arepa Restaurant,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Har...",0.0625,0.0625,0.125,0.1875,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.014925,0.0,0.0,0.0,0.0,0.014925
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,...,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.025641
5,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01
6,"First Canadian Place, Underground city",0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,...,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0
7,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0
8,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,...,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0
9,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015152,...,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.015152,0.0,0.0


#### Evaluate/print each neighborhood along with the most common venue types

In [23]:
def most_common_venues(row, num_venues):
    """Return the index labels of the largest entries of ``row``.

    The first entry of ``row`` is skipped (callers pass rows whose first
    entry is the neighborhood name); the remaining entries are sorted in
    descending order and the labels of the first ``num_venues`` of them are
    returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_venues]

In [24]:
# use function to create dataframe
num_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to the number of venues specified
columns = ['Neighborhood']
for ind in np.arange(num_venues):
    try: 
        columns.append('{}{} Most Common Venue'.format(ind + 1, indicators[ind]))
    except: 
        columns.append('{}th Most common Venue'.format(ind + 1))
        
# create new dataframe
downtown_neighborhood_venues_sorted = pd.DataFrame(columns = columns)
downtown_neighborhood_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    downtown_neighborhood_venues_sorted.iloc[ind, 1:] = most_common_venues(downtown_grouped.iloc[ind, :], num_venues)

downtown_neighborhood_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most common Venue,5th Most common Venue,6th Most common Venue,7th Most common Venue,8th Most common Venue,9th Most common Venue,10th Most common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Farmers Market,Beer Bar,Restaurant,Seafood Restaurant,Cheese Shop,Breakfast Spot,Eastern European Restaurant
1,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Airport,Coffee Shop,Boutique,Plane,Boat or Ferry,Sculpture Garden,Rental Car Location
2,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Japanese Restaurant,Restaurant,Salad Place,Burger Joint,Spa,Bubble Tea Shop
3,Christie,Grocery Store,Café,Park,Baby Store,Candy Store,Italian Restaurant,Athletics & Sports,Restaurant,Coffee Shop,Nightclub
4,Church and Wellesley,Japanese Restaurant,Coffee Shop,Sushi Restaurant,Gay Bar,Restaurant,Hotel,Mediterranean Restaurant,Men's Store,Fast Food Restaurant,Dance Studio


### Cluster Neighborhoods

In [25]:
# import required libraries

from sklearn.cluster import KMeans

In [26]:
# set up max number of clusters
num_clusters = 4

# define grouped downtown datafram for clustering. Drop Neighborhood label
downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# define algorithm:
kmeans = KMeans(init = 'k-means++', n_clusters = 4, random_state = 0)
# fit algorithm:
kmeans.fit(downtown_grouped_clustering)
# labels
labels = kmeans.labels_

# insert cluster labels into the downtown_neighborhood_venues_sorted dataframe:
downtown_neighborhood_venues_sorted.insert(0, 'Cluster Labels', labels)
downtown_neighborhood_venues_sorted

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most common Venue,5th Most common Venue,6th Most common Venue,7th Most common Venue,8th Most common Venue,9th Most common Venue,10th Most common Venue
0,0,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Farmers Market,Beer Bar,Restaurant,Seafood Restaurant,Cheese Shop,Breakfast Spot,Eastern European Restaurant
1,3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Airport,Coffee Shop,Boutique,Plane,Boat or Ferry,Sculpture Garden,Rental Car Location
2,0,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Japanese Restaurant,Restaurant,Salad Place,Burger Joint,Spa,Bubble Tea Shop
3,2,Christie,Grocery Store,Café,Park,Baby Store,Candy Store,Italian Restaurant,Athletics & Sports,Restaurant,Coffee Shop,Nightclub
4,0,Church and Wellesley,Japanese Restaurant,Coffee Shop,Sushi Restaurant,Gay Bar,Restaurant,Hotel,Mediterranean Restaurant,Men's Store,Fast Food Restaurant,Dance Studio
5,0,"Commerce Court, Victoria Hotel",Coffee Shop,Restaurant,Hotel,Café,Gym,American Restaurant,Deli / Bodega,Bakery,Seafood Restaurant,Japanese Restaurant
6,0,"First Canadian Place, Underground city",Coffee Shop,Café,Hotel,Restaurant,Japanese Restaurant,Gym,Salad Place,Steakhouse,Deli / Bodega,Asian Restaurant
7,0,"Garden District, Ryerson",Clothing Store,Coffee Shop,Middle Eastern Restaurant,Japanese Restaurant,Café,Bubble Tea Shop,Cosmetics Shop,Pizza Place,Fast Food Restaurant,Bookstore
8,0,"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,Aquarium,Café,Hotel,Scenic Lookout,Brewery,Fried Chicken Joint,Restaurant,Sporting Goods Shop,Baseball Stadium
9,0,"Kensington Market, Chinatown, Grange Park",Café,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Burger Joint,Caribbean Restaurant,Grocery Store,Mexican Restaurant,Gaming Cafe,Bar


In [28]:
# define a downtown_merged dataframe. Initially equal to the downtown_data df
downtown_merged = downtown_data

# merge downtown_merged with downtown neighborhood evnues dataframe to get cluser labels and most common venues for each Neighborhood:
downtown_merged = downtown_merged.join(downtown_neighborhood_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
downtown_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most common Venue,5th Most common Venue,6th Most common Venue,7th Most common Venue,8th Most common Venue,9th Most common Venue,10th Most common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Café,Pub,Bakery,Park,Theater,Gym / Fitness Center,Farmers Market,Chocolate Shop,Dessert Shop
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Middle Eastern Restaurant,Japanese Restaurant,Café,Bubble Tea Shop,Cosmetics Shop,Pizza Place,Fast Food Restaurant,Bookstore
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Café,Coffee Shop,Italian Restaurant,Clothing Store,Cocktail Bar,Restaurant,Cosmetics Shop,Creperie,Farmers Market,Moroccan Restaurant
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Bakery,Farmers Market,Beer Bar,Restaurant,Seafood Restaurant,Cheese Shop,Breakfast Spot,Eastern European Restaurant
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Japanese Restaurant,Restaurant,Salad Place,Burger Joint,Spa,Bubble Tea Shop
5,M6G,Downtown Toronto,Christie,43.669542,-79.422564,2,Grocery Store,Café,Park,Baby Store,Candy Store,Italian Restaurant,Athletics & Sports,Restaurant,Coffee Shop,Nightclub
6,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,0,Coffee Shop,Café,Clothing Store,Gym,Bar,Restaurant,Hotel,Vegetarian / Vegan Restaurant,Thai Restaurant,Lounge
7,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,0,Coffee Shop,Aquarium,Café,Hotel,Scenic Lookout,Brewery,Fried Chicken Joint,Restaurant,Sporting Goods Shop,Baseball Stadium
8,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,0,Coffee Shop,Hotel,Café,Restaurant,Italian Restaurant,Bakery,Salad Place,Seafood Restaurant,Japanese Restaurant,Lounge
9,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,0,Coffee Shop,Restaurant,Hotel,Café,Gym,American Restaurant,Deli / Bodega,Bakery,Seafood Restaurant,Japanese Restaurant


### Visualizing the resulting/modeled clusters

In [30]:
# import required libraries:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [38]:
# create a map centered in Toronto
cluster_map = folium.Map(location=[latitude, longitude], zoom_start = 12)

# set color scheme for clusters
x = np.arange(num_clusters)
ys = [i + x + (i*x)**2 for i in range(num_clusters)]


color_list = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in color_list]
    
# add map markers
marker_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(cluster_map)
       
cluster_map

### Examine Clusters

#### Examine each cluster by its cluster label, and common venue categories

##### Cluster 1 (Cluster Label  0)

In [40]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most common Venue,5th Most common Venue,6th Most common Venue,7th Most common Venue,8th Most common Venue,9th Most common Venue,10th Most common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Café,Pub,Bakery,Park,Theater,Gym / Fitness Center,Farmers Market,Chocolate Shop,Dessert Shop
1,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Middle Eastern Restaurant,Japanese Restaurant,Café,Bubble Tea Shop,Cosmetics Shop,Pizza Place,Fast Food Restaurant,Bookstore
2,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Café,Coffee Shop,Italian Restaurant,Clothing Store,Cocktail Bar,Restaurant,Cosmetics Shop,Creperie,Farmers Market,Moroccan Restaurant
3,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Bakery,Farmers Market,Beer Bar,Restaurant,Seafood Restaurant,Cheese Shop,Breakfast Spot,Eastern European Restaurant
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Japanese Restaurant,Restaurant,Salad Place,Burger Joint,Spa,Bubble Tea Shop
6,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,0,Coffee Shop,Café,Clothing Store,Gym,Bar,Restaurant,Hotel,Vegetarian / Vegan Restaurant,Thai Restaurant,Lounge
7,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,0,Coffee Shop,Aquarium,Café,Hotel,Scenic Lookout,Brewery,Fried Chicken Joint,Restaurant,Sporting Goods Shop,Baseball Stadium
8,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,0,Coffee Shop,Hotel,Café,Restaurant,Italian Restaurant,Bakery,Salad Place,Seafood Restaurant,Japanese Restaurant,Lounge
9,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,0,Coffee Shop,Restaurant,Hotel,Café,Gym,American Restaurant,Deli / Bodega,Bakery,Seafood Restaurant,Japanese Restaurant
10,M5S,Downtown Toronto,"University of Toronto, Harbord",43.662696,-79.400049,0,Café,Japanese Restaurant,Bakery,Bookstore,Bar,Yoga Studio,Beer Bar,Beer Store,Sandwich Place,Nightclub


##### Cluster 2 (Cluster Label 1)

In [41]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most common Venue,5th Most common Venue,6th Most common Venue,7th Most common Venue,8th Most common Venue,9th Most common Venue,10th Most common Venue
13,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,1,Park,Playground,Trail,Airport,Museum,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant


##### Cluster 3 (Cluster Label 2)

In [43]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most common Venue,5th Most common Venue,6th Most common Venue,7th Most common Venue,8th Most common Venue,9th Most common Venue,10th Most common Venue
5,M6G,Downtown Toronto,Christie,43.669542,-79.422564,2,Grocery Store,Café,Park,Baby Store,Candy Store,Italian Restaurant,Athletics & Sports,Restaurant,Coffee Shop,Nightclub


##### Cluster 4 (Cluster Label 3)

In [44]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most common Venue,5th Most common Venue,6th Most common Venue,7th Most common Venue,8th Most common Venue,9th Most common Venue,10th Most common Venue
12,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442,3,Airport Service,Airport Lounge,Airport Terminal,Airport,Coffee Shop,Boutique,Plane,Boat or Ferry,Sculpture Garden,Rental Car Location


#### From analysis, the clusters can be grouped/assigned by:

- Cluster 1: Hotels/Dining
- Cluster 2: Outdoors/Leisure
- Cluster 3: Mixed Services
- Cluster 4: Airport Services

#### Note most of the clusters, with the exception of Airport Services, tend to overlap after the 4th - 5th most common venue, making assignment difficult. 

Additionally, most of the areas within downtown Toronto (14/17) fall comfortably into the first Cluster, indicating that Restaurants/Nightlife/Services are the predominant venue in the area of study