# Create Dataframe

In [1]:
import pandas as pd

In [2]:
table_TO = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

In [3]:
print(f'Total tables: {len(table_TO)}')

Total tables: 3


In [4]:
#Guess that it is the first table
df = table_TO[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
#Size before cleaning
df.shape

(180, 3)

In [6]:
#drop non asigned rows
df2=df[df.Borough != 'Not assigned']
df2.shape

(103, 3)

In [7]:
#See if there are any "Not assigned" Neigborhoods
for i in df2.columns:
    df2[i].astype('str').apply(lambda x: print(df2[i].name) if x.startswith('Not assigned') else 'pass')

In [8]:
#Combine rows with same Borough
df2['Neighbourhood'] = df.groupby(['Postal Code'])['Neighbourhood'].transform(lambda x : ','.join(x)) 
df2 = df2.drop_duplicates(subset=['Neighbourhood'])  
df2=df.loc[(df['Postal Code'] == 'M5G') | (df['Postal Code'] == 'M2H')| (df['Postal Code'] == 'M4B') 
               | (df['Postal Code'] == 'M1J') | (df['Postal Code'] == 'M4G') | (df['Postal Code'] == 'M4M')
                | (df['Postal Code'] == 'M1R') | (df['Postal Code'] == 'M9V') | (df['Postal Code'] == 'M9L')
                   | (df['Postal Code'] == 'M5V') | (df['Postal Code'] == 'M1B') | (df['Postal Code'] == 'M5A')]
df2.reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Neighbourhood'] = df.groupby(['Postal Code'])['Neighbourhood'].transform(lambda x : ','.join(x))


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M5A,Downtown Toronto,"Regent Park, Harbourfront"
1,M1B,Scarborough,"Malvern, Rouge"
2,M4B,East York,"Parkview Hill, Woodbine Gardens"
3,M4G,East York,Leaside
4,M5G,Downtown Toronto,Central Bay Street
5,M2H,North York,Hillcrest Village
6,M1J,Scarborough,Scarborough Village
7,M9L,North York,Humber Summit
8,M4M,East Toronto,Studio District
9,M1R,Scarborough,"Wexford, Maryvale"


In [9]:
df2.shape

(12, 3)

# Add Zipcodes

In [10]:
PCLATLON=pd.read_csv('GSPC.csv')

In [11]:
PCLATLON.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
zips = pd.merge(df2,
                 PCLATLON[['Postal Code','Latitude', 'Longitude']],on='Postal Code')
zips.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
2,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
3,M4G,East York,Leaside,43.70906,-79.363452
4,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


# Explore and Cluster

In [13]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a map of Toronto with neighborhoods

In [16]:
import folium # map rendering library

# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(zips['Latitude'], zips['Longitude'], zips['Borough'], zips['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Define Foursquare Credentials and Version

In [17]:
CLIENT_ID = 'S3IVNSJ3WG4Q2JY222OV4EZE4NFJP0AWDY3OX5ZXJ4ZAS0RG' # your Foursquare ID
CLIENT_SECRET = 'V3UPS1CTX4KSEPQ2ETFBDRQIRT3T4KEAJKQ2NG24EOZRICND' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: S3IVNSJ3WG4Q2JY222OV4EZE4NFJP0AWDY3OX5ZXJ4ZAS0RG
CLIENT_SECRET:V3UPS1CTX4KSEPQ2ETFBDRQIRT3T4KEAJKQ2NG24EOZRICND


#### Let's explore the first neighborhood in our dataframe.

In [18]:
zips.loc[0, 'Neighbourhood']

'Regent Park, Harbourfront'

In [19]:
#Get the neighborhood's latitude and longitude values.
neighborhood_latitude = zips.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = zips.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = ['Regent Park'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of ['Regent Park'] are 43.6542599, -79.3606359.


In [20]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=S3IVNSJ3WG4Q2JY222OV4EZE4NFJP0AWDY3OX5ZXJ4ZAS0RG&client_secret=V3UPS1CTX4KSEPQ2ETFBDRQIRT3T4KEAJKQ2NG24EOZRICND&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100'

In [23]:
#send GET request
import requests # library to handle requests
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '6001a43e871ccd477887d34c'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 48,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [24]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [27]:
import json # library to handle JSON files

venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149
3,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
4,Body Blitz Spa East,Spa,43.654735,-79.359874


In [28]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

48 venues were returned by Foursquare.


#### Let's create a function to repeat the same process to all the neighborhoods in Toronto

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
toronto_venues = getNearbyVenues(names=zips['Neighbourhood'],
                                   latitudes=zips['Latitude'],
                                   longitudes=zips['Longitude']
                                  )

Regent Park, Harbourfront
Malvern, Rouge
Parkview Hill, Woodbine Gardens
Leaside
Central Bay Street
Hillcrest Village
Scarborough Village
Humber Summit
Studio District
Wexford, Maryvale
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens


In [31]:
#Lets check the dataframe
print(toronto_venues.shape)
toronto_venues.head()

(228, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
3,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [33]:
#group by venue
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",14,14,14,14,14,14
Central Bay Street,61,61,61,61,61,61
Hillcrest Village,6,6,6,6,6,6
Humber Summit,1,1,1,1,1,1
Leaside,32,32,32,32,32,32
"Malvern, Rouge",1,1,1,1,1,1
"Parkview Hill, Woodbine Gardens",11,11,11,11,11,11
"Regent Park, Harbourfront",48,48,48,48,48,48
Scarborough Village,2,2,2,2,2,2
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",10,10,10,10,10,10


In [34]:
#How many unique values
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 109 uniques categories.


## Analyze Each Neighborhood

In [35]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,Art Museum,...,Sports Bar,Stationery Store,Supermarket,Sushi Restaurant,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Video Store,Wine Bar,Wine Shop
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
#What is the size of the new dataframe
toronto_onehot.shape

(228, 109)

In [37]:
#Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,...,Sports Bar,Stationery Store,Supermarket,Sushi Restaurant,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Video Store,Wine Bar,Wine Shop
0,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.071429,0.071429,0.142857,0.214286,0.142857,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Central Bay Street,0.016393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.016393,0.016393,0.0,0.016393,0.0,0.016393,0.0
2,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Leaside,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.03125,0.0,0.03125,0.0625,0.0,0.0,0.0,0.0,0.0,0.0
5,"Malvern, Rouge",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"Parkview Hill, Woodbine Gardens",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Regent Park, Harbourfront",0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.020833,...,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.020833
8,Scarborough Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"South Steeles, Silverstone, Humbergate, Jamest...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0


In [39]:
#size
toronto_grouped.shape

(12, 109)

In [41]:
#Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
              venue  freq
0   Airport Service  0.21
1    Airport Lounge  0.14
2  Airport Terminal  0.14
3   Harbor / Marina  0.07
4           Airport  0.07


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.20
1                Café  0.08
2  Italian Restaurant  0.05
3      Sandwich Place  0.05
4        Burger Joint  0.03


----Hillcrest Village----
                  venue  freq
0           Golf Course  0.17
1                  Pool  0.17
2    Athletics & Sports  0.17
3  Fast Food Restaurant  0.17
4               Dog Run  0.17


----Humber Summit----
                       venue  freq
0                Pizza Place   1.0
1  Latin American Restaurant   0.0
2                   Pharmacy   0.0
3                  Pet Store   0.0
4      Performing Arts Venue   0.0


----Leaside----
                    venue  freq
0             Coffee Shop  0.09
1     Sp

In [42]:
#### Let's put that into a _pandas_ dataframe
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [45]:
#Now let's create the new dataframe and display the top 10 venues for each neighborhood.
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Coffee Shop,Boat or Ferry,Sculpture Garden,Harbor / Marina,Rental Car Location,Airport Food Court,Airport
1,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Burger Joint,Salad Place,Yoga Studio,Indian Restaurant
2,Hillcrest Village,Dog Run,Golf Course,Pool,Fast Food Restaurant,Athletics & Sports,Mediterranean Restaurant,Discount Store,Comfort Food Restaurant,Comic Shop,Convenience Store
3,Humber Summit,Pizza Place,Distribution Center,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop,Coworking Space,Department Store
4,Leaside,Coffee Shop,Sporting Goods Shop,Furniture / Home Store,Burger Joint,Sushi Restaurant,Bank,Breakfast Spot,Beer Store,Bagel Shop,Dessert Shop


## Cluster Neighborhoods

In [77]:
from sklearn.cluster import KMeans
#Run _k_-means to cluster the neighborhood into 4 clusters.
# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0, verbose=1,algorithm='full',max_iter=1000).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

Initialization complete
Iteration 0, inertia 1.7337142692667784
Iteration 1, inertia 0.9769020875039892
Iteration 2, inertia 0.7482424416362817
Converged at iteration 2: strict convergence.
Initialization complete
Iteration 0, inertia 1.4257059721424417
Iteration 1, inertia 1.0307668102672076
Converged at iteration 1: strict convergence.
Initialization complete
Iteration 0, inertia 1.0974281554704188
Iteration 1, inertia 0.7482424416362817
Converged at iteration 1: strict convergence.
Initialization complete
Iteration 0, inertia 1.1178288526001117
Iteration 1, inertia 0.7482424416362817
Converged at iteration 1: strict convergence.
Initialization complete
Iteration 0, inertia 1.4262524202298736
Iteration 1, inertia 1.0516094450032851
Converged at iteration 1: strict convergence.
Initialization complete
Iteration 0, inertia 1.4257059721424417
Iteration 1, inertia 1.0307668102672076
Converged at iteration 1: strict convergence.
Initialization complete
Iteration 0, inertia 1.3348919851400

array([2, 2, 2, 0, 2, 1, 2, 2, 3, 2])

In [78]:
# merge dataframes for plot
zips.rename(columns = {"Neighbourhood": "Neighborhood"}, inplace=True)

In [79]:
toronto_merged = pd.merge(neighborhoods_venues_sorted,
                 zips[['Neighborhood','Latitude', 'Longitude']],on='Neighborhood')
toronto_merged.head() # check the last columns!

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
0,2,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Coffee Shop,Boat or Ferry,Sculpture Garden,Harbor / Marina,Rental Car Location,Airport Food Court,Airport,43.628947,-79.39442
1,2,Central Bay Street,Coffee Shop,Café,Sandwich Place,Italian Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Burger Joint,Salad Place,Yoga Studio,Indian Restaurant,43.657952,-79.387383
2,2,Hillcrest Village,Dog Run,Golf Course,Pool,Fast Food Restaurant,Athletics & Sports,Mediterranean Restaurant,Discount Store,Comfort Food Restaurant,Comic Shop,Convenience Store,43.803762,-79.363452
3,0,Humber Summit,Pizza Place,Distribution Center,Clothing Store,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop,Coworking Space,Department Store,43.756303,-79.565963
4,2,Leaside,Coffee Shop,Sporting Goods Shop,Furniture / Home Store,Burger Joint,Sushi Restaurant,Bank,Breakfast Spot,Beer Store,Bagel Shop,Dessert Shop,43.70906,-79.363452


In [80]:
import matplotlib.cm as cm
import matplotlib.colors as colors
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

In [81]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
3,Humber Summit,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop,Coworking Space,Department Store,43.756303,-79.565963


In [82]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
5,"Malvern, Rouge",Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop,Coworking Space,Department Store,43.806686,-79.194353


In [83]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
0,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Boat or Ferry,Sculpture Garden,Harbor / Marina,Rental Car Location,Airport Food Court,Airport,43.628947,-79.39442
1,Central Bay Street,Italian Restaurant,Middle Eastern Restaurant,Bubble Tea Shop,Burger Joint,Salad Place,Yoga Studio,Indian Restaurant,43.657952,-79.387383
2,Hillcrest Village,Fast Food Restaurant,Athletics & Sports,Mediterranean Restaurant,Discount Store,Comfort Food Restaurant,Comic Shop,Convenience Store,43.803762,-79.363452
4,Leaside,Burger Joint,Sushi Restaurant,Bank,Breakfast Spot,Beer Store,Bagel Shop,Dessert Shop,43.70906,-79.363452
6,"Parkview Hill, Woodbine Gardens",Flea Market,Gym / Fitness Center,Intersection,Café,Breakfast Spot,Bank,Gastropub,43.706397,-79.309937
7,"Regent Park, Harbourfront",Pub,Café,Theater,Breakfast Spot,Restaurant,Electronics Store,Italian Restaurant,43.65426,-79.360636
9,"South Steeles, Silverstone, Humbergate, Jamest...",Video Store,Fried Chicken Joint,Fast Food Restaurant,Beer Store,Sandwich Place,Dessert Shop,Coffee Shop,43.739416,-79.588437
10,Studio District,Brewery,Café,Gastropub,Gay Bar,Diner,Park,Middle Eastern Restaurant,43.659526,-79.340923
11,"Wexford, Maryvale",Smoke Shop,Bakery,Wine Shop,Distribution Center,Comfort Food Restaurant,Comic Shop,Convenience Store,43.750072,-79.295849


In [84]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Latitude,Longitude
8,Scarborough Village,Distribution Center,Coffee Shop,Comfort Food Restaurant,Comic Shop,Convenience Store,Cosmetics Shop,Coworking Space,43.744734,-79.239476


It looks like the 3 "single" neighborhood clusters were very similar in terms of the common venues.  
The 3rd cluster which includes the outlier seem to be more diverse.