## Capstone - Comparison of Toronto to Vancouver Neighborhoods

In [1]:
# Imports
import requests
import pandas as pd
import numpy as np
import geocoder
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
from sklearn.cluster import KMeans

In [2]:
# default values
toronto_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
vancouver_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_V'

# Foursquare
CLIENT_ID = '1NUIRXBVBZSLIHU0YZOMDKGLNUN3MIIYO2BJZSKW5032EEJE' # your Foursquare ID
CLIENT_SECRET = 'XIWOVXYYUEAU0Y22UK1ZTPIAHGF0TFOECGJVW4WZ4OQBTGRN' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

### Toronto Postal Codes

In [3]:
# Get postal codes for Toronto
toronto_html = requests.get(toronto_url).content

# Get a list of tables
df_list = pd.read_html(toronto_html, header=0)

# We want the first table
df_toronto = df_list[0]

# Drop the rows that have Borough = 'Not assigned'
df_toronto = df_toronto[df_toronto['Borough'] != 'Not assigned']

# Set the neighborhood to the borough when it is 'Not assigned'
df_toronto['Neighbourhood'] = df_toronto.apply(lambda row: row['Borough'] if row['Neighbourhood'] == 'Not assigned' else row['Neighbourhood'], axis=1)

# Let's use the subset of Boroughs with 'Toronto' in the name
df_toronto = df_toronto[df_toronto['Borough'].str.contains('Toronto')]

# Group the Postcode for all neighbourhoods
df_toronto = df_toronto.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(','.join).reset_index()

# Reset the index
df_toronto.reset_index()


print('Toronto Columns: {}'.format(df_toronto.columns))
print('Toronto Initial shape: {}'.format(df_toronto.shape))

df_toronto.head()

Toronto Columns: Index(['Postcode', 'Borough', 'Neighbourhood'], dtype='object')
Toronto Initial shape: (38, 3)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M4E,East Toronto,The Beaches
1,M4K,East Toronto,"The Danforth West,Riverdale"
2,M4L,East Toronto,"The Beaches West,India Bazaar"
3,M4M,East Toronto,Studio District
4,M4N,Central Toronto,Lawrence Park


### Vancouver Postal Codes

In [4]:
# Get postal codes for Vancouver
vancouver_html = requests.get(vancouver_url).content

# Get a list of tables
df_list = pd.read_html(vancouver_html, header=None)

# We want the first table, but we need to process the data. Some of the data is formatted inconsistently
df_raw = df_list[0]
v5x = df_raw.iloc[:,4]
v6x = df_raw.iloc[:,5]

# V5X data
d5 = []
for i, v in v5x.items():
    x = v.split('(')
    d5.append({'Postcode': x[0][0:3], 'Borough': x[0][3:], 'Neighbourhood': x[1][0:-1]})

df_d5 = pd.DataFrame(d5)

# V6X data
d6 = []
for i, v in v6x.items():
    x = v.split('(')

    if (len(x) > 1):
        d6.append({'Postcode': x[0][0:3], 'Borough': x[0][3:], 'Neighbourhood': x[1][0:-1]})
    else:
        d6.append({'Postcode': x[0][0:3], 'Borough': x[0][3:11], 'Neighbourhood': x[0][11:]})

df_d6 = pd.DataFrame(d6)

# Append the DataFrames
df_vancouver = pd.DataFrame(columns=['Postcode', 'Borough', 'Neighbourhood'])
df_vancouver = df_vancouver.append(df_d5, ignore_index=True, sort=False)
df_vancouver = df_vancouver.append(df_d6, ignore_index=True, sort=False)

print('Vancouver Columns: {}'.format(df_vancouver.columns))
print('Vancouver Initial shape: {}'.format(df_vancouver.shape))

df_vancouver.head()

Vancouver Columns: Index(['Postcode', 'Borough', 'Neighbourhood'], dtype='object')
Vancouver Initial shape: (40, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,V5A,Burnaby,Government Road / Lake City / SFU / Burnaby Mo...
1,V5B,Burnaby,Parkcrest-Aubrey / Ardingley-Sprott
2,V5C,Burnaby,Burnaby Heights / Willingdon Heights / West Ce...
3,V5E,Burnaby,Lakeview-Mayfield / Richmond Park / Kingsway-B...
4,V5G,Burnaby,Cascade-Schou / Douglas-Gilpin


In [5]:
# We have the Toronto and Vancouver DataFrames, time to get coordinates

def get_coords(pc, city, province):
    lat_lng_coords = None

    while (lat_lng_coords is None):
        print('Looking for {}'.format(pc))
        g = geocoder.arcgis('{}, {}, {}'.format(pc, city, province))
        lat_lng_coords = g.latlng

    return pd.Series([lat_lng_coords[0], lat_lng_coords[1]], index=['Latitude', 'Longitude'])

### Get Toronto Geocodes

In [6]:
# Get the Toronto Latitude and Longitude from geocoder
df_toronto = df_toronto.merge(df_toronto['Postcode'].apply(lambda x: get_coords(x, 'Toronto', 'Ontario')), left_index=True, right_index=True)
df_toronto.head()

Looking for M4E
Looking for M4K
Looking for M4L
Looking for M4M
Looking for M4N
Looking for M4P
Looking for M4R
Looking for M4S
Looking for M4T
Looking for M4V
Looking for M4W
Looking for M4X
Looking for M4Y
Looking for M5A
Looking for M5B
Looking for M5C
Looking for M5E
Looking for M5G
Looking for M5H
Looking for M5J
Looking for M5K
Looking for M5L
Looking for M5N
Looking for M5P
Looking for M5R
Looking for M5S
Looking for M5T
Looking for M5V
Looking for M5W
Looking for M5X
Looking for M6G
Looking for M6H
Looking for M6J
Looking for M6K
Looking for M6P
Looking for M6R
Looking for M6S
Looking for M7Y


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676845,-79.295225
1,M4K,East Toronto,"The Danforth West,Riverdale",43.683262,-79.35512
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.667965,-79.314673
3,M4M,East Toronto,Studio District,43.662766,-79.33483
4,M4N,Central Toronto,Lawrence Park,43.72816,-79.387085


### Get Vancouver Geocodes

In [7]:
# Get the Vancouver Latitude and Longitude from geocoder
df_vancouver = df_vancouver.merge(df_vancouver['Postcode'].apply(lambda x: get_coords(x, 'Vancouver', 'British Columbia')), left_index=True, right_index=True)
df_vancouver.head()

Looking for V5A
Looking for V5B
Looking for V5C
Looking for V5E
Looking for V5G
Looking for V5H
Looking for V5J
Looking for V5K
Looking for V5L
Looking for V5M
Looking for V5N
Looking for V5P
Looking for V5R
Looking for V5S
Looking for V5T
Looking for V5V
Looking for V5W
Looking for V5X
Looking for V5Y
Looking for V5Z
Looking for V6A
Looking for V6B
Looking for V6C
Looking for V6E
Looking for V6G
Looking for V6H
Looking for V6J
Looking for V6K
Looking for V6L
Looking for V6M
Looking for V6N
Looking for V6P
Looking for V6R
Looking for V6S
Looking for V6T
Looking for V6V
Looking for V6W
Looking for V6X
Looking for V6Y
Looking for V6Z


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,V5A,Burnaby,Government Road / Lake City / SFU / Burnaby Mo...,49.269835,-122.926264
1,V5B,Burnaby,Parkcrest-Aubrey / Ardingley-Sprott,49.26601,-122.975705
2,V5C,Burnaby,Burnaby Heights / Willingdon Heights / West Ce...,49.273948,-123.002575
3,V5E,Burnaby,Lakeview-Mayfield / Richmond Park / Kingsway-B...,49.22661,-122.954131
4,V5G,Burnaby,Cascade-Schou / Douglas-Gilpin,49.245075,-122.996302


### Map Toronto Postal Codes

In [8]:
# Map the Toronto postal codes
# Toronto lat lng
toronto_lat = 43.648690000000045
toronto_lng = -79.38543999999996

# Map the the boroughs, acquired from arcgis
map_toronto = folium.Map(location=[toronto_lat, toronto_lng], zoom_start=11)

for lat, lng, borough, neighbourhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

### Map Vancouver Postal Codes

In [9]:
# Map the Vancouver postal codes
# Vancouver lat lng
vancouver_lat = 49.24966
vancouver_lng = -123.11934

# Map the the boroughs, acquired from arcgis
map_vancouver = folium.Map(location=[vancouver_lat, vancouver_lng], zoom_start=11)

for lat, lng, borough, neighbourhood in zip(df_vancouver['Latitude'], df_vancouver['Longitude'], df_vancouver['Borough'], df_vancouver['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_vancouver)

map_vancouver

### Define the function for parsing the Foursquare JSON data

In [10]:
# Prepare our venues function
def getNearbyVenues(names, latitudes, longitudes, radius=500):

    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # return only relevant information for each nearby venue
        venues_list.append([(
            name,
            lat,
            lng,
            v['venue']['name'],
            v['venue']['location']['lat'],
            v['venue']['location']['lng'],
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood',
                  'Neighbourhood Latitude',
                  'Neighbourhood Longitude',
                  'Venue',
                  'Venue Latitude',
                  'Venue Longitude',
                  'Venue Category']

    return(nearby_venues)

### Explore Foursquare for Toronto venues

In [11]:
# Prepare for the Foursquare explore of venues for Toronto

# Set our limits
LIMIT = 100
radius = 500  # in meters

# Get the full set of Postcode venues
toronto_venues = getNearbyVenues(names=df_toronto['Neighbourhood'],
                                 latitudes=df_toronto['Latitude'],
                                 longitudes=df_toronto['Longitude'])

The Beaches
The Danforth West,Riverdale
The Beaches West,India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park,Summerhill East
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
Rosedale
Cabbagetown,St. James Town
Church and Wellesley
Harbourfront,Regent Park
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide,King,Richmond
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Roselawn
Forest Hill North,Forest Hill West
The Annex,North Midtown,Yorkville
Harbord,University of Toronto
Chinatown,Grange Park,Kensington Market
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place,Underground city
Christie
Dovercourt Village,Dufferin
Little Portugal,Trinity
Brockton,Exhibition Place,Parkdale Village
High Park,The Junction South
Parkdale,Roncesvall

In [12]:
# How many venues did we retrieve?
print('Toronto Venues shape: {}'.format(toronto_venues.shape))
toronto_venues.head()

Toronto Venues shape: (1746, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676845,-79.295225,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676845,-79.295225,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676845,-79.295225,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676845,-79.295225,Beaches Rec Centre,43.673659,-79.298634,College Gym
4,The Beaches,43.676845,-79.295225,Upper Beaches,43.680563,-79.292869,Neighborhood


In [13]:
# How many venues did we get for each Neighbourhood?
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Berczy Park,62,62,62,62,62,62
"Brockton,Exhibition Place,Parkdale Village",67,67,67,67,67,67
Business Reply Mail Processing Centre 969 Eastern,100,100,100,100,100,100
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",69,69,69,69,69,69
"Cabbagetown,St. James Town",44,44,44,44,44,44
Central Bay Street,98,98,98,98,98,98
"Chinatown,Grange Park,Kensington Market",95,95,95,95,95,95
Christie,9,9,9,9,9,9
Church and Wellesley,88,88,88,88,88,88


### Explore Foursquare for Vancouver venues

In [14]:
# Prepare for the Foursquare explore of venues for Vancouver

# Set our limits
LIMIT = 100
radius = 500  # in meters

# Get the full set of Postcode venues
vancouver_venues = getNearbyVenues(names=df_vancouver['Neighbourhood'],
                                 latitudes=df_vancouver['Latitude'],
                                 longitudes=df_vancouver['Longitude'])

Government Road / Lake City / SFU / Burnaby Mountain
Parkcrest-Aubrey / Ardingley-Sprott
Burnaby Heights / Willingdon Heights / West Central Valley
Lakeview-Mayfield / Richmond Park / Kingsway-Beresford
Cascade-Schou / Douglas-Gilpin
Maywood / Marlborough / Oakalla / Windsor
Suncrest / Sussex-Nelson / Clinton-Glenwood / West Big Bend
North Hastings-Sunrise
North Grandview-Woodland
South Hastings-Sunrise / North Renfrew-Collingwood
South Grandview-Woodland / NE Kensington-Cedar Cottage
SE Kensington-Cedar Cottage / Victoria-Fraserview
South Renfrew-Collingwood
Killarney
East Mount Pleasant
West Kensington-Cedar Cottage / NE Riley Park-Little Mountain
SE Riley Park-Little Mountain / SW Kensington-Cedar Cottage / NE Oakridge / North Sunset
SE Oakridge / East Marpole / South Sunset
West Mount Pleasant / West Riley Park-Little Mountain
East Fairview / South Cambie
Strathcona / Chinatown / Downtown Eastside
NE Downtown / Gastown / Harbour Centre / International Village / Victory Square / Yal

In [15]:
# How many venues did we retrieve?
print('Vancouver Venues shape: {}'.format(vancouver_venues.shape))
vancouver_venues.head()

Vancouver Venues shape: (855, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Government Road / Lake City / SFU / Burnaby Mo...,49.269835,-122.926264,Bus Stop 52844 (136),49.265944,-122.929664,Bus Stop
1,Parkcrest-Aubrey / Ardingley-Sprott,49.26601,-122.975705,Garlic & Chili Restaurant,49.265424,-122.97846,Chinese Restaurant
2,Parkcrest-Aubrey / Ardingley-Sprott,49.26601,-122.975705,Garden Works,49.262458,-122.97331,Flower Shop
3,Parkcrest-Aubrey / Ardingley-Sprott,49.26601,-122.975705,Sushimoto,49.264544,-122.981629,Sushi Restaurant
4,Parkcrest-Aubrey / Ardingley-Sprott,49.26601,-122.975705,Subway,49.264128,-122.981307,Sandwich Place


In [16]:
# How many venues did we get for each Neighbourhood?
vancouver_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Cascade-Schou / Douglas-Gilpin,4,4,4,4,4,4
Central,1,1,1,1,1,1
Central Kitsilano / Greektown,53,53,53,53,53,53
East Fairview / South Cambie,23,23,23,23,23,23
East Mount Pleasant,14,14,14,14,14,14
Government Road / Lake City / SFU / Burnaby Mountain,1,1,1,1,1,1
Killarney,16,16,16,16,16,16
Lakeview-Mayfield / Richmond Park / Kingsway-Beresford,3,3,3,3,3,3
Maywood / Marlborough / Oakalla / Windsor,78,78,78,78,78,78
NE Downtown / Gastown / Harbour Centre / International Village / Victory Square / Yaletown,100,100,100,100,100,100


In [17]:
# How many unique venues for each city?
print('Toronto Unique categories: {}'.format(len(toronto_venues['Venue Category'].unique())))
print('Vancouver Unique categories: {}'.format(len(vancouver_venues['Venue Category'].unique())))

Toronto Unique categories: 211
Vancouver Unique categories: 182


### Toronto onehot encoding

In [18]:
# Prepare the onehot encoding for Toronto
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot.head()

Unnamed: 0,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# Add neighbourhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood']

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# We need to group the data per neighbourhood so that we have 1 row per neighborhood
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
print('Grouped shape: {}'.format(toronto_grouped.shape))


Grouped shape: (37, 212)


In [21]:
# Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
         venue  freq
0  Coffee Shop  0.09
1         Café  0.06
2        Hotel  0.04
3          Gym  0.03
4   Steakhouse  0.03


----Berczy Park----
          venue  freq
0   Coffee Shop  0.08
1  Cocktail Bar  0.05
2    Restaurant  0.05
3         Hotel  0.03
4        Bakery  0.03


----Brockton,Exhibition Place,Parkdale Village----
                    venue  freq
0             Coffee Shop  0.10
1                    Café  0.07
2              Restaurant  0.04
3                  Bakery  0.04
4  Furniture / Home Store  0.04


----Business Reply Mail Processing Centre 969 Eastern----
         venue  freq
0  Coffee Shop  0.09
1         Café  0.05
2          Bar  0.04
3   Steakhouse  0.04
4        Hotel  0.04


----CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara----
                  venue  freq
0           Coffee Shop  0.09
1    Italian Restaurant  0.07
2  Gym / Fitness Center  0.06
3                  Café  0.

### Vancouver onehot encoding

In [22]:
# Prepare the onehot encoding for Vancouver
vancouver_onehot = pd.get_dummies(vancouver_venues[['Venue Category']], prefix="", prefix_sep="")
vancouver_onehot.head()

Unnamed: 0,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Auto Garage,Bagel Shop,Bakery,Bank,...,Trade School,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Shop,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Add neighbourhood column back to dataframe
vancouver_onehot['Neighbourhood'] = vancouver_venues['Neighbourhood']

# move neighborhood column to the first column
fixed_columns = [vancouver_onehot.columns[-1]] + list(vancouver_onehot.columns[:-1])
vancouver_onehot = vancouver_onehot[fixed_columns]

vancouver_onehot.head()

Unnamed: 0,Neighbourhood,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Australian Restaurant,Auto Garage,Bagel Shop,Bakery,...,Trade School,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Shop,Women's Store,Yoga Studio
0,Government Road / Lake City / SFU / Burnaby Mo...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkcrest-Aubrey / Ardingley-Sprott,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkcrest-Aubrey / Ardingley-Sprott,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkcrest-Aubrey / Ardingley-Sprott,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Parkcrest-Aubrey / Ardingley-Sprott,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
# We need to group the data per neighbourhood so that we have 1 row per neighborhood
vancouver_grouped = vancouver_onehot.groupby('Neighbourhood').mean().reset_index()
print('Grouped shape: {}'.format(vancouver_grouped.shape))

Grouped shape: (38, 183)


In [25]:
# Let's print each neighborhood along with the top 5 most common venues
num_top_venues = 5

for hood in vancouver_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = vancouver_grouped[vancouver_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Cascade-Schou / Douglas-Gilpin----
                 venue  freq
0                 Park  0.50
1          Auto Garage  0.25
2        Train Station  0.25
3  American Restaurant  0.00
4               Museum  0.00


----Central----
                 venue  freq
0                 Park   1.0
1  American Restaurant   0.0
2               Museum   0.0
3        Metro Station   0.0
4   Mexican Restaurant   0.0


----Central Kitsilano / Greektown----
            venue  freq
0     Coffee Shop  0.09
1     Pizza Place  0.06
2            Café  0.06
3   Grocery Store  0.06
4  Breakfast Spot  0.04


----East Fairview / South Cambie----
                venue  freq
0         Coffee Shop  0.22
1            Bus Stop  0.17
2                Park  0.09
3        Liquor Store  0.04
4  Light Rail Station  0.04


----East Mount Pleasant----
                  venue  freq
0  Ethiopian Restaurant  0.14
1      Sushi Restaurant  0.14
2                  Café  0.07
3                   Bar  0.07
4           Pizza Place 

### Compare the venues between Toronto and Vancouver

In [26]:
toronto_set = set(toronto_venues['Venue Category'].unique())
vancouver_set = set(vancouver_venues['Venue Category'].unique())

In [27]:
# Get the counts of the sets
print('Toronto venues count: {}'.format(len(toronto_set)))
print('Vancouver venues count: {}'.format(len(vancouver_set)))

# What are the common venues?
print('\n Similar venues:')
toronto_set & vancouver_set

Toronto venues count: 211
Vancouver venues count: 182

 Similar venues:


{'American Restaurant',
 'Art Gallery',
 'Arts & Crafts Store',
 'Asian Restaurant',
 'Athletics & Sports',
 'Bagel Shop',
 'Bakery',
 'Bank',
 'Bar',
 'Belgian Restaurant',
 'Bookstore',
 'Breakfast Spot',
 'Brewery',
 'Bubble Tea Shop',
 'Building',
 'Burger Joint',
 'Burrito Place',
 'Café',
 'Camera Store',
 'Caribbean Restaurant',
 'Cheese Shop',
 'Chinese Restaurant',
 'Church',
 'Clothing Store',
 'Cocktail Bar',
 'Coffee Shop',
 'Concert Hall',
 'Convenience Store',
 'Cosmetics Shop',
 'Deli / Bodega',
 'Department Store',
 'Dessert Shop',
 'Dim Sum Restaurant',
 'Diner',
 'Discount Store',
 'Dog Run',
 'Donut Shop',
 'Electronics Store',
 'Ethiopian Restaurant',
 'Event Space',
 'Falafel Restaurant',
 'Farm',
 'Farmers Market',
 'Fast Food Restaurant',
 'Fish & Chips Shop',
 'Flower Shop',
 'Food Court',
 'Food Truck',
 'French Restaurant',
 'Fried Chicken Joint',
 'Frozen Yogurt Shop',
 'Furniture / Home Store',
 'Garden',
 'Gastropub',
 'Gay Bar',
 'Gift Shop',
 'Gourmet Sho

In [28]:
# What venues are in Toronto but not Vancouver?
print('\n Toronto ONLY venues:')
toronto_set - vancouver_set


 Toronto ONLY venues:


{'Afghan Restaurant',
 'Antique Shop',
 'Art Museum',
 'BBQ Joint',
 'Baby Store',
 'Basketball Court',
 'Basketball Stadium',
 'Beer Bar',
 'Beer Store',
 'Bistro',
 'Boutique',
 'Brazilian Restaurant',
 'Bus Line',
 'Butcher',
 'College Arts Building',
 'College Gym',
 'College Rec Center',
 'Colombian Restaurant',
 'Comfort Food Restaurant',
 'Comic Shop',
 'Costume Shop',
 'Creperie',
 'Cuban Restaurant',
 'Cupcake Shop',
 'Dance Studio',
 'Dumpling Restaurant',
 'Eastern European Restaurant',
 'Fish Market',
 'Flea Market',
 'Food',
 'Food & Drink Shop',
 'Fountain',
 'Gaming Cafe',
 'General Entertainment',
 'General Travel',
 'Gluten-free Restaurant',
 'History Museum',
 'Hobby Shop',
 'Hookah Bar',
 'Intersection',
 'Irish Pub',
 'Jazz Club',
 'Jewish Restaurant',
 'Latin American Restaurant',
 'Mac & Cheese Joint',
 'Martial Arts Dojo',
 'Modern European Restaurant',
 'Molecular Gastronomy Restaurant',
 'Monument / Landmark',
 'Neighborhood',
 'Opera House',
 'Optical Shop',
 

In [29]:
# What venues are in Vancouver but not Toronto?
print('\n Vancouver ONLY venues:')
vancouver_set - toronto_set



 Vancouver ONLY venues:


{'Australian Restaurant',
 'Auto Garage',
 'Baseball Field',
 'Beach',
 'Beer Garden',
 'Bus Station',
 'Bus Stop',
 'Cafeteria',
 'Cantonese Restaurant',
 'Child Care Service',
 'Chiropractor',
 'Chocolate Shop',
 'Cycle Studio',
 'Fair',
 'Field',
 'Filipino Restaurant',
 'Financial or Legal Service',
 'Garden Center',
 'Golf Course',
 'Hawaiian Restaurant',
 'Himalayan Restaurant',
 'Hockey Arena',
 'Hot Dog Joint',
 'Indie Movie Theater',
 'Inn',
 'Insurance Office',
 'Japanese Curry Restaurant',
 'Laundromat',
 'Leather Goods Store',
 'Lebanese Restaurant',
 'Massage Studio',
 'Metro Station',
 'Mobile Phone Shop',
 'Motorcycle Shop',
 'Moving Target',
 'Outdoor Sculpture',
 'Paper / Office Supplies Store',
 'Pool',
 'Portuguese Restaurant',
 'Scenic Lookout',
 'Shanghai Restaurant',
 'Shop & Service',
 'Soccer Stadium',
 'South Indian Restaurant',
 'Stadium',
 'Tennis Court',
 'Theme Park',
 'Theme Park Ride / Attraction',
 'Track',
 'Trade School',
 'Video Store',
 'Warehouse St

### Prepare for finding the most common venues in a postal code

In [30]:
# Create a function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)

    return row_categories_sorted.index.values[0:num_top_venues]

### Find the most common venues for Toronto

In [31]:
# Toronto: Sort the venues in descending order, using the return_most_common_venues() function. Then view the first 10 neighbourhoods.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
toronto_neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
toronto_neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    toronto_neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

toronto_neighbourhoods_venues_sorted.head(10)


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Hotel,Gym,Steakhouse,Japanese Restaurant,Bar,Asian Restaurant,Restaurant,Burger Joint
1,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Breakfast Spot,Steakhouse,Café,Beer Bar,Seafood Restaurant,Cheese Shop,Bakery
2,"Brockton,Exhibition Place,Parkdale Village",Coffee Shop,Café,Restaurant,Sandwich Place,Bakery,Furniture / Home Store,Vegetarian / Vegan Restaurant,Italian Restaurant,Bar,Beer Bar
3,Business Reply Mail Processing Centre 969 Eastern,Coffee Shop,Café,Hotel,Steakhouse,Bar,Italian Restaurant,American Restaurant,Restaurant,Pizza Place,Sushi Restaurant
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Coffee Shop,Italian Restaurant,Gym / Fitness Center,Café,Restaurant,Bar,Park,Speakeasy,Bakery,Sandwich Place
5,"Cabbagetown,St. James Town",Coffee Shop,Park,Bakery,Market,Café,Restaurant,Italian Restaurant,Pizza Place,Farm,Liquor Store
6,Central Bay Street,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Tea Room,Plaza,Café,Cosmetics Shop,Italian Restaurant,Chinese Restaurant,Sushi Restaurant
7,"Chinatown,Grange Park,Kensington Market",Café,Vegetarian / Vegan Restaurant,Bar,Chinese Restaurant,Dumpling Restaurant,Mexican Restaurant,Vietnamese Restaurant,Coffee Shop,Bakery,Gaming Cafe
8,Christie,Café,Grocery Store,Playground,Italian Restaurant,Baby Store,Coffee Shop,Ethiopian Restaurant,Flower Shop,Flea Market,Fish Market
9,Church and Wellesley,Coffee Shop,Japanese Restaurant,Gay Bar,Restaurant,Dance Studio,Sushi Restaurant,Bubble Tea Shop,Fast Food Restaurant,Pub,Men's Store


### Find the most common venues for Vancouver

In [32]:
# Vancouver: Sort the venues in descending order, using the return_most_common_venues() function. Then view the first 10 neighbourhoods.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
vancouver_neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
vancouver_neighbourhoods_venues_sorted['Neighbourhood'] = vancouver_grouped['Neighbourhood']

for ind in np.arange(vancouver_grouped.shape[0]):
    vancouver_neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(vancouver_grouped.iloc[ind, :], num_top_venues)

vancouver_neighbourhoods_venues_sorted.head(10)

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Cascade-Schou / Douglas-Gilpin,Park,Auto Garage,Train Station,Farm,Food Truck,Food Court,Flower Shop,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
1,Central,Park,Yoga Studio,Farmers Market,Food Truck,Food Court,Flower Shop,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Field
2,Central Kitsilano / Greektown,Coffee Shop,Grocery Store,Café,Pizza Place,Pub,Toy / Game Store,Indian Restaurant,Bus Stop,Bakery,Spa
3,East Fairview / South Cambie,Coffee Shop,Bus Stop,Park,Café,Cafeteria,Malay Restaurant,Liquor Store,Light Rail Station,Chinese Restaurant,Sushi Restaurant
4,East Mount Pleasant,Sushi Restaurant,Ethiopian Restaurant,Outdoor Sculpture,Pizza Place,Café,Bar,Grocery Store,Sandwich Place,Pharmacy,Vietnamese Restaurant
5,Government Road / Lake City / SFU / Burnaby Mo...,Bus Stop,Fast Food Restaurant,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Flower Shop,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
6,Killarney,Chinese Restaurant,Shopping Mall,Fast Food Restaurant,Juice Bar,Salon / Barbershop,Sushi Restaurant,Sandwich Place,Coffee Shop,Grocery Store,Bakery
7,Lakeview-Mayfield / Richmond Park / Kingsway-B...,Bus Stop,Convenience Store,Fast Food Restaurant,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Flower Shop,Fish & Chips Shop,Financial or Legal Service
8,Maywood / Marlborough / Oakalla / Windsor,Coffee Shop,Clothing Store,American Restaurant,Hotel,Sandwich Place,Chinese Restaurant,Cosmetics Shop,Vietnamese Restaurant,Asian Restaurant,Bubble Tea Shop
9,NE Downtown / Gastown / Harbour Centre / Inter...,Hotel,Coffee Shop,Restaurant,Bakery,Breakfast Spot,Café,Pizza Place,Vietnamese Restaurant,Seafood Restaurant,Taco Place


### Begin K-means clustering

In [33]:
# Now we run the K-means clustering

# set number of clusters
kclusters = 10

# Drop the neighbourhood for the clustering process
toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
toronto_kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
toronto_kmeans.labels_[0:20]


array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 2, 1, 3])

In [34]:
# Now we run the K-means clustering for Vancouver

# set number of clusters
kclusters = 10

# Drop the neighbourhood for the clustering process
vancouver_grouped_clustering = vancouver_grouped.drop('Neighbourhood', 1)

# run k-means clustering
vancouver_kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(vancouver_grouped_clustering)

# check cluster labels generated for each row in the dataframe
vancouver_kmeans.labels_[0:20]


array([5, 2, 1, 1, 1, 3, 9, 3, 1, 1, 0, 7, 1, 1, 6, 1, 1, 4, 9, 1])

In [35]:
# Create a dataframe that includes the cluster and the top 10 venues for each neighborhood.
# Toronto
# add clustering labels
toronto_neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', toronto_kmeans.labels_)

toronto_merged = df_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(toronto_neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head(10) # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676845,-79.295225,9.0,College Gym,Health Food Store,Pub,Trail,Neighborhood,Electronics Store,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
1,M4K,East Toronto,"The Danforth West,Riverdale",43.683262,-79.35512,8.0,Bus Line,Park,Discount Store,Grocery Store,Yoga Studio,Event Space,Food,Flower Shop,Flea Market,Fish Market
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.667965,-79.314673,1.0,Park,Sandwich Place,Pizza Place,Sushi Restaurant,Pub,Movie Theater,Fast Food Restaurant,Fish & Chips Shop,Burrito Place,Burger Joint
3,M4M,East Toronto,Studio District,43.662766,-79.33483,1.0,Diner,Italian Restaurant,Bakery,Pizza Place,Sushi Restaurant,Café,Bar,Furniture / Home Store,Coffee Shop,Brewery
4,M4N,Central Toronto,Lawrence Park,43.72816,-79.387085,4.0,Bus Line,Swim School,Yoga Studio,Event Space,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
5,M4P,Central Toronto,Davisville North,43.712815,-79.388526,1.0,Hotel,Park,Breakfast Spot,Clothing Store,Gym,Food & Drink Shop,Diner,Discount Store,Food,Flower Shop
6,M4R,Central Toronto,North Toronto West,43.714523,-79.40696,0.0,Playground,Gym Pool,Park,Garden,Electronics Store,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
7,M4S,Central Toronto,Davisville,43.703395,-79.385964,1.0,Dessert Shop,Pizza Place,Café,Sandwich Place,Coffee Shop,Italian Restaurant,Thai Restaurant,Farmers Market,Park,Seafood Restaurant
8,M4T,Central Toronto,"Moore Park,Summerhill East",43.690655,-79.383561,5.0,Convenience Store,Gym,Restaurant,Yoga Studio,Electronics Store,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
9,M4V,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",43.686083,-79.402335,7.0,Light Rail Station,Coffee Shop,Supermarket,Liquor Store,Yoga Studio,Falafel Restaurant,Food,Flower Shop,Flea Market,Fish Market


In [36]:
# Create a dataframe that includes the cluster and the top 10 venues for each neighborhood.
# Vancouver
# add clustering labels
vancouver_neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', vancouver_kmeans.labels_)

vancouver_merged = df_vancouver

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
vancouver_merged = vancouver_merged.join(vancouver_neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

vancouver_merged.head(10) # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,V5A,Burnaby,Government Road / Lake City / SFU / Burnaby Mo...,49.269835,-122.926264,3.0,Bus Stop,Fast Food Restaurant,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Flower Shop,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
1,V5B,Burnaby,Parkcrest-Aubrey / Ardingley-Sprott,49.26601,-122.975705,9.0,Diner,Park,Flower Shop,Vietnamese Restaurant,Asian Restaurant,Light Rail Station,Sushi Restaurant,Chinese Restaurant,Sandwich Place,Chiropractor
2,V5C,Burnaby,Burnaby Heights / Willingdon Heights / West Ce...,49.273948,-123.002575,,,,,,,,,,,
3,V5E,Burnaby,Lakeview-Mayfield / Richmond Park / Kingsway-B...,49.22661,-122.954131,3.0,Bus Stop,Convenience Store,Fast Food Restaurant,Fried Chicken Joint,French Restaurant,Food Truck,Food Court,Flower Shop,Fish & Chips Shop,Financial or Legal Service
4,V5G,Burnaby,Cascade-Schou / Douglas-Gilpin,49.245075,-122.996302,5.0,Park,Auto Garage,Train Station,Farm,Food Truck,Food Court,Flower Shop,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
5,V5H,Burnaby,Maywood / Marlborough / Oakalla / Windsor,49.230925,-123.002065,1.0,Coffee Shop,Clothing Store,American Restaurant,Hotel,Sandwich Place,Chinese Restaurant,Cosmetics Shop,Vietnamese Restaurant,Asian Restaurant,Bubble Tea Shop
6,V5J,Burnaby,Suncrest / Sussex-Nelson / Clinton-Glenwood / ...,49.201845,-122.999099,1.0,Baseball Field,Athletics & Sports,Insurance Office,Golf Course,Fast Food Restaurant,French Restaurant,Food Truck,Food Court,Flower Shop,Fish & Chips Shop
7,V5K,Vancouver,North Hastings-Sunrise,49.281665,-123.03998,1.0,Theme Park Ride / Attraction,Bus Station,Theme Park,Event Space,Park,Inn,Soccer Field,Sandwich Place,Stadium,Sushi Restaurant
8,V5L,Vancouver,North Grandview-Woodland,49.2807,-123.066842,1.0,Café,Coffee Shop,Chinese Restaurant,Asian Restaurant,Bakery,Brewery,Theater,Deli / Bodega,French Restaurant,Market
9,V5M,Vancouver,South Hastings-Sunrise / North Renfrew-Colling...,49.260095,-123.040085,1.0,Bus Stop,Coffee Shop,Liquor Store,Insurance Office,Chinese Restaurant,Sandwich Place,Metro Station,Restaurant,Mexican Restaurant,Furniture / Home Store


### Map the Toronto clusters

In [37]:
# Map the clusters for Toronto

# create map
toronto_map_clusters = folium.Map(location=[toronto_lat, toronto_lng], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(toronto_map_clusters)

toronto_map_clusters

TypeError: list indices must be integers or slices, not float

### Review the clusters 

In [None]:
toronto_merged.groupby(['Cluster Labels']).count()

### Toronto Cluster: 0
This cluster is heavily loaded with Coffee Shops, Cafés, and small eating places.

In [None]:
# What is cluster 0?
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

In [None]:
print("Toronto: Number of Boroughs that have 'Chinese Restaurant' in the top 4 most common venues: {}".format(
    len(toronto_merged[(toronto_merged['Cluster Labels'] == 0) & 
                     (toronto_merged['1st Most Common Venue'] == 'Chinese Restaurant')
                    | (toronto_merged['2nd Most Common Venue'] == 'Chinese Restaurant')
                    | (toronto_merged['3rd Most Common Venue'] == 'Chinese Restaurant')
                    | (toronto_merged['4th Most Common Venue'] == 'Chinese Restaurant')])))

### Toronto Cluster: 7
This cluster has Parks and Playgrounds. The eating places are much farther down the list in frequency.

In [None]:
# What is cluster 7?
toronto_merged.loc[toronto_merged['Cluster Labels'] == 7, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

### Cleanup missing venue data for Vancouver

In [None]:
# Remove the postal code that did not return any venues
vancouver_merged.dropna(inplace=True)

# Make sure that the Cluster Labels are integers
vancouver_merged = vancouver_merged.astype({'Cluster Labels': 'int32'})
vancouver_merged.head()

### Map the Vancouver clusters

In [None]:
# Map the clusters for Vancouver

# create map
vancouver_map_clusters = folium.Map(location=[vancouver_lat, vancouver_lng], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(vancouver_merged['Latitude'], vancouver_merged['Longitude'], vancouver_merged['Neighbourhood'], vancouver_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(vancouver_map_clusters)

vancouver_map_clusters

### Review the clusters 

In [None]:
vancouver_merged.groupby(['Cluster Labels']).count()

### Vancouver Cluster: 9
This cluster has many restaurants.

In [None]:
# What is cluster 9?
vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 9, vancouver_merged.columns[[1] + list(range(5, vancouver_merged.shape[1]))]]

In [None]:
print("Vancouver: Number of Boroughs that have 'Chinese Restaurant' in the top 4 most common venues: {}".format(
    len(vancouver_merged[(vancouver_merged['Cluster Labels'] == 9) & 
                     (vancouver_merged['1st Most Common Venue'] == 'Chinese Restaurant')
                    | (vancouver_merged['2nd Most Common Venue'] == 'Chinese Restaurant')
                    | (vancouver_merged['3rd Most Common Venue'] == 'Chinese Restaurant')
                    | (vancouver_merged['4th Most Common Venue'] == 'Chinese Restaurant')])))

### Vancouver Cluster: 0
This cluster has Coffee Shops, Bus Stops and other venues. 

In [None]:
# What is cluster 0?
vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 0, vancouver_merged.columns[[1] + list(range(5, vancouver_merged.shape[1]))]]

### Vancouver Cluster: 6
This cluster has many parks.

In [None]:
# What is cluster 6?
vancouver_merged.loc[vancouver_merged['Cluster Labels'] == 6, vancouver_merged.columns[[1] + list(range(5, vancouver_merged.shape[1]))]]

### Conclusion
Toronto and Vancouver have central areas that have many restaurants, coffee shops, bars, and other commercial venues. The clusting shows these areas to be similarly located in the downtown area.

As we review the other clusters we find these are on the edges of our maps. This is where the living areas and less commercial areas are located. This is where we find the parks and playgrounds located. 

When comparing the Toronto(cluster 0) and Vancouver(cluster 9) clusters, we are comparing the downtown commercial areas. This is where we find the largest concentration of restaurants. The Toronto cluster does not have a single Borough having a 'Chinese Restaurant' in the top 4 most common venues. The Vancouver cluster has 7 (of 23) Boroughs with 'Chinese Restaurant' in the top 4 most common venues.