# Part 3 of Capstone Project

Importing Required libraries

In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from geopy.geocoders import Nominatim
import geocoder
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
r = requests.get(url)

soup = BeautifulSoup(r.content, 'html5lib')
table = soup.find('div', attrs ={'id':'container'})

In [3]:
postalCodes = [];
boroughs= [];
neighborhoods = [];
columnNum = 1;
passVal = False

for row in soup.find_all('td'):
    for cell in row:
        if cell.string and cell.string[0].isalpha() and len(cell.string) > 2:
            passVal = False
            if columnNum == 1:
                if passVal == False and cell.string[1].isdigit():
                    postalCodes.append(cell.string.strip('\n'));   
                    columnNum = 2
                else:
                    continue
            elif columnNum == 2 :
                if cell.string == 'Not assigned':
                    passVal = True
                    del postalCodes[-1]
                    columnNum = 1
                    continue
                else:
                    boroughs.append(cell.string.strip('\n'));      
                    columnNum = 3
            elif columnNum == 3 :
                if cell.string == 'Not assigned\n':
                    neighborhoods.append(boroughs[-1])
                else:
                    neighborhoods.append(cell.string.strip('\n')); 
                columnNum = 1

# Defining Dataframe

In [4]:
# define the dataframe columns
column_names = ['PostalCode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighbors = pd.DataFrame(columns=column_names)

neighbors

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude


# Feeding data

In [5]:
for data in range(0, len(postalCodes)-1):
    code = postalCodes[data]
    borough = boroughs[data]
    neighborhood_name = neighborhoods[data]
    
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
    lat_lng_coords = g.latlng

    neighbors = neighbors.append({ 'PostalCode': code,
                                   'Borough': borough,
                                   'Neighborhood': neighborhood_name,
                                   'Latitude': lat_lng_coords[0],
                                   'Longitude': lat_lng_coords[1]}, ignore_index=True)
neighbors

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752420,-79.329242
1,M4A,North York,Victoria Village,43.730600,-79.313265
2,M5A,Downtown Toronto,Harbourfront,43.650295,-79.359166
3,M6A,North York,Lawrence Heights,43.723270,-79.451286
4,M6A,North York,Lawrence Manor,43.723270,-79.451286
5,M7A,Downtown Toronto,Queen's Park,43.661150,-79.391715
6,M9A,Queen's Park,Queen's Park,43.662299,-79.528195
7,M1B,Scarborough,Rouge,43.811525,-79.195517
8,M1B,Scarborough,Malvern,43.811525,-79.195517
9,M3B,North York,Don Mills North,43.749055,-79.362227


In [6]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighbors['Borough'].unique()),
        neighbors.shape[0]
    )
)

The dataframe has 11 boroughs and 209 neighborhoods.


Use geopy library to get the latitude and longitude values of Toronto City.

In [7]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


Create a map of Toronto with neighborhoods superimposed on top

In [8]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighbors['Latitude'], neighbors['Longitude'], neighbors['Borough'], neighbors['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Define Foursquare Credentials and Version

In [9]:
CLIENT_ID = 'GWK0X324WQBI2B3OS4FSRZD2U1TF2EVNE2OSZP40LV5FWWWH' # your Foursquare ID
CLIENT_SECRET = 'JBX4E03LI3MX5BLLY1PJRGUYMGNHGF1X1NX5JAOLGTNPNX4W' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: GWK0X324WQBI2B3OS4FSRZD2U1TF2EVNE2OSZP40LV5FWWWH
CLIENT_SECRET:JBX4E03LI3MX5BLLY1PJRGUYMGNHGF1X1NX5JAOLGTNPNX4W


In [10]:
neighborhood_name = neighbors.loc[0, 'Neighborhood'] # neighborhood name
neighborhood_latitude = neighbors.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = neighbors.loc[0, 'Longitude'] # neighborhood longitude value

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.75242000000003, -79.32924245299995.


In [11]:
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=GWK0X324WQBI2B3OS4FSRZD2U1TF2EVNE2OSZP40LV5FWWWH&client_secret=JBX4E03LI3MX5BLLY1PJRGUYMGNHGF1X1NX5JAOLGTNPNX4W&v=20180605&ll=43.75242000000003,-79.32924245299995&radius=500&limit=100'

Send the GET request and examine the resutls

In [12]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e5415a829ce6a001b5e0e41'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.75692000450003,
    'lng': -79.32302427998279},
   'sw': {'lat': 43.74791999550003, 'lng': -79.33546062601711}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 238,
        'cc': 'CA',
       

In [13]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [14]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114
2,TTC stop - 44 Valley Woods,Bus Stop,43.755402,-79.333741


# Explore Neighborhoods in Toronto

In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [18]:
toronto_venues = getNearbyVenues(names=neighborhoods,
                                 latitudes=neighbors['Latitude'],
                                 longitudes=neighbors['Longitude']
                                  )

Parkwoods
Victoria Village
Harbourfront
Lawrence Heights
Lawrence Manor
Queen's Park
Queen's Park
Rouge
Malvern
Don Mills North
Woodbine Gardens
Parkview Hill
Ryerson
Garden District
Glencairn
Cloverdale
Islington
Martin Grove
Princess Gardens
West Deane Park
Highland Creek
Rouge Hill
Port Union
Flemingdon Park
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens
Eringate
Markland Wood
Old Burnhamthorpe
Guildwood
Morningside
West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor
Downsview North
Wilson Heights
Thorncliffe Park
Adelaide
King
Richmond
Dovercourt Village
Dufferin
Scarborough Village
Fairview
Henry Farm
Oriole
Northwood Park
York University
East Toronto
Harbourfront East
Toronto Islands
Union Station
Little Portugal
Trinity
East Birchmount Park
Ionview
Kennedy Park
Bayview Village
CFB Toronto
Downsview East
The Danforth West
Riverdale
Design Exchange
Toronto 

In [19]:
print(toronto_venues.shape)
toronto_venues.head()

(4722, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75242,-79.329242,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75242,-79.329242,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Parkwoods,43.75242,-79.329242,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
3,Victoria Village,43.7306,-79.313265,Wigmore Park,43.731023,-79.310771,Park
4,Victoria Village,43.7306,-79.313265,Memories of Africa,43.726602,-79.312427,Grocery Store


In [20]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Agincourt,15,15,15,15,15,15
Agincourt North,2,2,2,2,2,2
Albion Gardens,16,16,16,16,16,16
Alderwood,4,4,4,4,4,4
Bathurst Quay,69,69,69,69,69,69
Bayview Village,4,4,4,4,4,4
Beaumond Heights,16,16,16,16,16,16
Bedford Park,20,20,20,20,20,20
Berczy Park,63,63,63,63,63,63


In [21]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 263 uniques categories.


# Analyze Each Neighborhood 

In [22]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
toronto_onehot.shape

(4722, 263)

Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [24]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Adelaide,0.000000,0.0,0.000000,0.00,0.030000,0.0,0.010000,0.000000,0.030000,...,0.00,0.0,0.010000,0.000000,0.0000,0.000000,0.010000,0.00,0.000000,0.000000
1,Agincourt,0.000000,0.0,0.000000,0.00,0.000000,0.0,0.000000,0.000000,0.000000,...,0.00,0.0,0.000000,0.000000,0.0000,0.066667,0.000000,0.00,0.000000,0.000000
2,Agincourt North,0.000000,0.0,0.000000,0.00,0.000000,0.0,0.000000,0.000000,0.000000,...,0.00,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.00,0.000000,0.000000
3,Albion Gardens,0.000000,0.0,0.000000,0.00,0.000000,0.0,0.000000,0.000000,0.000000,...,0.00,0.0,0.000000,0.000000,0.0625,0.000000,0.000000,0.00,0.000000,0.000000
4,Alderwood,0.000000,0.0,0.000000,0.00,0.000000,0.0,0.000000,0.000000,0.000000,...,0.00,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.00,0.000000,0.000000
5,Bathurst Quay,0.014493,0.0,0.000000,0.00,0.000000,0.0,0.000000,0.000000,0.014493,...,0.00,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.00,0.000000,0.000000
6,Bayview Village,0.000000,0.0,0.000000,0.00,0.000000,0.0,0.000000,0.000000,0.000000,...,0.00,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.00,0.000000,0.000000
7,Beaumond Heights,0.000000,0.0,0.000000,0.00,0.000000,0.0,0.000000,0.000000,0.000000,...,0.00,0.0,0.000000,0.000000,0.0625,0.000000,0.000000,0.00,0.000000,0.000000
8,Bedford Park,0.000000,0.0,0.000000,0.00,0.000000,0.0,0.000000,0.000000,0.000000,...,0.00,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.00,0.000000,0.000000
9,Berczy Park,0.000000,0.0,0.000000,0.00,0.000000,0.0,0.015873,0.000000,0.000000,...,0.00,0.0,0.015873,0.000000,0.0000,0.000000,0.000000,0.00,0.000000,0.000000


In [25]:

toronto_grouped.shape

(197, 263)

Let's print each neighborhood along with the top 3 most common venues

In [26]:
num_top_venues = 3

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide----
         venue  freq
0  Coffee Shop  0.07
1         Café  0.06
2        Hotel  0.04


----Agincourt----
                venue  freq
0       Shopping Mall  0.13
1  Chinese Restaurant  0.13
2      Discount Store  0.07


----Agincourt North----
              venue  freq
0  Sushi Restaurant   0.5
1          Pharmacy   0.5
2       Yoga Studio   0.0


----Albion Gardens----
           venue  freq
0  Grocery Store  0.12
1    Coffee Shop  0.06
2           Park  0.06


----Alderwood----
               venue  freq
0  Convenience Store  0.25
1                Gym  0.25
2                Pub  0.25


----Bathurst Quay----
                  venue  freq
0           Coffee Shop  0.10
1  Gym / Fitness Center  0.06
2    Italian Restaurant  0.06


----Bayview Village----
                        venue  freq
0  Construction & Landscaping  0.25
1                       Trail  0.25
2                     Dog Run  0.25


----Beaumond Heights----
           venue  freq
0  Grocery Store  0.12
1    

Let's put that into a pandas dataframe

In [27]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [28]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Adelaide,Coffee Shop,Café,Steakhouse,Hotel,Gastropub
1,Agincourt,Shopping Mall,Chinese Restaurant,Discount Store,Department Store,Shanghai Restaurant
2,Agincourt North,Pharmacy,Sushi Restaurant,Women's Store,Falafel Restaurant,Dumpling Restaurant
3,Albion Gardens,Grocery Store,Hardware Store,Japanese Restaurant,Pizza Place,Park
4,Alderwood,Gym,Convenience Store,Performing Arts Venue,Pub,Falafel Restaurant
5,Bathurst Quay,Coffee Shop,Gym / Fitness Center,Italian Restaurant,Bar,Café
6,Bayview Village,Construction & Landscaping,Trail,Park,Dog Run,Dumpling Restaurant
7,Beaumond Heights,Grocery Store,Hardware Store,Japanese Restaurant,Pizza Place,Park
8,Bedford Park,Sandwich Place,Coffee Shop,Italian Restaurant,Juice Bar,Café
9,Berczy Park,Coffee Shop,Seafood Restaurant,Restaurant,Café,Farmers Market


# Cluster Neighborhoods

In [29]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 1, 2, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 2, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1,
       2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 2, 1, 1, 0, 0, 0, 2, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 2, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 2,
       1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 0, 0, 2, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 4, 1, 1, 1, 1, 0, 1, 1, 3, 0, 1, 1],
      dtype=int32)

In [30]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = neighbors

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()# check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.75242,-79.329242,0.0,Park,Food & Drink Shop,Bus Stop,Falafel Restaurant,Dumpling Restaurant
1,M4A,North York,Victoria Village,43.7306,-79.313265,0.0,German Restaurant,Grocery Store,Park,Field,Fast Food Restaurant
2,M5A,Downtown Toronto,Harbourfront,43.650295,-79.359166,1.0,Coffee Shop,Bakery,Boat or Ferry,Theater,Shoe Store
3,M6A,North York,Lawrence Heights,43.72327,-79.451286,1.0,Clothing Store,Cosmetics Shop,Food Court,Sushi Restaurant,Furniture / Home Store
4,M6A,North York,Lawrence Manor,43.72327,-79.451286,1.0,Clothing Store,Cosmetics Shop,Food Court,Sushi Restaurant,Furniture / Home Store


In [31]:
toronto_merged.dropna(inplace = True)
toronto_merged['Cluster Labels'] = toronto_merged['Cluster Labels'].astype('int')

Finally, let's visualize the resulting clusters

In [32]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Examine Clusters

In [33]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,North York,0,Park,Food & Drink Shop,Bus Stop,Falafel Restaurant,Dumpling Restaurant
1,North York,0,German Restaurant,Grocery Store,Park,Field,Fast Food Restaurant
9,North York,0,Park,Gas Station,Burger Joint,Falafel Restaurant,Dumpling Restaurant
32,Scarborough,0,Construction & Landscaping,Park,Gym / Fitness Center,Fish & Chips Shop,Field
33,Scarborough,0,Construction & Landscaping,Park,Gym / Fitness Center,Fish & Chips Shop,Field
34,Scarborough,0,Construction & Landscaping,Park,Gym / Fitness Center,Fish & Chips Shop,Field
37,York,0,Park,Women's Store,Gym,Sporting Goods Shop,Mexican Restaurant
38,Scarborough,0,Coffee Shop,Business Service,Korean Restaurant,Park,Women's Store
59,East York,0,Italian Restaurant,Park,Farmers Market,Bar,Fish Market
60,Downtown Toronto,0,Park,Pier,Harbor / Marina,Falafel Restaurant,Donut Shop
