# Coursera IBM Data Science Capstone Project
## Toronto Neighborhood Analysis - Part C

In [1]:
import pandas as pd
import numpy as np

import json
from geopy.geocoders import Nominatim

import requests 
from pandas.io.json import json_normalize

# Matplotlib
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

In [2]:
df_toronto_postal_geo = pd.read_pickle('toronto_postal_geo.pckl')

In [3]:
df_toronto_postal_geo

Unnamed: 0,Postcode,Borough,Neighbourhoods,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
...,...,...,...,...,...,...
98,M9N,York,Weston,M9N,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,M9P,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village,Martin Grove Gardens,Richvie...",M9R,43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",M9V,43.739416,-79.588437


### Test geolocator
Note that it may be neccessary to repeattedly run this cell due to timeout issues

In [4]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="coursera_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [5]:
# create map of Toronto using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, postcode, borough, neighborhood in zip(df_toronto_postal_geo['Latitude'], df_toronto_postal_geo['Longitude'], df_toronto_postal_geo['Postcode'], df_toronto_postal_geo['Borough'], df_toronto_postal_geo['Neighbourhoods']):
    label = '{}, {}'.format(neighborhood, borough, postcode)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

In [6]:
map_boroughs_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_toronto_postal_geo['Latitude'], df_toronto_postal_geo['Longitude'], df_toronto_postal_geo['Borough']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_boroughs_toronto)  

map_boroughs_toronto

### Now get Foursquare data
Please note that there may be timeout or limited access issues again

Set up Authentication

In [7]:
CLIENT_ID = 'MU4OZMN3032OXPGG5JSJSFKLQ0F0JJ4JDITTP4B4FG3J5RB2' # your Foursquare ID
CLIENT_SECRET = 'KXMHI0MF40B3G2A3DCGNGEHHGSJ2BPF2LQNCPE0KCNNKDAB1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: MU4OZMN3032OXPGG5JSJSFKLQ0F0JJ4JDITTP4B4FG3J5RB2
CLIENT_SECRET:KXMHI0MF40B3G2A3DCGNGEHHGSJ2BPF2LQNCPE0KCNNKDAB1


### Testing with first Postal Code Area

In [17]:
neighborhood_latitude = df_toronto_postal_geo.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_toronto_postal_geo.loc[0, 'Longitude'] # neighborhood longitude value

postal_code = df_toronto_postal_geo.loc[0, 'Postcode'] # Postalcode
borough_name = df_toronto_postal_geo.loc[0, 'Borough'] # borough name
neighborhood_name = df_toronto_postal_geo.loc[0, 'Neighbourhoods'] # neighborhood name

print('Latitude and longitude values of {} with postal code {} in borough {} are {}, {}.'.format(neighborhood_name, 
                                                                                         postal_code,
                                                                                         borough_name,
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge,Malvern with postal code M1B in borough Scarborough are 43.806686299999996, -79.19435340000001.


### Get venues from Foursquare

In [39]:
# type your answer here
LIMIT = 100
radius = 500

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=MU4OZMN3032OXPGG5JSJSFKLQ0F0JJ4JDITTP4B4FG3J5RB2&client_secret=KXMHI0MF40B3G2A3DCGNGEHHGSJ2BPF2LQNCPE0KCNNKDAB1&v=20180605&ll=43.806686299999996,-79.19435340000001&radius=500&limit=100'

In [41]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5dc0f423a1db64002c553a53'},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 2,
  'suggestedBounds': {'ne': {'lat': 43.8111863045, 'lng': -79.18812958073042},
   'sw': {'lat': 43.80218629549999, 'lng': -79.2005772192696}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bb6b9446edc76b0d771311c',
       'name': "Wendy's",
       'location': {'crossStreet': 'Morningside & Sheppard',
        'lat': 43.80744841934756,
        'lng': -79.19905558052072,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.80744841934756,
          'lng': -79.19905558052072}],
        'distance': 387,
        'cc': 'CA',
        'city': 'Toronto',
    

In [42]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### get venues for first Postal Code Area

In [43]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON
# print(0, nearby_venues.head())

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
# print(1, nearby_venues.head())

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
# print('II', nearby_venues.head())

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
print('III', nearby_venues.head(15))

III                     name            categories        lat        lng
0                Wendy's  Fast Food Restaurant  43.807448 -79.199056
1  Interprovincial Group            Print Shop  43.805630 -79.200378


In [44]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

2 venues were returned by Foursquare.


In [45]:
def getNearbyVenues(postcodes, boroughs, names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for postcode, borough, name, lat, lng in zip(postcodes, boroughs, names, latitudes, longitudes):
        print(postcode, borough, name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            postcode,
            borough,
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postcode',
                             'Borough',
                             'Neighborhood', 
                             'Neighborhood Latitude', 
                             'Neighborhood Longitude', 
                             'Venue', 
                             'Venue Latitude', 
                             'Venue Longitude', 
                             'Venue Category']
    
    return(nearby_venues)

In [46]:
toronto_venues = getNearbyVenues(postcodes=df_toronto_postal_geo['Postcode'],
                                 boroughs=df_toronto_postal_geo['Borough'],
                                 names=df_toronto_postal_geo['Neighbourhoods'],
                                 latitudes=df_toronto_postal_geo['Latitude'],
                                 longitudes=df_toronto_postal_geo['Longitude'])

M1B Scarborough Rouge,Malvern
M1C Scarborough Highland Creek,Rouge Hill,Port Union
M1E Scarborough Guildwood,Morningside,West Hill
M1G Scarborough Woburn
M1H Scarborough Cedarbrae
M1J Scarborough Scarborough Village
M1K Scarborough East Birchmount Park,Ionview,Kennedy Park
M1L Scarborough Clairlea,Golden Mile,Oakridge
M1M Scarborough Cliffcrest,Cliffside,Scarborough Village West
M1N Scarborough Birch Cliff,Cliffside West
M1P Scarborough Dorset Park,Scarborough Town Centre,Wexford Heights
M1R Scarborough Maryvale,Wexford
M1S Scarborough Agincourt
M1T Scarborough Clarks Corners,Sullivan,Tam O'Shanter
M1V Scarborough Agincourt North,L'Amoreaux East,Milliken,Steeles East
M1W Scarborough L'Amoreaux West
M1X Scarborough Upper Rouge
M2H North York Hillcrest Village
M2J North York Fairview,Henry Farm,Oriole
M2K North York Bayview Village
M2L North York Silver Hills,York Mills
M2M North York Newtonbrook,Willowdale
M2N North York Willowdale South
M2P North York York Mills West
M2R North York Wil

In [47]:
print(toronto_venues.shape)
toronto_venues.head()

(2265, 9)


Unnamed: 0,Postcode,Borough,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Chris Effects Painting,43.784343,-79.163742,Construction & Landscaping
3,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
4,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place


In [56]:
# store toronto venue data
toronto_venues.to_pickle('toronto_venue_data.pckl')

### Count venues per borough and number of unique entries

In [48]:
toronto_venues.groupby('Borough').count()

Unnamed: 0_level_0,Postcode,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Central Toronto,115,115,115,115,115,115,115,115
Downtown Toronto,1291,1291,1291,1291,1291,1291,1291,1291
East Toronto,122,122,122,122,122,122,122,122
East York,79,79,79,79,79,79,79,79
Etobicoke,73,73,73,73,73,73,73,73
Mississauga,11,11,11,11,11,11,11,11
North York,247,247,247,247,247,247,247,247
Queen's Park,42,42,42,42,42,42,42,42
Scarborough,85,85,85,85,85,85,85,85
West Toronto,182,182,182,182,182,182,182,182


In [49]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 274 uniques categories.


### Prepare venue data for clustering by spreading features

In [51]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add borough column back to dataframe
toronto_onehot['Borough'] = toronto_venues['Borough'] 

# move Borough column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head(25)

(2265, 275)


Unnamed: 0,Borough,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Scarborough,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [52]:
toronto_grouped = toronto_onehot.groupby('Borough').mean().reset_index()
print(toronto_grouped.shape)
toronto_grouped.head()

(11, 275)


Unnamed: 0,Borough,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017391,...,0.0,0.008696,0.0,0.0,0.008696,0.0,0.0,0.0,0.0,0.008696
1,Downtown Toronto,0.0,0.000775,0.000775,0.000775,0.000775,0.001549,0.001549,0.001549,0.015492,...,0.002324,0.011619,0.002324,0.0,0.005422,0.0,0.006197,0.000775,0.000775,0.002324
2,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02459,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02459
3,East York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.012658,0.0,0.012658,0.0,0.0,0.0,0.012658
4,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0


In [55]:
num_top_venues = 5

for borough in toronto_grouped['Borough']:
    print("----"+borough+"----")
    temp = toronto_grouped[toronto_grouped['Borough'] == borough].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
            venue  freq
0     Coffee Shop  0.07
1  Sandwich Place  0.06
2            Café  0.05
3            Park  0.05
4     Pizza Place  0.04


----Downtown Toronto----
                venue  freq
0         Coffee Shop  0.10
1                Café  0.05
2          Restaurant  0.03
3               Hotel  0.03
4  Italian Restaurant  0.03


----East Toronto----
                venue  freq
0    Greek Restaurant  0.07
1         Coffee Shop  0.06
2  Italian Restaurant  0.05
3                Café  0.04
4      Ice Cream Shop  0.04


----East York----
                 venue  freq
0          Coffee Shop  0.08
1         Burger Joint  0.05
2                 Park  0.04
3  Sporting Goods Shop  0.04
4                 Bank  0.04


----Etobicoke----
            venue  freq
0     Pizza Place  0.11
1  Sandwich Place  0.07
2        Pharmacy  0.05
3             Gym  0.04
4     Coffee Shop  0.04


----Mississauga----
                       venue  freq
0                Coffee Shop  0

In [57]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

### create the new dataframe and display the top 10 venues for each neighborhood.

In [64]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Borough']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
boroughs_venues_sorted = pd.DataFrame(columns=columns)
boroughs_venues_sorted['Borough'] = toronto_grouped['Borough']

for ind in np.arange(toronto_grouped.shape[0]):
    boroughs_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

boroughs_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Sandwich Place,Café,Park,Pizza Place,Clothing Store,Restaurant,Sushi Restaurant,Dessert Shop,Gym
1,Downtown Toronto,Coffee Shop,Café,Restaurant,Hotel,Italian Restaurant,Bakery,Bar,Japanese Restaurant,Park,Seafood Restaurant
2,East Toronto,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Café,Park,Brewery,American Restaurant,Yoga Studio,Pizza Place
3,East York,Coffee Shop,Burger Joint,Bank,Pharmacy,Park,Pizza Place,Sporting Goods Shop,Supermarket,Bus Line,Sandwich Place
4,Etobicoke,Pizza Place,Sandwich Place,Pharmacy,Fast Food Restaurant,Coffee Shop,Grocery Store,Gym,Discount Store,Bakery,Café


### Now do the clustering

In [65]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Borough', axis=1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
#manhattan_grouped_clustering.head()

array([1, 1, 1, 3, 3, 0, 1, 4, 3, 1], dtype=int32)

### now collect results

In [66]:
# add clustering labels
boroughs_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto_postal_geo

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(boroughs_venues_sorted.set_index('Borough'), on='Borough')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhoods,Postal Code,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge,Malvern",M1B,43.806686,-79.194353,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",M1C,43.784535,-79.160497,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",M1E,43.763573,-79.188711,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park


In [67]:
### ... and finally the map

In [68]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="coursera_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [71]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, bor, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(bor) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine clusters

In [73]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
86,Mississauga,-79.615819,0,Coffee Shop,Hotel,Mediterranean Restaurant,Gym / Fitness Center,Sandwich Place,Fried Chicken Joint,Burrito Place,Middle Eastern Restaurant,American Restaurant,Yoga Studio


In [74]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,North York,-79.363452,1,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Grocery Store,Pizza Place,Park,Bank,Café,Sandwich Place
18,North York,-79.346556,1,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Grocery Store,Pizza Place,Park,Bank,Café,Sandwich Place
19,North York,-79.385975,1,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Grocery Store,Pizza Place,Park,Bank,Café,Sandwich Place
20,North York,-79.374714,1,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Grocery Store,Pizza Place,Park,Bank,Café,Sandwich Place
21,North York,-79.408493,1,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Grocery Store,Pizza Place,Park,Bank,Café,Sandwich Place
...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,West Toronto,-79.456325,1,Bar,Coffee Shop,Café,Restaurant,Italian Restaurant,Bakery,Pizza Place,Music Venue,Gift Shop,Asian Restaurant
84,West Toronto,-79.484450,1,Bar,Coffee Shop,Café,Restaurant,Italian Restaurant,Bakery,Pizza Place,Music Venue,Gift Shop,Asian Restaurant
87,East Toronto,-79.321558,1,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Café,Park,Brewery,American Restaurant,Yoga Studio,Pizza Place
96,North York,-79.565963,1,Coffee Shop,Clothing Store,Fast Food Restaurant,Japanese Restaurant,Grocery Store,Pizza Place,Park,Bank,Café,Sandwich Place


In [75]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
73,York,-79.428191,2,Park,Convenience Store,Bar,Pizza Place,Restaurant,Caribbean Restaurant,Discount Store,Sandwich Place,Bus Line,Market
74,York,-79.453512,2,Park,Convenience Store,Bar,Pizza Place,Restaurant,Caribbean Restaurant,Discount Store,Sandwich Place,Bus Line,Market
80,York,-79.476013,2,Park,Convenience Store,Bar,Pizza Place,Restaurant,Caribbean Restaurant,Discount Store,Sandwich Place,Bus Line,Market
81,York,-79.487262,2,Park,Convenience Store,Bar,Pizza Place,Restaurant,Caribbean Restaurant,Discount Store,Sandwich Place,Bus Line,Market
98,York,-79.518188,2,Park,Convenience Store,Bar,Pizza Place,Restaurant,Caribbean Restaurant,Discount Store,Sandwich Place,Bus Line,Market


In [76]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,-79.194353,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
1,Scarborough,-79.160497,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
2,Scarborough,-79.188711,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
3,Scarborough,-79.216917,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
4,Scarborough,-79.239476,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
5,Scarborough,-79.239476,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
6,Scarborough,-79.262029,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
7,Scarborough,-79.284577,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
8,Scarborough,-79.239476,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park
9,Scarborough,-79.264848,3,Fast Food Restaurant,Pizza Place,Chinese Restaurant,Coffee Shop,Breakfast Spot,Bakery,Noodle House,Thai Restaurant,Indian Restaurant,Park


In [77]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,Queen's Park,-79.389494,4,Coffee Shop,College Cafeteria,Diner,Park,Gym,Yoga Studio,Wings Joint,Seafood Restaurant,Burger Joint,Sandwich Place
