# clustering the Neighborhoods

In [2]:
import numpy as np
import pandas as pd
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium # map rendering library

In [3]:
#We upload the first dataset where we save it as csv file in part 2
df=pd.read_csv('Capstone_part2.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,0,M3A,North York,Parkwoods,43.753259,-79.329656
1,1,M4A,North York,Victoria Village,43.725882,-79.315572
2,2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


## Exploring the data

In [4]:
#Exploring boroughs that contain york 
df_explore=df[df['Borough'].str.contains('York')]
df_explore=df_explore.reset_index(drop=True)
df_explore.head()

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,0,M3A,North York,Parkwoods,43.753259,-79.329656
1,1,M4A,North York,Victoria Village,43.725882,-79.315572
2,3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,7,M3B,North York,Don Mills North,43.745906,-79.352188
4,8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937


In [5]:
df_explore.shape

(34, 6)

### Create a map of ontario with the York Borough as markers


In [6]:
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [7]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_explore['Latitude'], df_explore['Longitude'], df_explore['Borough'], df_explore['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## EXPLORE NEIGHBORHOODS

In [8]:
CLIENT_ID = 'EZFJAPUICBBIXMEF25HZO15IKAF310I0YNE5QWDHG3K0ZTVF' # your Foursquare ID
CLIENT_SECRET = 'QXHVTCS3WELJSIMLTPAGFNBHJ35ETV3VJSTS4ABJDEE51W1S' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EZFJAPUICBBIXMEF25HZO15IKAF310I0YNE5QWDHG3K0ZTVF
CLIENT_SECRET:QXHVTCS3WELJSIMLTPAGFNBHJ35ETV3VJSTS4ABJDEE51W1S


### Let's explore the first neighborhood in our dataframe.

#### Get the neighborhood's name.

In [9]:
df_explore.loc[0, 'Neighborhood']

'Parkwoods'

In [10]:
#Exploring the venues around the neighborhood 
neighborhood_latitude = df_explore.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_explore.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_explore.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


In [11]:
#Now, let's get the top 100 venues that are in Parkwoods within a radius of 500 meters.
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=EZFJAPUICBBIXMEF25HZO15IKAF310I0YNE5QWDHG3K0ZTVF&client_secret=QXHVTCS3WELJSIMLTPAGFNBHJ35ETV3VJSTS4ABJDEE51W1S&v=20180605&ll=43.7532586,-79.3296565&radius=500&limit=100'

In [12]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60a52cedcbad496236a9650e'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c

In [13]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [14]:
#Getting each nearby venue
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,KFC,Fast Food Restaurant,43.754387,-79.333021
2,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [15]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.


## Explore Neighborhoods of boroughs that contain York in Ontario

#### Let's create a function to repeat the same process to all the York neighborhoods in Ontario

In [16]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Getting all the nearby venues in the york Borough 

In [17]:
York = getNearbyVenues(names=df_explore['Neighborhood'],
                                   latitudes=df_explore['Latitude'],
                                   longitudes=df_explore['Longitude']
                                  )

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills North
Parkview Hill, Woodbine Gardens
Glencairn
Don Mills South
Woodbine Heights
Humewood-Cedarvale
Caledonia-Fairbanks
Leaside
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Bayview Village
Downsview East
York Mills, Silver Hills
Downsview West
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview Central
Bedford Park, Lawrence Manor East
Del Ray, Mount Dennis, Keelsdale and Silverthorn
Humberlea, Emery
Willowdale South
Downsview Northwest
Runnymede, The Junction North
Weston
York Mills West
Willowdale West


In [18]:
#Confirming the function iterated through the neighborhoods
df_explore['Neighborhood']

0                                            Parkwoods
1                                     Victoria Village
2                     Lawrence Manor, Lawrence Heights
3                                      Don Mills North
4                      Parkview Hill, Woodbine Gardens
5                                            Glencairn
6                                      Don Mills South
7                                     Woodbine Heights
8                                   Humewood-Cedarvale
9                                  Caledonia-Fairbanks
10                                             Leaside
11                                   Hillcrest Village
12     Bathurst Manor, Wilson Heights, Downsview North
13                                    Thorncliffe Park
14                        Fairview, Henry Farm, Oriole
15                     Northwood Park, York University
16                                  The Danforth  East
17                                     Bayview Village
18        

In [19]:
print(York.shape)
York.head()

(343, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [20]:
#Count the venues in each neighborhood
York.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",23,23,23,23,23,23
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",25,25,25,25,25,25
Caledonia-Fairbanks,4,4,4,4,4,4
"Del Ray, Mount Dennis, Keelsdale and Silverthorn",4,4,4,4,4,4
Don Mills North,4,4,4,4,4,4
Don Mills South,19,19,19,19,19,19
Downsview Central,3,3,3,3,3,3
Downsview East,3,3,3,3,3,3
Downsview Northwest,4,4,4,4,4,4


In [21]:
#Let's find out how many unique categories can be curated from all the returned venues
print('There are {} uniques categories.'.format(len(York['Venue Category'].unique())))

There are 121 uniques categories.


#### Exploring the Neighbourhoods

In [22]:
# one hot encoding
York_onehot = pd.get_dummies(York[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
York_onehot['Neighborhood'] = York['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [York_onehot.columns[-1]] + list(York_onehot.columns[:-1])
York_onehot = York_onehot[fixed_columns]

York_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Sushi Restaurant,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
York_onehot.shape

(343, 122)

In [24]:
York_grouped = York_onehot.groupby('Neighborhood').mean().reset_index()
York_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Sushi Restaurant,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,...,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0
4,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Don Mills North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Don Mills South,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Downsview Central,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Downsview East,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Downsview Northwest,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
York_grouped.shape

(34, 122)

In [26]:
temp = York_grouped[York_grouped['Neighborhood'] == 'Parkview Hill, Woodbine Gardens'].T.reset_index()
temp

Unnamed: 0,index,21
0,Neighborhood,"Parkview Hill, Woodbine Gardens"
1,Accessories Store,0
2,Airport,0
3,American Restaurant,0
4,Art Gallery,0
...,...,...
117,Video Game Store,0
118,Vietnamese Restaurant,0
119,Warehouse Store,0
120,Women's Store,0


#### Let's print each neighborhood along with the top 5 most common venues

In [27]:
num_top_venues = 5

for hood in York_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = York_grouped[York_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
                       venue  freq
0                Coffee Shop  0.09
1                       Bank  0.09
2                Pizza Place  0.04
3                Supermarket  0.04
4  Middle Eastern Restaurant  0.04


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2  Japanese Restaurant  0.25
3                 Bank  0.25
4    Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                     venue  freq
0       Italian Restaurant  0.08
1               Restaurant  0.08
2           Sandwich Place  0.08
3              Coffee Shop  0.08
4  Comfort Food Restaurant  0.04


----Caledonia-Fairbanks----
               venue  freq
0               Park  0.50
1      Women's Store  0.25
2               Pool  0.25
3  Accessories Store  0.00
4       Liquor Store  0.00


----Del Ray, Mount Dennis, Keelsdale and Silverthorn----
                  venue  freq
0   Fried C

#### Let's get the  top n most common venues each neighborhood 

In [28]:
#define a function that returns the top n venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [29]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = York_grouped['Neighborhood']

for ind in np.arange(York_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(York_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Middle Eastern Restaurant,Shopping Mall,Mobile Phone Shop,Deli / Bodega,Pharmacy,Pizza Place,Intersection,Bridal Shop
1,Bayview Village,Bank,Japanese Restaurant,Chinese Restaurant,Café,Yoga Studio,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Dance Studio
2,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Restaurant,Coffee Shop,Hobby Shop,Café,Liquor Store,Juice Bar,Pharmacy,Pizza Place
3,Caledonia-Fairbanks,Park,Women's Store,Pool,Yoga Studio,Diner,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",Discount Store,Sandwich Place,Fried Chicken Joint,Fast Food Restaurant,Dance Studio,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Yoga Studio


## CLUSTER NEIGHBORHOOD

In [30]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

York_grouped_clustering = York_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(York_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 3, 3, 0, 3, 3, 3, 3, 0, 3])

In [31]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

York_merged = df_explore

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
York_merged = York_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

York_merged.head() # check the last columns!

Unnamed: 0.1,Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,M3A,North York,Parkwoods,43.753259,-79.329656,0,Park,Food & Drink Shop,Fast Food Restaurant,Discount Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice
1,1,M4A,North York,Victoria Village,43.725882,-79.315572,3,Coffee Shop,Pizza Place,Hockey Arena,Portuguese Restaurant,Yoga Studio,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
2,3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,3,Clothing Store,Furniture / Home Store,Boutique,Coffee Shop,Miscellaneous Shop,Accessories Store,Vietnamese Restaurant,Distribution Center,Construction & Landscaping,Convenience Store
3,7,M3B,North York,Don Mills North,43.745906,-79.352188,3,Caribbean Restaurant,Gym,Café,Japanese Restaurant,Yoga Studio,Discount Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice
4,8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,3,Pizza Place,Breakfast Spot,Gym / Fitness Center,Intersection,Pharmacy,Pet Store,Café,Bank,Athletics & Sports,Flea Market


In [32]:
# create map showing the different clusters
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, hood, cluster in zip(York_merged['Latitude'], York_merged['Longitude'], York_merged['Neighborhood'], York_merged['Cluster Labels']):
    label = folium.Popup(str(hood) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Cluster

#### for cluster = 0

In [33]:
York_merged.loc[York_merged['Cluster Labels'] == 0, York_merged.columns[[2] + list(range(6, York_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Park,Food & Drink Shop,Fast Food Restaurant,Discount Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice
5,North York,0,Bakery,Asian Restaurant,Pizza Place,Japanese Restaurant,Park,Construction & Landscaping,Convenience Store,Comfort Food Restaurant,Cosmetics Shop,Discount Store
9,York,0,Park,Women's Store,Pool,Yoga Studio,Diner,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
18,North York,0,Park,Airport,Business Service,Distribution Center,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dance Studio


#### for cluster = 1

In [34]:
York_merged.loc[York_merged['Cluster Labels'] == 1, York_merged.columns[[2] + list(range(6, York_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,East York/East Toronto,1,Park,Convenience Store,Chocolate Shop,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Curling Ice,Dance Studio,Deli / Bodega
23,North York,1,Park,Chocolate Shop,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dance Studio,Deli / Bodega
31,York,1,Park,Convenience Store,Chocolate Shop,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Curling Ice,Dance Studio,Deli / Bodega
32,North York,1,Park,Construction & Landscaping,Convenience Store,Chocolate Shop,Coffee Shop,Comfort Food Restaurant,Cosmetics Shop,Curling Ice,Dance Studio,Deli / Bodega


#### for cluster = 2

In [35]:
York_merged.loc[York_merged['Cluster Labels'] == 2, York_merged.columns[[2] + list(range(6, York_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,North York,2,Baseball Field,Yoga Studio,Distribution Center,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dance Studio,Deli / Bodega


#### for cluster = 3

In [36]:
York_merged.loc[York_merged['Cluster Labels'] == 3, York_merged.columns[[2] + list(range(6, York_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,3,Coffee Shop,Pizza Place,Hockey Arena,Portuguese Restaurant,Yoga Studio,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
2,North York,3,Clothing Store,Furniture / Home Store,Boutique,Coffee Shop,Miscellaneous Shop,Accessories Store,Vietnamese Restaurant,Distribution Center,Construction & Landscaping,Convenience Store
3,North York,3,Caribbean Restaurant,Gym,Café,Japanese Restaurant,Yoga Studio,Discount Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice
4,East York,3,Pizza Place,Breakfast Spot,Gym / Fitness Center,Intersection,Pharmacy,Pet Store,Café,Bank,Athletics & Sports,Flea Market
6,North York,3,Coffee Shop,Restaurant,Gym,Supermarket,Dim Sum Restaurant,Sandwich Place,Bike Shop,Beer Store,Sporting Goods Shop,Italian Restaurant
7,East York,3,Skating Rink,Curling Ice,Intersection,Beer Store,Dance Studio,Athletics & Sports,Bus Stop,Park,Cosmetics Shop,Convenience Store
8,York,3,Hockey Arena,Trail,Field,Home Service,Yoga Studio,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega,Dance Studio
10,East York,3,Coffee Shop,Sporting Goods Shop,Shopping Mall,Furniture / Home Store,Burger Joint,Bank,Restaurant,Mexican Restaurant,Department Store,Dessert Shop
11,North York,3,Golf Course,Pool,Mediterranean Restaurant,Fast Food Restaurant,Dog Run,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
12,North York,3,Coffee Shop,Bank,Middle Eastern Restaurant,Shopping Mall,Mobile Phone Shop,Deli / Bodega,Pharmacy,Pizza Place,Intersection,Bridal Shop


#### for cluster = 4

In [37]:
York_merged.loc[York_merged['Cluster Labels'] == 4, York_merged.columns[[2] + list(range(6, York_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,North York,4,Martial Arts School,Yoga Studio,Clothing Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Dance Studio,Deli / Bodega
