In [45]:
#imports
import pandas as pd
import numpy as np

import json
import requests


import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
df = pd.read_html(url,header=0)
df

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [4]:
len(df)

3

In [5]:
dfcodes = df[0]
dfcodes

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [6]:
#dropping codes that are unassigned to boroughs
dfcodes1 = dfcodes[~dfcodes['Borough'].str.contains('Not assigned',na=False)]
dfcodes1

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [7]:
#checking for unassigned values in neighborhood column
len(dfcodes1[dfcodes1['Neighbourhood'].str.contains('Not assigned')])


0

In [8]:
#checking to see if there are duplicate listings for postal codes.
dfcodes1['Postal Code'].value_counts

<bound method IndexOpsMixin.value_counts of 2      M3A
3      M4A
4      M5A
5      M6A
6      M7A
      ... 
160    M8X
165    M4Y
168    M7Y
169    M8Y
178    M8Z
Name: Postal Code, Length: 103, dtype: object>

it looks like the wikipedia page has been updated since the lesson was written, but we'll run the groupby for the sake of the lesson anyway.  


In [9]:
dfcodes2 = dfcodes1.groupby(dfcodes1['Postal Code'],sort=False,as_index=False).sum()


In [10]:
dfcodes2.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


I'm also guessing that the postal codes were placed in order on wikipedia,  I've kept the same order as the wikipedia page, but the image in the lesson plan looks like it has a random order.  I'm assuming that was what the wikipedia page used to display.  

In [11]:
dfcodes2.shape

(103, 3)

In [12]:
pip install geocoder

Note: you may need to restart the kernel to use updated packages.


In [12]:
import geocoder

print('geocoder imported!')

geocoder imported!


Geocoder was having issues loading so i ended up using pgeocode. 

In [12]:
!pip install pgeocode
import pgeocode
pgeocode.Nominatim('ca')
geolocator = pgeocode.Nominatim('ca')



Collecting pgeocode
  Downloading pgeocode-0.3.0-py3-none-any.whl (8.5 kB)
Installing collected packages: pgeocode
Successfully installed pgeocode-0.3.0


In [13]:
pcodes = list(dfcodes2['Postal Code'])

In [14]:
#getting lat and lng values

latitudes = []
longitudes = []
for i, pcode in enumerate(pcodes):
    g = geolocator.query_postal_code(pcode)
    
    if not g.empty:
        latitudes.append(g.latitude)
        longitudes.append(g.longitude)

In [15]:
#adding columns to DF
dfcodes2['Latitude'] = latitudes
dfcodes2['Longitude'] = longitudes
dfcodes2.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7545,-79.33
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889


In [16]:
dfcodes2.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7545,-79.33
1,M4A,North York,Victoria Village,43.7276,-79.3148
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.7223,-79.4504
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.6662,-79.5282
6,M1B,Scarborough,"Malvern, Rouge",43.8113,-79.193
7,M3B,North York,Don Mills,43.745,-79.359
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.7063,-79.3094
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783


In [17]:
dfcodes3 = dfcodes2.dropna()
dfcodes3.shape

(102, 5)

In [18]:
map_Toronto = folium.Map(location=[43.6532,-79.3832], zoom_start=10)

for lat, lng, borough, neighborhood in zip(dfcodes3['Latitude'], dfcodes3['Longitude'], dfcodes3['Borough'], dfcodes3['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

In [19]:
# @hidden_cell

CLIENT_ID = 'R2C03IKWUNFLCI5QBPTG1AD4WL1AOLRLBWCJTY0SEUM0KBDN' # your Foursquare ID
CLIENT_SECRET = 'PRFMTXX5CVBESQ1DEA4IASQ4O1ERVWBQGQPACWJN05EB2UKX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value


Let's check out the values we have for Boroughs.

In [20]:
dfcodes3['Borough'].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East Toronto         5
East York            5
Name: Borough, dtype: int64

In [21]:
boroughs = dfcodes3.groupby('Borough')

In [22]:
#Create downtown Toronto DF
downtownT = boroughs.get_group('Downtown Toronto')
downtownT.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783
15,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756
20,M5E,Downtown Toronto,Berczy Park,43.6456,-79.3754


Let's take a closer look at the Garden District area.

In [23]:
downtownT.loc[9,'Neighbourhood']

'Garden District, Ryerson'

In [24]:
neighborhood_latitude = downtownT.loc[9, 'Latitude']
neighborhood_longitude = downtownT.loc[9, 'Longitude']
neighborhood_name = downtownT.loc[9, 'Neighbourhood']

In [25]:
LIMIT = 50
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=R2C03IKWUNFLCI5QBPTG1AD4WL1AOLRLBWCJTY0SEUM0KBDN&client_secret=PRFMTXX5CVBESQ1DEA4IASQ4O1ERVWBQGQPACWJN05EB2UKX&v=20180605&ll=43.6572,-79.3783&radius=500&limit=50'

In [27]:
results = requests.get(url).json()

In [30]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [31]:
venues = results['response']['groups'][0]['items']
    

nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Page One Cafe,Café,43.657772,-79.376073
1,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641
2,Blaze Pizza,Pizza Place,43.656518,-79.380015
3,Yonge-Dundas Square,Plaza,43.656054,-79.380495
4,Burrito Boyz,Burrito Place,43.656265,-79.378343


In [32]:
# This is the function from the Manhattan Lab will repeat this process for all neighborhoods in Downtown Toronto.
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [33]:
DowntownT_venues = getNearbyVenues(names=downtownT['Neighbourhood'],
                                   latitudes=downtownT['Latitude'],
                                   longitudes=downtownT['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


In [34]:
print(DowntownT_venues.shape)
DowntownT_venues.head()

(736, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.6555,-79.3626,Tandem Coffee,43.653559,-79.361809,Coffee Shop
1,"Regent Park, Harbourfront",43.6555,-79.3626,Roselle Desserts,43.653447,-79.362017,Bakery
2,"Regent Park, Harbourfront",43.6555,-79.3626,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
3,"Regent Park, Harbourfront",43.6555,-79.3626,The Yoga Lounge,43.655515,-79.364955,Yoga Studio
4,"Regent Park, Harbourfront",43.6555,-79.3626,Body Blitz Spa East,43.654735,-79.359874,Spa


In [36]:
DowntownT_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,50,50,50,50,50,50
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",50,50,50,50,50,50
Central Bay Street,50,50,50,50,50,50
Christie,12,12,12,12,12,12
Church and Wellesley,50,50,50,50,50,50
"Commerce Court, Victoria Hotel",50,50,50,50,50,50
"First Canadian Place, Underground city",50,50,50,50,50,50
"Garden District, Ryerson",50,50,50,50,50,50
"Harbourfront East, Union Station, Toronto Islands",4,4,4,4,4,4
"Kensington Market, Chinatown, Grange Park",50,50,50,50,50,50


Let's see how many times each venue category appears.

In [37]:
DowntownT_venues['Venue Category'].value_counts

<bound method IndexOpsMixin.value_counts of 0         Coffee Shop
1              Bakery
2      Breakfast Spot
3         Yoga Studio
4                 Spa
            ...      
731     Historic Site
732      Burger Joint
733         Gastropub
734           Theater
735           Dog Run
Name: Venue Category, Length: 736, dtype: object>

In [38]:
print('There are {} uniques categories.'.format(len(DowntownT_venues['Venue Category'].unique())))

There are 153 uniques categories.


Lets use one hot encoding like we did in the manhattan assignment to explore the venue categories.

In [40]:
DowntownT_onehot = pd.get_dummies(DowntownT_venues[['Venue Category']], prefix="", prefix_sep="")


DowntownT_onehot['Neighborhood'] = DowntownT_venues['Neighborhood'] 


fixed_columns = [DowntownT_onehot.columns[-1]] + list(DowntownT_onehot.columns[:-1])
DowntownT_onehot = DowntownT_onehot[fixed_columns]

DowntownT_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,...,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
downtown_grouped = DowntownT_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,Berczy Park,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Har...",0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,...,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.02
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.0
5,"Commerce Court, Victoria Hotel",0.0,0.0,0.06,0.02,0.0,0.0,0.04,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
6,"First Canadian Place, Underground city",0.0,0.0,0.06,0.02,0.0,0.0,0.04,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
7,"Garden District, Ryerson",0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,...,0.02,0.02,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0
8,"Harbourfront East, Union Station, Toronto Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Kensington Market, Chinatown, Grange Park",0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.08,0.0,0.04,0.02


In [42]:
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                 venue  freq
0           Restaurant  0.06
1  Japanese Restaurant  0.06
2          Coffee Shop  0.06
3                 Café  0.06
4             Beer Bar  0.06


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0         Coffee Shop  0.08
1  Italian Restaurant  0.08
2                 Bar  0.06
3                Café  0.04
4                Park  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.16
1                Café  0.04
2  Italian Restaurant  0.04
3      Clothing Store  0.04
4     Bubble Tea Shop  0.04


----Christie----
           venue  freq
0  Grocery Store  0.25
1           Café  0.25
2           Park  0.08
3    Coffee Shop  0.08
4     Playground  0.08


----Church and Wellesley----
                 venue  freq
0          Yoga Studio  0.04
1           Restaurant  0.04
2  Japanese Restaurant  0.04
3         Burger Joi

Sort venues in descending order.

In [43]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [47]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Berczy Park,Restaurant,Japanese Restaurant,Coffee Shop,Café,Beer Bar
1,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Italian Restaurant,Bar,Café,Park
2,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Clothing Store,Bubble Tea Shop
3,Christie,Grocery Store,Café,Park,Coffee Shop,Playground
4,Church and Wellesley,Yoga Studio,Restaurant,Japanese Restaurant,Burger Joint,Gay Bar


In [48]:
# set number of clusters
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 0, 0, 3, 0, 4, 4, 0, 2, 4], dtype=int32)

In [50]:
# dropping the 'u' to merge these columns
downtownT.rename(columns = {'Neighbourhood':'Neighborhood'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [55]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Label', kmeans.labels_)

downtown_merged1 = downtownT

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
downtown_merged1 = downtown_merged1.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged1.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Label,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626,0,0,Coffee Shop,Breakfast Spot,Yoga Studio,Health Food Store,Food Truck
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889,0,0,Gym,Sushi Restaurant,Diner,Ramen Restaurant,Music Venue
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783,0,0,Coffee Shop,Café,Clothing Store,Theater,Bookstore
15,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756,4,4,Café,Gastropub,Coffee Shop,Seafood Restaurant,Cosmetics Shop
20,M5E,Downtown Toronto,Berczy Park,43.6456,-79.3754,4,4,Restaurant,Japanese Restaurant,Coffee Shop,Café,Beer Bar


In [56]:
downtown_merged1 = downtown_merged1.drop('Cluster Label', axis=1)
downtown_merged1.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6555,-79.3626,0,Coffee Shop,Breakfast Spot,Yoga Studio,Health Food Store,Food Truck
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.6641,-79.3889,0,Gym,Sushi Restaurant,Diner,Ramen Restaurant,Music Venue
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.6572,-79.3783,0,Coffee Shop,Café,Clothing Store,Theater,Bookstore
15,M5C,Downtown Toronto,St. James Town,43.6513,-79.3756,4,Café,Gastropub,Coffee Shop,Seafood Restaurant,Cosmetics Shop
20,M5E,Downtown Toronto,Berczy Park,43.6456,-79.3754,4,Restaurant,Japanese Restaurant,Coffee Shop,Café,Beer Bar


In [60]:
# create map
map_clusters = folium.Map(location=[43.65,-79.3626], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged1['Latitude'], downtown_merged1['Longitude'], downtown_merged1['Neighborhood'], downtown_merged1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters