In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import urllib.request
import ssl

<h2> Using Beautiful Soup Package to extract the data </h2>

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

htmldata = urllib.request.urlopen(url, context = None)

In [3]:
bs_toronto = BeautifulSoup(htmldata, 'html.parser')
bs_toronto

<!DOCTYPE html>

<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>List of postal codes of Canada: M - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"dcd9736f-9ffc-4121-a19b-c1481d01bb53","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":979555370,"wgRevisionId":979555370,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description is different from Wikidata","Communications in Ontario","

In [4]:
tablehtmldata = bs_toronto.find_all('table', {'class':'wikitable sortable'})[0].find_all('tr')
tablehtmldata = tablehtmldata[1:]
len(tablehtmldata)

180

In [5]:
tablehtmldata[0].find_all('td')[0].get_text().strip()

'M1A'

In [6]:
data_dict = {}
data_dict['Postal Code'] = []
data_dict['Borough'] = []
data_dict['Neighborhood'] = []

In [7]:
columns = ['Postal Code','Borough','Neighborhood']
ind = [0,1,2]

for i in range(len(tablehtmldata)):
    data = tablehtmldata[i].find_all('td')
    for group,pos in zip(*[columns,ind]):
        data_dict[group].append(data[pos].get_text().strip())

data_dict


{'Postal Code': ['M1A',
  'M2A',
  'M3A',
  'M4A',
  'M5A',
  'M6A',
  'M7A',
  'M8A',
  'M9A',
  'M1B',
  'M2B',
  'M3B',
  'M4B',
  'M5B',
  'M6B',
  'M7B',
  'M8B',
  'M9B',
  'M1C',
  'M2C',
  'M3C',
  'M4C',
  'M5C',
  'M6C',
  'M7C',
  'M8C',
  'M9C',
  'M1E',
  'M2E',
  'M3E',
  'M4E',
  'M5E',
  'M6E',
  'M7E',
  'M8E',
  'M9E',
  'M1G',
  'M2G',
  'M3G',
  'M4G',
  'M5G',
  'M6G',
  'M7G',
  'M8G',
  'M9G',
  'M1H',
  'M2H',
  'M3H',
  'M4H',
  'M5H',
  'M6H',
  'M7H',
  'M8H',
  'M9H',
  'M1J',
  'M2J',
  'M3J',
  'M4J',
  'M5J',
  'M6J',
  'M7J',
  'M8J',
  'M9J',
  'M1K',
  'M2K',
  'M3K',
  'M4K',
  'M5K',
  'M6K',
  'M7K',
  'M8K',
  'M9K',
  'M1L',
  'M2L',
  'M3L',
  'M4L',
  'M5L',
  'M6L',
  'M7L',
  'M8L',
  'M9L',
  'M1M',
  'M2M',
  'M3M',
  'M4M',
  'M5M',
  'M6M',
  'M7M',
  'M8M',
  'M9M',
  'M1N',
  'M2N',
  'M3N',
  'M4N',
  'M5N',
  'M6N',
  'M7N',
  'M8N',
  'M9N',
  'M1P',
  'M2P',
  'M3P',
  'M4P',
  'M5P',
  'M6P',
  'M7P',
  'M8P',
  'M9P',
  'M1R',
  'M

In [8]:
df = pd.DataFrame(data = data_dict)


In [9]:
torontodf = df.loc[df['Borough'] != 'Not assigned']
torontodf.reset_index(drop = True, inplace = True)
torontodf

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


<h2>Obtaining the latitude and longitude for each Postal Code</h2>

In [10]:
latlng = pd.read_csv('Geospatial_Coordinates.csv')
latlng

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [11]:
torontodf_latlng = torontodf.join(latlng.set_index('Postal Code'), on = 'Postal Code')
torontodf_latlng

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [12]:
import folium
torontolatlng = [43.6532,-79.3832]
torontomap = folium.Map(location = torontolatlng, zoom_start = 13)


for lat,lng,postal_code,borough,neighborhood in zip(torontodf_latlng['Latitude'],
                                                     torontodf_latlng['Longitude'],
                                                     torontodf_latlng['Postal Code'],
                                                     torontodf_latlng['Borough'],
                                                     torontodf_latlng['Neighborhood']):
    label = 'Postal : {} \n Neighborhood: {} \n Borough:{}'.format(postal_code,neighborhood,borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(torontomap) 

torontomap

In [13]:
neighborhood

'Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West'

In [14]:
CLIENT_ID = 'DVWJRKZQ20BUTZMTMAVW0355V1C5HQQ2WBFUT2DGW4GKQ5DR' # your Foursquare ID
CLIENT_SECRET = 'VKIQPVI13LA0MFW5ELOK3AVHO1NOYYQTIDW0ARSJWQDOQWIO'
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: DVWJRKZQ20BUTZMTMAVW0355V1C5HQQ2WBFUT2DGW4GKQ5DR
CLIENT_SECRET:VKIQPVI13LA0MFW5ELOK3AVHO1NOYYQTIDW0ARSJWQDOQWIO


Getting the top 100 venues in a neighborhood

In [15]:
import urllib.parse
import json

neighborhood = list()
neighborhood_lat = list()
neighborhood_lng = list()
venues = list()
venue_lat = list()
venue_lng = list()
venue_cat = list()


names_list = torontodf_latlng['Neighborhood']
lat_list = torontodf_latlng['Latitude']
lng_list = torontodf_latlng['Longitude']

baseurl = 'https://api.foursquare.com/v2/venues/explore?'
parameters = dict()
parameters['client_id'] = CLIENT_ID
parameters['client_secret'] = CLIENT_SECRET
parameters['v'] = VERSION

for name,lat,lng in zip(names_list,lat_list,lng_list):

    parameters['ll'] = '{},{}'.format(lat,lng)
    parameters['radius'] = 500
    parameters['limit'] = 100

    url = baseurl + urllib.parse.urlencode(parameters, safe = ',')
    urldata = urllib.request.urlopen(url).read()
    data = json.loads(urldata)

    neighborhood_venues = data['response']['groups'][0]['items']

    for venue in neighborhood_venues:
        neighborhood.append(name)
        neighborhood_lat.append(lat)
        neighborhood_lng.append(lng)
        venues.append(venue['venue']['name'])     
        venue_lat.append(venue['venue']['location']['lat'])
        venue_lng.append(venue['venue']['location']['lng'])
        venue_cat.append(venue['venue']['categories'][0]['name'])
        


In [16]:
nearby_venues = pd.DataFrame({'Neighborhood':neighborhood,'Neighborhood Latitude':neighborhood_lat,
                  'Neighborhood Longitude':neighborhood_lng,
                  'Venue':venues,
                  'Venue Latitude':venue_lat,
                  'Venue Longitude':venue_lng,
                  'Venue Category':venue_cat})
    
nearby_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.332140,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
5,Victoria Village,43.725882,-79.315572,Eglinton Ave E & Sloane Ave/Bermondsey Rd,43.726086,-79.313620,Intersection
6,Victoria Village,43.725882,-79.315572,Pizza Nova,43.725824,-79.312860,Pizza Place
7,"Regent Park, Harbourfront",43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
8,"Regent Park, Harbourfront",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
9,"Regent Park, Harbourfront",43.654260,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center


Count each category in a neighborhood to get the most popular category of Venues

In [17]:
toronto_onehot = pd.get_dummies(nearby_venues[['Venue Category']],prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = nearby_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
venuecounts = toronto_onehot.groupby('Neighborhood').mean().reset_index()
venuecounts

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
1,"Alderwood, Long Branch",0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
3,Bayview Village,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
4,"Bedford Park, Lawrence Manor East",0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
5,Berczy Park,0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.018182,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
6,"Birch Cliff, Cliffside West",0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
7,"Brockton, Parkdale Village, Exhibition Place",0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
8,"Business reply mail Processing Centre, South C...",0.000000,0.000000,0.000000,0.0000,0.0000,0.0000,0.000,0.000,0.0000,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.000000,0.000000,0.000000,0.0625,0.0625,0.0625,0.125,0.125,0.0625,...,0.000000,0.00,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000


In [19]:
venuecounts.iloc[0,1:].sort_values(ascending = False).index.values[:10]

array(['Lounge', 'Latin American Restaurant', 'Skating Rink',
       'Breakfast Spot', 'Clothing Store', 'Dumpling Restaurant',
       'Distribution Center', 'Dog Run', 'Doner Restaurant', 'Donut Shop'],
      dtype=object)

In [20]:
most_popular_num = 10

columns = list()
for i in range(most_popular_num):
    columns.append('{} most popular'.format(i + 1)) 
    
sorted_df = pd.DataFrame(columns = columns)
sorted_df['Neighborhood']  = venuecounts['Neighborhood']
columns_sorted = [sorted_df.columns[-1],*sorted_df.columns[:-1].tolist()]
sorted_df = sorted_df[columns_sorted]
sorted_df

Unnamed: 0,Neighborhood,1 most popular,2 most popular,3 most popular,4 most popular,5 most popular,6 most popular,7 most popular,8 most popular,9 most popular,10 most popular
0,Agincourt,,,,,,,,,,
1,"Alderwood, Long Branch",,,,,,,,,,
2,"Bathurst Manor, Wilson Heights, Downsview North",,,,,,,,,,
3,Bayview Village,,,,,,,,,,
4,"Bedford Park, Lawrence Manor East",,,,,,,,,,
5,Berczy Park,,,,,,,,,,
6,"Birch Cliff, Cliffside West",,,,,,,,,,
7,"Brockton, Parkdale Village, Exhibition Place",,,,,,,,,,
8,"Business reply mail Processing Centre, South C...",,,,,,,,,,
9,"CN Tower, King and Spadina, Railway Lands, Har...",,,,,,,,,,


In [21]:
for i in range(sorted_df.shape[0]):
    sorted_df.iloc[i,1:] = venuecounts.iloc[i,1:].sort_values(ascending = False).index.values[:10]

sorted_df

Unnamed: 0,Neighborhood,1 most popular,2 most popular,3 most popular,4 most popular,5 most popular,6 most popular,7 most popular,8 most popular,9 most popular,10 most popular
0,Agincourt,Lounge,Latin American Restaurant,Skating Rink,Breakfast Spot,Clothing Store,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,"Alderwood, Long Branch",Pizza Place,Pharmacy,Gym,Sandwich Place,Coffee Shop,Pub,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Frozen Yogurt Shop,Shopping Mall,Bridal Shop,Sandwich Place,Diner,Restaurant,Deli / Bodega,Supermarket
3,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Women's Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Grocery Store,Thai Restaurant,Comfort Food Restaurant,Juice Bar,Butcher,Restaurant,Café
5,Berczy Park,Coffee Shop,Farmers Market,Restaurant,Beer Bar,Seafood Restaurant,Cocktail Bar,Bakery,Cheese Shop,Shopping Mall,Sporting Goods Shop
6,"Birch Cliff, Cliffside West",College Stadium,Skating Rink,General Entertainment,Café,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
7,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Nightclub,Pet Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Performing Arts Venue
8,"Business reply mail Processing Centre, South C...",Skate Park,Light Rail Station,Butcher,Recording Studio,Auto Workshop,Burrito Place,Garden,Garden Center,Farmers Market,Fast Food Restaurant
9,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Boutique,Airport Terminal,Rental Car Location,Coffee Shop,Plane,Boat or Ferry,Sculpture Garden,Harbor / Marina


In [22]:
from sklearn.cluster import KMeans

kclusters = 5

toronto_cluster_df = venuecounts.drop('Neighborhood',axis = 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_cluster_df)
kmeans.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 0, 1, 1, 1, 0,
       1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
       0, 1, 1, 1, 1, 1, 0, 1])

In [23]:
sorted_df.insert(0, 'Cluster Labels', kmeans.labels_)


In [24]:
sorted_df

Unnamed: 0,Cluster Labels,Neighborhood,1 most popular,2 most popular,3 most popular,4 most popular,5 most popular,6 most popular,7 most popular,8 most popular,9 most popular,10 most popular
0,1,Agincourt,Lounge,Latin American Restaurant,Skating Rink,Breakfast Spot,Clothing Store,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,1,"Alderwood, Long Branch",Pizza Place,Pharmacy,Gym,Sandwich Place,Coffee Shop,Pub,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner
2,1,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Frozen Yogurt Shop,Shopping Mall,Bridal Shop,Sandwich Place,Diner,Restaurant,Deli / Bodega,Supermarket
3,1,Bayview Village,Café,Japanese Restaurant,Chinese Restaurant,Bank,Women's Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,1,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Coffee Shop,Grocery Store,Thai Restaurant,Comfort Food Restaurant,Juice Bar,Butcher,Restaurant,Café
5,1,Berczy Park,Coffee Shop,Farmers Market,Restaurant,Beer Bar,Seafood Restaurant,Cocktail Bar,Bakery,Cheese Shop,Shopping Mall,Sporting Goods Shop
6,1,"Birch Cliff, Cliffside West",College Stadium,Skating Rink,General Entertainment,Café,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
7,1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Nightclub,Pet Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Performing Arts Venue
8,1,"Business reply mail Processing Centre, South C...",Skate Park,Light Rail Station,Butcher,Recording Studio,Auto Workshop,Burrito Place,Garden,Garden Center,Farmers Market,Fast Food Restaurant
9,1,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Boutique,Airport Terminal,Rental Car Location,Coffee Shop,Plane,Boat or Ferry,Sculpture Garden,Harbor / Marina


In [25]:
finaldf = torontodf_latlng.join(sorted_df.set_index('Neighborhood'), on = 'Neighborhood')
finaldf.dropna(inplace = True)
finaldf

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1 most popular,2 most popular,3 most popular,4 most popular,5 most popular,6 most popular,7 most popular,8 most popular,9 most popular,10 most popular
0,M3A,North York,Parkwoods,43.753259,-79.329656,0.0,Park,Food & Drink Shop,Women's Store,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,1.0,Intersection,Pizza Place,Coffee Shop,Hockey Arena,Portuguese Restaurant,Donut Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,1.0,Coffee Shop,Park,Pub,Bakery,Breakfast Spot,Café,Theater,Hotel,Chocolate Shop,Spa
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1.0,Clothing Store,Women's Store,Coffee Shop,Event Space,Furniture / Home Store,Gift Shop,Boutique,Accessories Store,Vietnamese Restaurant,Convenience Store
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1.0,Coffee Shop,Yoga Studio,Sushi Restaurant,Bar,Beer Bar,Smoothie Shop,Sandwich Place,Restaurant,Café,Portuguese Restaurant
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1.0,Fast Food Restaurant,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Hardware Store
7,M3B,North York,Don Mills,43.745906,-79.352188,1.0,Gym,Beer Store,Coffee Shop,Japanese Restaurant,Sporting Goods Shop,Asian Restaurant,Italian Restaurant,Supermarket,Café,Discount Store
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,1.0,Pizza Place,Pharmacy,Athletics & Sports,Gastropub,Intersection,Pet Store,Bus Line,Breakfast Spot,Bank,Gym / Fitness Center
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1.0,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Bubble Tea Shop,Cosmetics Shop,Furniture / Home Store,Diner,Fast Food Restaurant,Pizza Place
10,M6B,North York,Glencairn,43.709577,-79.445073,1.0,Park,Pizza Place,Japanese Restaurant,Bakery,Pub,Doner Restaurant,Diner,Discount Store,Distribution Center,Dog Run


In [26]:
import matplotlib.cm as cm
import matplotlib.colors as colors
# create map
map_clusters = folium.Map(location=[43.6532,-79.3832], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(finaldf['Latitude'], finaldf['Longitude'], finaldf['Neighborhood'], finaldf['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters