In [9]:
# Install the third-party packages this notebook imports:
# geopy, geocoder, BeautifulSoup and folium.
# pip alternatives to the conda commands, kept commented out for reference:
#pip install Beautifulsoup4
#pip install geocoder
# Each `!` line runs as a shell command; `--yes` answers conda's confirmation prompt.
!conda install -c conda-forge geopy --yes 
!conda install -c conda-forge geocoder --yes
!conda install -c conda-forge Beautifulsoup4 --yes
# folium is the only package pinned to a specific version (0.5.0).
!conda install -c conda-forge folium=0.5.0 --yes 

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.49-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [10]:
#import libraries
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd  
import geocoder # import geocoder
from geopy.geocoders import Nominatim 
import folium # map rendering library

## PART 1 - Web Scraping and DataFrame creation


In [12]:
# Download the Wikipedia page listing the "M" postal codes and parse it with BeautifulSoup.
# The timeout keeps the cell from hanging forever, and raise_for_status() makes an HTTP
# error fail here instead of surfacing later as a missing table.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
# Name kept for backward compatibility: it holds the page HTML, not a URL.
website_url = response.text
soup = BeautifulSoup(website_url,'html.parser')

In [13]:
# Scrape the HTML to find the values in the postal-code table
My_table = soup.find('table',{'class':'wikitable sortable'})
if My_table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("No 'wikitable sortable' table found in the downloaded page")
columns = My_table.find_all('td')

# Text of every table cell, in document order (row by row).
col_new = [cell.text for cell in columns]

# Each table row contributes 3 cells (postal code, borough, neighborhood).
# -1 lets NumPy infer the number of rows, so the cell keeps working when rows are
# added to or removed from the page; a cell count that is not a multiple of 3
# still raises, which signals that the table layout changed.
col_new = np.array(col_new).reshape(-1, 3)

In [14]:
# Build the postal-code DataFrame from the scraped cells, one column per field,
# then strip the newline characters out of the Neighborhood values.
df = pd.DataFrame(data=col_new, columns=['PostalCode', 'Borough', 'Neighborhood'])
df = df.assign(Neighborhood=df['Neighborhood'].replace(to_replace='\n', value='', regex=True))

In [15]:
# Ignore rows whose borough is not assigned.
# .copy() gives an independent frame, so the assignment below does not write into a
# slice of the previous DataFrame (avoids pandas' SettingWithCopyWarning).
df = df[df['Borough'] != 'Not assigned'].copy()

# If a row has a borough but a 'Not assigned' neighborhood, the neighborhood becomes
# the borough name. Columns are addressed by name rather than by tuple position, so
# the logic no longer depends on the column order.
unassigned_neighborhood = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned_neighborhood, 'Neighborhood'] = df.loc[unassigned_neighborhood, 'Borough']

In [16]:
# Combine all neighborhoods that share a postal code into one comma-separated string.
# pd.unique() drops duplicates while keeping first-appearance order; joining a set()
# produced an arbitrary order that could change between runs.
df = (
    df.groupby(['PostalCode', 'Borough'])['Neighborhood']
    .agg(lambda names: ', '.join(pd.unique(names)))
    .reset_index()
)
# Only the last expression of a cell is displayed, so the shape must be printed.
print(df.shape)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek"
2,M1E,Scarborough,"West Hill, Morningside, Guildwood"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## PART 2 - Getting the latitude and longitude coordinates using the Geocoder package

In [18]:
def get_geocoder(postal_code_from_df, max_attempts=10):
    """Return the (latitude, longitude) of a Toronto postal code via the ArcGIS geocoder.

    Parameters
    ----------
    postal_code_from_df : str
        Postal code to look up; surrounding whitespace is stripped before it is
        embedded in the query "<code>, Toronto, Ontario".
    max_attempts : int, optional
        Maximum number of lookups to try before giving up (default 10).

    Returns
    -------
    tuple
        (latitude, longitude) from the first lookup that returned coordinates.

    Raises
    ------
    RuntimeError
        If no lookup returned coordinates within ``max_attempts`` tries.
    """
    query = '{}, Toronto, Ontario'.format(postal_code_from_df.strip())
    for _ in range(max_attempts):
        lat_lng_coords = geocoder.arcgis(query).latlng
        # A None result means "no coordinates yet": retry. The previous version
        # indexed the result inside the loop before re-checking it, so a None
        # result raised TypeError instead of triggering the intended retry.
        if lat_lng_coords is not None:
            return lat_lng_coords[0], lat_lng_coords[1]
    raise RuntimeError(
        'No coordinates returned for {!r} after {} attempts'.format(query, max_attempts))

# Geocode every postal code, then unzip the (latitude, longitude) pairs into two columns.
df['Latitude'], df['Longitude'] = zip(*map(get_geocoder, df['PostalCode']))
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.811525,-79.195517
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek",43.78573,-79.15875
2,M1E,Scarborough,"West Hill, Morningside, Guildwood",43.76569,-79.175256
3,M1G,Scarborough,Woburn,43.768359,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944


## PART 3 - Exploring and clustering the neighborhoods in Toronto 

In [19]:
# Look up the coordinates of Toronto; they are used to centre the maps.
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_ontario")
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches; fail with a clear message instead of
# an AttributeError on the attribute reads below.
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto, Ontario are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto, Ontario are 43.653963, -79.387207.


In [20]:
# Show every postal-code area as a marker on a map centred on Toronto

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, post, borough, neigh in zip(df['Latitude'], df['Longitude'], df['PostalCode'], df['Borough'], df['Neighborhood']):
    # Three placeholders for three values: the previous two-placeholder format
    # string silently dropped the neighborhood names from the popup.
    label = '{} ({}): {}'.format(borough, post, neigh)
    label = folium.Popup(label, parse_html=True)
    # parse_html belongs to the Popup above, so it is no longer passed to the marker.
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

In [22]:
# Collect the distinct borough names that contain the word "Toronto" (case-insensitive)
borough_names = list(df.Borough.unique())

borough_toronto = [name for name in borough_names if "toronto" in name.lower()]

borough_toronto

['East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto']

In [24]:
# Keep only the rows whose borough name contains the word Toronto, renumbering the index
df = df.loc[df.Borough.isin(borough_toronto)].reset_index(drop=True)
print(df.shape)
df.head()

(38, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676845,-79.295225
1,M4K,East Toronto,"Riverdale, The Danforth West",43.683262,-79.35512
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.667965,-79.314673
3,M4M,East Toronto,Studio District,43.662766,-79.33483
4,M4N,Central Toronto,Lawrence Park,43.72816,-79.387085


In [25]:
# Create a map of Toronto showing only the postal-code areas kept in df
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)


for lat, long, post, borough, neigh in zip(df['Latitude'], df['Longitude'], df['PostalCode'], df['Borough'], df['Neighborhood']):
    # Three placeholders for three values: the previous two-placeholder format
    # string silently dropped the neighborhood names from the popup.
    label = '{} ({}): {}'.format(borough, post, neigh)
    label = folium.Popup(label, parse_html=True)
    # parse_html belongs to the Popup above, so it is no longer passed to the marker.
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

In [77]:
# The code was removed by Watson Studio for sharing.
# NOTE(review): the removed code presumably defines CLIENT_ID, CLIENT_SECRET and VERSION;
# the Foursquare request cell later in the notebook reads all three, and nothing else
# visible in the notebook defines them. Confirm before running on a fresh kernel.
# NOTE(review): make sure this cell never prints the real credentials into its saved output.

Your credentials:
CLIENT_ID: <redacted>
CLIENT_SECRET: <redacted>


In [28]:
# Find the venues within a 500 m radius of each postal-code area (Foursquare "explore")
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neigh in zip(df['Latitude'], df['Longitude'], df['PostalCode'], df['Borough'], df['Neighborhood']):
    # Pass the query as `params` so requests builds and encodes the URL; the
    # credentials are no longer spliced into a hand-formatted string.
    response = requests.get(
        "https://api.foursquare.com/v2/venues/explore",
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, long),
            'radius': radius,
            'limit': LIMIT,
        },
        timeout=30)
    # Surface HTTP errors here rather than as a KeyError while reading the payload.
    response.raise_for_status()

    # An area with no recommendations may come back without any group; treat that
    # as "no venues" instead of failing on groups[0].
    groups = response.json().get("response", {}).get('groups') or []
    results = groups[0]['items'] if groups else []

    for venue in results:
        details = venue['venue']
        # A venue with an empty category list previously raised IndexError on [0];
        # it is kept with a missing category instead.
        categories = details.get('categories') or []
        category_name = categories[0]['name'] if categories else None
        venues.append((
            post,
            borough,
            neigh,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            category_name))

In [29]:
# Convert the list of venue tuples into a DataFrame.
# Naming the columns in the constructor also works for an empty `venues` list;
# assigning `.columns` afterwards raised a length-mismatch error in that case.
venues_df = pd.DataFrame(
    venues,
    columns=['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory'])

print(venues_df.shape)
venues_df.head()

(1754, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M4E,East Toronto,The Beaches,43.676845,-79.295225,Glen Manor Ravine,43.676821,-79.293942,Trail
1,M4E,East Toronto,The Beaches,43.676845,-79.295225,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,M4E,East Toronto,The Beaches,43.676845,-79.295225,Grover Pub and Grub,43.679181,-79.297215,Pub
3,M4E,East Toronto,The Beaches,43.676845,-79.295225,Upper Beaches,43.680563,-79.292869,Neighborhood
4,M4K,East Toronto,"Riverdale, The Danforth West",43.683262,-79.35512,Dairy Queen,43.684223,-79.357062,Fast Food Restaurant


In [30]:
# Count the non-null values of every remaining column for each postal-code area
venues_df.groupby(by=["PostalCode", "Borough", "Neighborhood"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,4,4,4,4,4,4
M4K,East Toronto,"Riverdale, The Danforth West",5,5,5,5,5,5
M4L,East Toronto,"The Beaches West, India Bazaar",19,19,19,19,19,19
M4M,East Toronto,Studio District,51,51,51,51,51,51
M4N,Central Toronto,Lawrence Park,2,2,2,2,2,2
M4P,Central Toronto,Davisville North,8,8,8,8,8,8
M4R,Central Toronto,North Toronto West,4,4,4,4,4,4
M4S,Central Toronto,Davisville,26,26,26,26,26,26
M4T,Central Toronto,"Summerhill East, Moore Park",2,2,2,2,2,2
M4V,Central Toronto,"Forest Hill SE, Deer Park, Rathnelly, Summerhill West, South Hill",7,7,7,7,7,7


In [31]:
# One-hot encode the venue categories (one column per category, no prefix) and
# attach the postal code, borough and neighborhood of each venue.
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="").assign(
    PostalCode=venues_df['PostalCode'],
    Borough=venues_df['Borough'],
    Neighborhoods=venues_df['Neighborhood'],
)

# Rotate the last three columns to the front of the frame.
fixed_columns = [*toronto_onehot.columns[-3:], *toronto_onehot.columns[:-3]]
toronto_onehot = toronto_onehot.loc[:, fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(1754, 214)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4K,East Toronto,"Riverdale, The Danforth West",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
# Average the one-hot rows per area: each category column becomes the share of
# that area's venues falling in the category.
toronto_grouped = (
    toronto_onehot
    .groupby(["PostalCode", "Borough", "Neighborhoods"])
    .mean()
    .reset_index()
)

print(toronto_grouped.shape)
toronto_grouped

(37, 214)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,M4E,East Toronto,The Beaches,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,M4K,East Toronto,"Riverdale, The Danforth West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,M4L,East Toronto,"The Beaches West, India Bazaar",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,M4M,East Toronto,Studio District,0.0,0.039216,0.019608,0.0,0.0,0.039216,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0
4,M4N,Central Toronto,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,M4P,Central Toronto,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,M4R,Central Toronto,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,M4S,Central Toronto,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,M4T,Central Toronto,"Summerhill East, Moore Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,M4V,Central Toronto,"Forest Hill SE, Deer Park, Rathnelly, Summerhi...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def _ordinal(position):
    """Return `position` followed by its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st, ...)."""
    # 11, 12 and 13 take 'th' even though they end in 1, 2 and 3.
    if position % 100 in (11, 12, 13):
        return '{}th'.format(position)
    last_digit = position % 10
    suffix = indicators[last_digit - 1] if 1 <= last_digit <= len(indicators) else 'th'
    return '{}{}'.format(position, suffix)


# Create the column names according to the number of top venues. An explicit suffix
# rule replaces the bare `except:` that was used to detect "no indicator for this rank".
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = ['{} Most Common Venue'.format(_ordinal(rank)) for rank in range(1, num_top_venues + 1)]
columns = areaColumns+freqColumns

# Create the result frame with the same rows, in the same order, as toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

# For every area, rank the category columns (everything after the three identifying
# columns) by frequency and keep the names of the top `num_top_venues`.
# NOTE(review): an area with fewer distinct categories than num_top_venues is padded
# with zero-frequency categories, so its lower ranks carry no information.
for ind in range(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted

(37, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,Health Food Store,Trail,Pub,Neighborhood,Yoga Studio,Electronics Store,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop
1,M4K,East Toronto,"Riverdale, The Danforth West",Bus Line,Park,Discount Store,Grocery Store,Fast Food Restaurant,Event Space,Food & Drink Shop,Food,Flower Shop,Flea Market
2,M4L,East Toronto,"The Beaches West, India Bazaar",Park,Sandwich Place,Burrito Place,Italian Restaurant,Fast Food Restaurant,Food & Drink Shop,Burger Joint,Liquor Store,Steakhouse,Sushi Restaurant
3,M4M,East Toronto,Studio District,Café,Bakery,Italian Restaurant,Diner,Brewery,Coffee Shop,Sushi Restaurant,Pizza Place,Bar,American Restaurant
4,M4N,Central Toronto,Lawrence Park,Bus Line,Swim School,Event Space,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
5,M4P,Central Toronto,Davisville North,Grocery Store,Clothing Store,Hotel,Food & Drink Shop,Park,Breakfast Spot,Dog Run,Gym,Yoga Studio,Farmers Market
6,M4R,Central Toronto,North Toronto West,Playground,Gym Pool,Park,Garden,Electronics Store,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market
7,M4S,Central Toronto,Davisville,Dessert Shop,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Pizza Place,Thai Restaurant,Seafood Restaurant,Indian Restaurant,Sushi Restaurant
8,M4T,Central Toronto,"Summerhill East, Moore Park",Convenience Store,Gym,Yoga Studio,Ethiopian Restaurant,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant
9,M4V,Central Toronto,"Forest Hill SE, Deer Park, Rathnelly, Summerhi...",Coffee Shop,Light Rail Station,Photography Studio,Supermarket,Liquor Store,Event Space,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop


In [35]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

# Keep only the category-frequency columns for clustering. `columns=` replaces the
# positional axis argument (`, 1`), which newer pandas releases no longer accept.
toronto_grouped_clustering = toronto_grouped.drop(columns=["PostalCode", "Borough", "Neighborhoods"])

# run k-means clustering (fixed random_state so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([0, 3, 0, 0, 2, 3, 3, 0, 4, 0], dtype=int32)

In [70]:
# Create a new dataframe that includes the cluster label as well as the top venues
# for each postal-code area.

# Top-venue columns keyed by postal code. neighborhoods_venues_sorted has one row per
# clustered area, in the same order as the rows given to k-means, so kmeans.labels_
# lines up with this index.
venues_by_postal = neighborhoods_venues_sorted.drop(columns=["Borough", "Neighborhoods"]).set_index("PostalCode")
label_by_postal = pd.Series(kmeans.labels_, index=venues_by_postal.index)

# Keep only the areas that were clustered (those for which venues were found).
# This replaces the hard-coded `drop(22)`, which silently removes the wrong row as
# soon as the data changes.
toronto_merged = df[df['PostalCode'].isin(venues_by_postal.index)].copy()

# Attach the labels by postal code rather than by row position.
toronto_merged["labels"] = toronto_merged["PostalCode"].map(label_by_postal).astype(int)

# Add the top-venue columns for each area.
toronto_merged = toronto_merged.join(venues_by_postal, on="PostalCode")

print(toronto_merged.shape)
toronto_merged.head() # check the last columns!

(37, 16)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676845,-79.295225,0,Health Food Store,Trail,Pub,Neighborhood,Yoga Studio,Electronics Store,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop
1,M4K,East Toronto,"Riverdale, The Danforth West",43.683262,-79.35512,3,Bus Line,Park,Discount Store,Grocery Store,Fast Food Restaurant,Event Space,Food & Drink Shop,Food,Flower Shop,Flea Market
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.667965,-79.314673,0,Park,Sandwich Place,Burrito Place,Italian Restaurant,Fast Food Restaurant,Food & Drink Shop,Burger Joint,Liquor Store,Steakhouse,Sushi Restaurant
3,M4M,East Toronto,Studio District,43.662766,-79.33483,0,Café,Bakery,Italian Restaurant,Diner,Brewery,Coffee Shop,Sushi Restaurant,Pizza Place,Bar,American Restaurant
4,M4N,Central Toronto,Lawrence Park,43.72816,-79.387085,2,Bus Line,Swim School,Event Space,Food & Drink Shop,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant


In [72]:
# Order the rows by cluster label so members of the same cluster are listed together
print(toronto_merged.shape)
toronto_merged = toronto_merged.sort_values(by="labels")
toronto_merged

(37, 16)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676845,-79.295225,0,Health Food Store,Trail,Pub,Neighborhood,Yoga Studio,Electronics Store,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop
19,M5J,Downtown Toronto,"Harbourfront East, Toronto Islands, Union Station",43.62347,-79.391507,0,Harbor / Marina,Café,Music Venue,Yoga Studio,Event Space,Food,Flower Shop,Flea Market,Fish Market,Fish & Chips Shop
20,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.6471,-79.381531,0,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Bakery,Italian Restaurant,Gastropub,Deli / Bodega,Bar
21,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648205,-79.378793,0,Coffee Shop,Hotel,Café,Restaurant,Gym,American Restaurant,Beer Bar,Italian Restaurant,Gastropub,Steakhouse
24,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67484,-79.403698,0,Sandwich Place,Pizza Place,Coffee Shop,Café,Pharmacy,Indian Restaurant,Cosmetics Shop,Pub,Restaurant,Mexican Restaurant
25,M5S,Downtown Toronto,"Harbord, University of Toronto",43.66311,-79.401801,0,Café,Restaurant,Coffee Shop,Bakery,Bar,Japanese Restaurant,Bookstore,Gym,Italian Restaurant,French Restaurant
26,M5T,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.65357,-79.397249,0,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Dumpling Restaurant,Bar,Mexican Restaurant,Vietnamese Restaurant,Bakery,Coffee Shop,Ice Cream Shop
36,M6S,West Toronto,"Runnymede, Swansea",43.649885,-79.474929,0,Café,Coffee Shop,Bakery,Pizza Place,Pet Store,Falafel Restaurant,Spa,Flower Shop,Soccer Field,Shoe Store
27,M5V,Downtown Toronto,"Bathurst Quay, South Niagara, Harbourfront Wes...",43.640815,-79.399538,0,Coffee Shop,Italian Restaurant,Café,Restaurant,Gym / Fitness Center,Bar,Park,Speakeasy,Sandwich Place,Pub
29,M5X,Downtown Toronto,"Underground city, First Canadian Place",43.64828,-79.381461,0,Coffee Shop,Café,Hotel,American Restaurant,Gastropub,Burger Joint,Deli / Bodega,Seafood Restaurant,Restaurant,Steakhouse


In [76]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map centred on Toronto
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# One evenly spaced rainbow colour per cluster, as hex strings. The previous `x`/`ys`
# computation was only used for its length, which is kclusters.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add one marker per postal-code area, coloured by its cluster
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Borough'], toronto_merged['Neighborhood'], toronto_merged['labels']):
    # Plain int so the label can index the colour list whatever the column dtype is.
    cluster = int(cluster)
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    # Labels run from 0 to kclusters-1, so they index the palette directly; the
    # previous `cluster-1` made cluster 0 wrap around to the last colour.
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters