# Coursera Capstone Notebook
## Week 1

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Week 1 smoke test: emit the course greeting to confirm the kernel runs cells.
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


## Week 2

### Scraping Data

In [1]:
# import necessary libraries for scraping
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [60]:
# Build a dataframe from the first table of the Wikipedia postal-code page.
from io import StringIO

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
req = requests.get(url)
# stop here on an HTTP error instead of trying to parse an error page
req.raise_for_status()

soup = BeautifulSoup(req.content,'lxml')
table = soup.find_all('table')[0]

# hand read_html a file-like object rather than a literal HTML string
dfs = pd.read_html(StringIO(str(table)))
df = dfs[0]
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


In [61]:
# Neighbourhoods recorded as 'Not assigned' take over the name of their borough.
na = df.loc[df['Neighbourhood'] == 'Not assigned'].index.tolist()

df.loc[na, 'Neighbourhood'] = df.loc[na, 'Borough']
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Queen's Park


In [62]:
# Remove, in place, every row whose borough is 'Not assigned'.
na_b = df.index[df['Borough'] == 'Not assigned']

df.drop(labels=na_b, axis=0, inplace=True)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Queen's Park
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


In [63]:
# Collapse rows that share a postal code and borough into one row;
# the remaining text values of each group are concatenated with ', '.
df = (
    df.groupby(['Postcode', 'Borough'], sort=False, as_index=False)
      .agg(', '.join)
)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [64]:
# sanity check: (rows, columns) of the grouped dataframe
df.shape

(103, 3)

### Neighbourhood Summarization & Location Extraction

In [65]:
# Load the predefined CSV with latitude/longitude per postal code and
# rename its key column so it matches the 'Postcode' column used in df.
df_l = pd.read_csv("https://cocl.us/Geospatial_data").rename(
    columns={'Postal Code': 'Postcode'}
)
print(df_l.shape)
df_l.head()

(103, 3)


Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [74]:
# Look up each postcode's coordinates in df_l and store them as new columns of df.
postcode_coords = df_l.set_index('Postcode')
for coord in ('Latitude', 'Longitude'):
    df[coord] = df['Postcode'].map(postcode_coords[coord])
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


### Neighbourhood Exploration & Clustering

In [90]:
# install folium pinned to version 0.5.0 from the conda-forge channel;
# --yes answers the confirmation prompt so the cell runs unattended
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-4.0.1               |             py_0         575 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.0 MB

The following NEW packages will be 

In [91]:
# import necessary libraries
from geopy.geocoders import Nominatim
import folium

In [93]:
# get geolocation of Toronto by address
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent='my-application')
location = geolocator.geocode(address)
# geocode() gives back None when the address cannot be resolved; fail with a
# clear message instead of an AttributeError on the attribute access below
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


**Note:** To recreate the NY-Lab I will again import a dataframe containing the boroughs and neighborhoods of Toronto but without grouping them together by postal code

In [101]:
# import necessary libraries for scraping
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [106]:
# Rebuild the un-grouped dataframe (one row per neighbourhood) from the Wikipedia table.
from io import StringIO

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
req = requests.get(url)
# stop here on an HTTP error instead of trying to parse an error page
req.raise_for_status()

soup = BeautifulSoup(req.content,'lxml')
table = soup.find_all('table')[0]

# hand read_html a file-like object rather than a literal HTML string
dfs = pd.read_html(StringIO(str(table)))
df = dfs[0]

# assign borough name to 'not assigned' neighbourhoods
na = df.index[df['Neighbourhood'] == 'Not assigned'].tolist()

df.loc[na, 'Neighbourhood'] = df.loc[na, 'Borough']

# drop rows containing 'not assigned' boroughs
na_b = df[df['Borough'] == 'Not assigned'].index

df.drop(na_b , inplace=True)

# map latitude and longitude from predefined dataframe
# (the postcode-indexed lookup is built once and reused for both columns)
postcode_coords = df_l.set_index('Postcode')
df['Latitude'] = df['Postcode'].map(postcode_coords['Latitude'])
df['Longitude'] = df['Postcode'].map(postcode_coords['Longitude'])

# keep only rows whose borough name contains 'Toronto';
# .copy() makes the result independent of df
neighborhoods = df[df['Borough'].str.contains('Toronto')].copy()

neighborhoods.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
4,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
7,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
16,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937
17,M5B,Downtown Toronto,Garden District,43.657162,-79.378937
33,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418


In [112]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One green circle marker per row of `neighborhoods`;
# the popup text reads "<neighbourhood>, <borough>".
for lat, lng, borough, neighbourhood in zip(
        neighborhoods['Latitude'],
        neighborhoods['Longitude'],
        neighborhoods['Borough'],
        neighborhoods['Neighbourhood']):
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True),
        color='green',
        fill=True,
        fill_color='#008000',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

In [99]:
# define Foursquare credentials and version
# The client id and secret are read from environment variables so that they are
# not stored in (or shared with) the notebook file.
import os
import warnings

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605' # Foursquare API version

# warn early rather than failing later inside the first API request
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare requests cannot be authenticated.'
    )

In [100]:
# Copy of the getNearbyVenues-function from the NY-Lab
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    """Collect venues around each location from the Foursquare explore endpoint.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; element i describes one location to query.
    radius : number, default 500
        Passed as the `radius` query parameter of each request.
    limit : int, default 100
        Passed as the `limit` query parameter of each request. It is an explicit
        argument so the function does not depend on a notebook-level variable
        that is only defined in a later cell.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when nothing was collected.

    Notes
    -----
    Reads CLIENT_ID, CLIENT_SECRET and VERSION from the notebook namespace and
    prints each location name as progress output.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            limit)

        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue with an empty category list gets None instead of raising IndexError
                categories[0]['name'] if categories else None))

    # naming the columns in the constructor also works when `rows` is empty
    return pd.DataFrame(rows, columns=columns)

In [125]:
# Query at most 100 venues within a 500 m radius of each neighbourhood's coordinates.
limit, radius = 100, 500

toronto_venues = getNearbyVenues(
    neighborhoods['Neighbourhood'],
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
    radius=radius,
)

Harbourfront
Queen's Park
Ryerson
Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide
King
Richmond
Dovercourt Village
Dufferin
Harbourfront East
Toronto Islands
Union Station
Little Portugal
Trinity
The Danforth West
Riverdale
Design Exchange
Toronto Dominion Centre
Brockton
Exhibition Place
Parkdale Village
The Beaches West
India Bazaar
Commerce Court
Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North
Forest Hill West
High Park
The Junction South
North Toronto West
The Annex
North Midtown
Yorkville
Parkdale
Roncesvalles
Davisville
Harbord
University of Toronto
Runnymede
Swansea
Moore Park
Summerhill East
Chinatown
Grange Park
Kensington Market
Deer Park
Forest Hill SE
Rathnelly
South Hill
Summerhill West
CN Tower
Bathurst Quay
Island airport
Harbourfront West
King and Spadina
Railway Lands
South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown
St. James Town
First Canadian Place
Underground city

In [126]:
# preview the first rows of the collected venues
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.65426,-79.360636,Cooper Koo Family YMCA,43.653191,-79.357947,Gym / Fitness Center
3,Harbourfront,43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,Harbourfront,43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [127]:
# non-null entries per column for each neighbourhood, i.e. how many venues were returned
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Quay,12,12,12,12,12,12
Berczy Park,56,56,56,56,56,56
Brockton,22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,16,16,16,16,16,16
CN Tower,12,12,12,12,12,12
Cabbagetown,44,44,44,44,44,44
Central Bay Street,83,83,83,83,83,83
Chinatown,91,91,91,91,91,91
Christie,17,17,17,17,17,17


In [132]:
# One-hot encode the venue categories (one unprefixed column per category) and
# set a 'Neighborhood' column holding the neighbourhood name of each venue.
toronto_onehot = pd.get_dummies(
    toronto_venues[['Venue Category']], prefix="", prefix_sep=""
).assign(Neighborhood=toronto_venues['Neighborhood'])

# The per-neighbourhood mean of the indicator columns is the relative
# frequency of each category within that neighbourhood.
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Adelaide,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.020000,0.000000,0.00,...,0.000000,0.00000,0.00,0.020000,0.000000,0.000000,0.000000,0.000000,0.010000,0.000000
1,Bathurst Quay,0.000000,0.083333,0.083333,0.166667,0.166667,0.166667,0.000000,0.000000,0.00,...,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,Berczy Park,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,...,0.000000,0.00000,0.00,0.017857,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,Brockton,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,...,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.045455
4,Business Reply Mail Processing Centre 969 Eastern,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,...,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,CN Tower,0.000000,0.083333,0.083333,0.166667,0.166667,0.166667,0.000000,0.000000,0.00,...,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,Cabbagetown,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,...,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,Central Bay Street,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.012048,0.000000,0.00,...,0.000000,0.00000,0.00,0.012048,0.000000,0.000000,0.012048,0.000000,0.000000,0.012048
8,Chinatown,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,...,0.000000,0.00000,0.00,0.054945,0.000000,0.054945,0.010989,0.000000,0.000000,0.000000
9,Christie,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,...,0.000000,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [135]:
# Print the five most frequent venue categories of every neighbourhood.
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("---- "+hood+" ----")

    # turn the neighbourhood's single row into (category, frequency) rows
    is_hood = toronto_grouped['Neighborhood'] == hood
    temp = toronto_grouped[is_hood].T.reset_index()
    temp.columns = ['Venue', 'Freq']

    # the first transposed row holds the neighbourhood name, not a category
    temp = (
        temp.iloc[1:]
            .assign(Freq=lambda frame: frame['Freq'].astype(float))
            .round({'Freq' : 2})
    )

    ranked = temp.sort_values('Freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

---- Adelaide ----
              Venue  Freq
0       Coffee Shop  0.07
1              Café  0.04
2               Bar  0.04
3        Steakhouse  0.04
4  Asian Restaurant  0.03


---- Bathurst Quay ----
              Venue  Freq
0    Airport Lounge  0.17
1   Airport Service  0.17
2  Airport Terminal  0.17
3   Harbor / Marina  0.08
4           Airport  0.08


---- Berczy Park ----
                Venue  Freq
0         Coffee Shop  0.07
1        Cocktail Bar  0.05
2          Steakhouse  0.04
3  Seafood Restaurant  0.04
4              Bakery  0.04


---- Brockton ----
                    Venue  Freq
0                    Café  0.14
1          Breakfast Spot  0.09
2             Coffee Shop  0.09
3           Grocery Store  0.05
4  Furniture / Home Store  0.05


---- Business Reply Mail Processing Centre 969 Eastern ----
                  Venue  Freq
0                   Spa  0.06
1         Auto Workshop  0.06
2            Smoke Shop  0.06
3                  Park  0.06
4  Fast Food Restaurant  0

The analysis suggests, for example, that there is an **airport** in the **Bathurst Quay neighborhood**.
There might also be a small **Japanese community** in the **Church and Wellesley neighborhood**, since Japanese and sushi restaurants have the highest frequency there.

In [136]:
# copy of the return_most_common_venues function from NY-Lab

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of `row`.

    The first element of `row` is skipped; the remaining entries are sorted in
    descending order and the index labels of the first `num_top_venues` of them
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [138]:
# top 10 venues per neighborhood

import numpy as np

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues:
# ranks 1-3 use 'st'/'nd'/'rd', every later rank uses 'th'
# (explicit bounds check instead of catching every exception from the list lookup)
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill each row with that neighbourhood's most common venue categories
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Bar,Steakhouse,Asian Restaurant,Burger Joint,Hotel,Sushi Restaurant,Clothing Store,Thai Restaurant
1,Bathurst Quay,Airport Lounge,Airport Service,Airport Terminal,Airport,Airport Food Court,Boutique,Harbor / Marina,Boat or Ferry,Sculpture Garden,Yoga Studio
2,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Steakhouse,Bakery,Café,Cheese Shop,Beer Bar,Liquor Store
3,Brockton,Café,Coffee Shop,Breakfast Spot,Furniture / Home Store,Pet Store,Performing Arts Venue,Italian Restaurant,Intersection,Gym,Yoga Studio
4,Business Reply Mail Processing Centre 969 Eastern,Skate Park,Brewery,Recording Studio,Fast Food Restaurant,Farmers Market,Burrito Place,Auto Workshop,Pizza Place,Spa,Garden


In [165]:
from sklearn.cluster import KMeans

# set number of clusters
k = 4

# extract onehot encoded features
# (keyword form of drop; the positional axis argument is rejected by pandas 2.x)
toronto_grouped_clustered = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering
kmeans = KMeans(n_clusters=k, random_state=0).fit(toronto_grouped_clustered)

# Attach the cluster label of each neighbourhood to the top-venues table.
# This works on a copy with any previous 'Cluster Labels' column removed, so the
# cell can be re-run without an "already exists" error and without changing
# neighborhoods_venues_sorted itself.
venues_with_labels = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
venues_with_labels.insert(0, 'Cluster Labels', kmeans.labels_)

# merge with neighborhoods to add latitude/longitude for each neighborhood
toronto_merged = neighborhoods
toronto_merged = toronto_merged.join(venues_with_labels.set_index('Neighborhood'), on='Neighbourhood')

toronto_merged

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636,0,Coffee Shop,Park,Pub,Bakery,Mexican Restaurant,Restaurant,Café,Breakfast Spot,Performing Arts Venue,Dessert Shop
7,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494,0,Coffee Shop,Gym,Park,Yoga Studio,Nightclub,Sandwich Place,Burrito Place,Café,Chinese Restaurant,Portuguese Restaurant
16,M5B,Downtown Toronto,Ryerson,43.657162,-79.378937,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Japanese Restaurant,Tea Room,Diner,Bookstore,Fast Food Restaurant
17,M5B,Downtown Toronto,Garden District,43.657162,-79.378937,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Middle Eastern Restaurant,Japanese Restaurant,Tea Room,Diner,Bookstore,Fast Food Restaurant
33,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Café,Restaurant,Bakery,Park,Diner,Breakfast Spot,Italian Restaurant,Beer Bar,Cosmetics Shop
46,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Health Food Store,Trail,Pub,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
47,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Steakhouse,Bakery,Café,Cheese Shop,Beer Bar,Liquor Store
56,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Ice Cream Shop,Japanese Restaurant,Burger Joint,Juice Bar,Department Store,Gym / Fitness Center
57,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Athletics & Sports,Italian Restaurant,Diner,Candy Store,Restaurant,Baby Store,Coffee Shop
67,M5H,Downtown Toronto,Adelaide,43.650571,-79.384568,0,Coffee Shop,Café,Bar,Steakhouse,Asian Restaurant,Burger Joint,Hotel,Sushi Restaurant,Clothing Store,Thai Restaurant


In [166]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map of Toronto inserting clusters
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: k evenly spaced colours of the rainbow colormap
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Rows that found no match in the join carry NaN as cluster label (which also
# turns the whole column into floats); skip them and index with plain ints.
labelled = toronto_merged.dropna(subset=['Cluster Labels'])

# add neighbourhood markers to map, coloured by cluster
for lat, lng, poi, cluster in zip(labelled['Latitude'], labelled['Longitude'], labelled['Neighbourhood'], labelled['Cluster Labels']):
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        # cluster-1 maps label 0 to the last colour; every label still gets its own colour
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7,
        parse_html=False).add_to(map_clusters)

map_clusters