### Import necessary Libraries

In [114]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import numpy as np
import geocoder
import folium
from sklearn.cluster import KMeans
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors

#### Generate data from WIKI

In [115]:
#request for data from urlk
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
#parse data with BeautifulSoup
soup = BeautifulSoup(res.content,'html.parser')
#find data from table
table = soup.find_all('table')[0]
#read data with pandas from table
d = pd.read_html(str(table))[0]
#convert to dataframe
df = pd.DataFrame(d)
#define column
df.columns = ['Postalcode','Brought','Neighbourhood']
#remove row which are 'Not assigned' in Brought column
df1 = df[df.Brought != 'Not assigned']
#Grouping Neighbourhood where Postalcode are same
df2 = df1.groupby(['Postalcode','Brought'])['Neighbourhood'].apply(', '.join).reset_index()
#Replce Neighbourhood's value with Brought's value where are Neighbourhood is 'Not assigned'
df2['Neighbourhood'] = np.where((df2['Neighbourhood'] == 'Not assigned'),df2['Brought'],df2['Neighbourhood'])
#showing sample output
df2.head()

Unnamed: 0,Postalcode,Brought,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### Generate lat long from local file

In [118]:
#read Latitude,Latitude data locally
ll_data = pd.read_csv("E:\Coursera\Coursera_Capstone\Geospatial_Coordinates.csv")
#convert to dataframe
df3 = pd.DataFrame(ll_data)
#define column
df3.columns = ['Postalcode','Latitude','Longitude']
#showing sample output
df3.head()

Unnamed: 0,Postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [119]:
#merge tow dataframe with Postalcode
df4 = pd.merge(df2, df3,on='Postalcode', how='outer')
#showing sample output
df4.head()

Unnamed: 0,Postalcode,Brought,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Get latitude and longitude of Toronto

In [120]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Ontario are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Ontario are 43.653963, -79.387207.


#### Map create with Toronto latitude and longitude values

In [121]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude,longitude], zoom_start=12)
# add markers to map
for lat, lng, borough, neighborhood in zip(df4['Latitude'], df4['Longitude'], df4['Brought'], df4['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Will analyze torento neighbour check the list

In [122]:
df4.Brought.unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

#### Will analyze with Scarborough neighourhood

In [124]:
scarborough_data = df4[df4['Brought'] == 'Scarborough'].reset_index(drop=True)
scarborough_data.head()

Unnamed: 0,Postalcode,Brought,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Get latitude and longitude of Scarborough

In [125]:
scarborough_address = 'Scarborough, Toronto'

scarborough_geolocator = Nominatim(user_agent="st_explorer")
scarborough_location = scarborough_geolocator.geocode(address)
scarborough_latitude = scarborough_location.latitude
scarborough_longitude = scarborough_location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(scarborough_latitude, scarborough_longitude))

The geograpical coordinate of Scarborough are 43.653963, -79.387207.


#### Create map of Scarborough Latitude and Longitude values

In [126]:
# create map of Scarborough using latitude and longitude values
map_scarborough = folium.Map(location=[scarborough_latitude, scarborough_longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(scarborough_data['Latitude'], scarborough_data['Longitude'],scarborough_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_scarborough)  
    
map_scarborough

#### Define client id and client scret with Foursquare

In [127]:
CLIENT_ID = '1QEORTYPVXPDCE2DFVI0ZOYGIZRLOVBILKIWE1QSOG3TVQI1' 
CLIENT_SECRET = 'YTME2D3YOAHYG4DCDKF5ZAEORJF0VZYGDHUQL1PHIHRSHK3X' 
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1QEORTYPVXPDCE2DFVI0ZOYGIZRLOVBILKIWE1QSOG3TVQI1
CLIENT_SECRET:YTME2D3YOAHYG4DCDKF5ZAEORJF0VZYGDHUQL1PHIHRSHK3X


#### working with one Neighbourhood

In [48]:
scarborough_data.loc[0, 'Neighbourhood']

'Rouge, Malvern'

In [128]:
#Get the neighborhood's latitude and longitude values.
neighborhood_latitude = scarborough_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = scarborough_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = scarborough_data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Rouge, Malvern are 43.8066863, -79.1943534.


In [129]:
# #### Now, let's get the top 3 venues that are in Rouge, Malvern within a radius of 500 meters.
LIMIT = 3
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=1QEORTYPVXPDCE2DFVI0ZOYGIZRLOVBILKIWE1QSOG3TVQI1&client_secret=YTME2D3YOAHYG4DCDKF5ZAEORJF0VZYGDHUQL1PHIHRSHK3X&v=20180605&ll=43.8066863,-79.1943534&radius=500&limit=3'

In [130]:
results = requests.get(url).json()
results

{u'meta': {u'code': 200, u'requestId': u'5da9ce51a30619002c9ac9a4'},
 u'response': {u'groups': [{u'items': [{u'reasons': {u'count': 0,
       u'items': [{u'reasonName': u'globalInteractionReason',
         u'summary': u'This spot is popular',
         u'type': u'general'}]},
      u'referralId': u'e-0-4bb6b9446edc76b0d771311c-0',
      u'venue': {u'categories': [{u'icon': {u'prefix': u'https://ss3.4sqi.net/img/categories_v2/food/fastfood_',
          u'suffix': u'.png'},
         u'id': u'4bf58dd8d48988d16e941735',
         u'name': u'Fast Food Restaurant',
         u'pluralName': u'Fast Food Restaurants',
         u'primary': True,
         u'shortName': u'Fast Food'}],
       u'id': u'4bb6b9446edc76b0d771311c',
       u'location': {u'cc': u'CA',
        u'city': u'Toronto',
        u'country': u'Canada',
        u'crossStreet': u'Morningside & Sheppard',
        u'distance': 387,
        u'formattedAddress': [u'Toronto ON', u'Canada'],
        u'labeledLatLngs': [{u'label': u'display

In [131]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [132]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Wendy's,Fast Food Restaurant,43.807448,-79.199056
1,Interprovincial Group,Print Shop,43.80563,-79.200378


#### Let's create a function to repeat the same process to all the neighborhoods in scarborough

In [133]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now define all the neighborhood

In [134]:
scarborough_venues = getNearbyVenues(names=scarborough_data['Neighbourhood'],
                                   latitudes=scarborough_data['Latitude'],
                                   longitudes=scarborough_data['Longitude']
                                  )

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge


#### Let's check the size of the resulting dataframe

In [135]:
print(scarborough_venues.shape)
scarborough_venues.head()

(41, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge, Malvern",43.806686,-79.194353,Interprovincial Group,43.80563,-79.200378,Print Shop
2,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [136]:
scarborough_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,3,3,3,3,3,3
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",2,2,2,2,2,2
"Birch Cliff, Cliffside West",3,3,3,3,3,3
Cedarbrae,3,3,3,3,3,3
"Clairlea, Golden Mile, Oakridge",3,3,3,3,3,3
"Clarks Corners, Sullivan, Tam O'Shanter",3,3,3,3,3,3
"Cliffcrest, Cliffside, Scarborough Village West",2,2,2,2,2,2
"Dorset Park, Scarborough Town Centre, Wexford Heights",3,3,3,3,3,3
"East Birchmount Park, Ionview, Kennedy Park",3,3,3,3,3,3
"Guildwood, Morningside, West Hill",3,3,3,3,3,3


#### Let's find out how many unique categories can be curated from all the returned venues

In [137]:
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 30 uniques categories.


## Analyze Each Neighborhood

In [138]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot = scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bar,Breakfast Spot,Bus Line,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,...,Mexican Restaurant,Middle Eastern Restaurant,Motel,Park,Pizza Place,Playground,Print Shop,Sandwich Place,Skating Rink,Thai Restaurant
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,"Highland Creek, Rouge Hill, Port Union",0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [139]:
scarborough_onehot.shape

(41, 31)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [140]:
scarborough_grouped = scarborough_onehot.groupby('Neighborhood').mean().reset_index()
scarborough_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bar,Breakfast Spot,Bus Line,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,...,Mexican Restaurant,Middle Eastern Restaurant,Motel,Park,Pizza Place,Playground,Print Shop,Sandwich Place,Skating Rink,Thai Restaurant
0,Agincourt,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
1,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.5,0.0,0.5,0.0,0.0,0.0,0.0
2,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0
3,Cedarbrae,0.0,0.333333,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Clairlea, Golden Mile, Oakridge",0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Final Shape

In [141]:
scarborough_grouped.shape

(16, 31)

In [142]:
num_top_venues = 5

for hood in scarborough_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = scarborough_grouped[scarborough_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                 venue  freq
0       Breakfast Spot  0.33
1       Sandwich Place  0.33
2               Lounge  0.33
3  American Restaurant  0.00
4   Italian Restaurant  0.00


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                 venue  freq
0           Playground   0.5
1                 Park   0.5
2  American Restaurant   0.0
3   Athletics & Sports   0.0
4         Skating Rink   0.0


----Birch Cliff, Cliffside West----
                   venue  freq
0           Skating Rink  0.33
1                   Café  0.33
2  General Entertainment  0.33
3    American Restaurant  0.00
4     Italian Restaurant  0.00


----Cedarbrae----
                  venue  freq
0  Caribbean Restaurant  0.33
1      Hakka Restaurant  0.33
2    Athletics & Sports  0.33
3   American Restaurant  0.00
4     Korean Restaurant  0.00


----Clairlea, Golden Mile, Oakridge----
                  venue  freq
0              Bus Line  0.33
1  Fast Food Restaurant  0.33
2         M

#### Let's put that into a *pandas* dataframe

In [143]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [144]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Sandwich Place,Breakfast Spot,Lounge,Thai Restaurant,General Entertainment,Athletics & Sports,Bar,Bus Line,Café,Caribbean Restaurant
1,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Park,Thai Restaurant,General Entertainment,Athletics & Sports,Bar,Breakfast Spot,Bus Line,Café,Caribbean Restaurant
2,"Birch Cliff, Cliffside West",Skating Rink,Café,General Entertainment,Thai Restaurant,Athletics & Sports,Bar,Breakfast Spot,Bus Line,Caribbean Restaurant,Chinese Restaurant
3,Cedarbrae,Hakka Restaurant,Athletics & Sports,Caribbean Restaurant,Skating Rink,Bar,Breakfast Spot,Bus Line,Café,Chinese Restaurant,Coffee Shop
4,"Clairlea, Golden Mile, Oakridge",Bus Line,Metro Station,Fast Food Restaurant,Thai Restaurant,General Entertainment,Athletics & Sports,Bar,Breakfast Spot,Café,Caribbean Restaurant


## Cluster Neighborhoods

In [145]:
# set number of clusters
kclusters = 5

scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 2, 1, 1, 1, 0, 1, 0, 4, 1])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [147]:
scarborough_merged = scarborough_data
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
# merge scarborough_grouped with scarborough_data to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

#replace NAN to 0 
scarborough_merged['Cluster Labels'] = scarborough_merged['Cluster Labels'].replace(np.nan, 0)
#scarborough_merged.head() # check the last columns!
scarborough_merged.head()

Unnamed: 0,Postalcode,Brought,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,1.0,Print Shop,Fast Food Restaurant,Thai Restaurant,General Entertainment,Athletics & Sports,Bar,Breakfast Spot,Bus Line,Café,Caribbean Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,3.0,Bar,Thai Restaurant,Skating Rink,Athletics & Sports,Breakfast Spot,Bus Line,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1.0,Pizza Place,Mexican Restaurant,Electronics Store,Thai Restaurant,General Entertainment,Athletics & Sports,Bar,Breakfast Spot,Bus Line,Café
3,M1G,Scarborough,Woburn,43.770992,-79.216917,4.0,Coffee Shop,Korean Restaurant,Thai Restaurant,General Entertainment,Athletics & Sports,Bar,Breakfast Spot,Bus Line,Café,Caribbean Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1.0,Hakka Restaurant,Athletics & Sports,Caribbean Restaurant,Skating Rink,Bar,Breakfast Spot,Bus Line,Café,Chinese Restaurant,Coffee Shop


In [148]:
# create map
map_clusters = folium.Map(location=[scarborough_latitude, scarborough_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighbourhood'],scarborough_merged['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters