## import and download some necessary librairies

In [35]:
import pandas as pd
import numpy as np
import requests

import sys
!{sys.executable} -m pip install BeautifulSoup4
from bs4 import BeautifulSoup

import sys
!{sys.executable} -m pip install requests

import sys
!{sys.executable} -m pip install html5lib

import sys
!{sys.executable} -m pip install lxml



## scrape table of Toronto Postcode, Borough and Neighborhood from Wiki using BeautifulSoup

In [36]:
#create requests
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

#create beautifulsoup
soup = BeautifulSoup(source, 'html5lib')

#scrape the table 
body = soup.find('body')
content = body.find('div', id = 'content')
bodyContent = content.find('div', id = 'bodyContent')
mw_content_text = bodyContent.find('div', id = 'mw-content-text')
mw_parser_output = mw_content_text.find('div', class_= 'mw-parser-output')
table_sorter = mw_parser_output.find('table', class_='wikitable sortable')

#pandas read html (result = list)
dfs = pd.read_html(str(table_sorter), header = 0)

#list to Dataframe
df = dfs[0]

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Cleaning Dataframe

In [37]:
#change column header
header = ['PostalCode', 'Borough', 'Neighborhood']
df.columns = header

#raplace "Not assigned" with NaN for easy droping 
df.replace(to_replace = 'Not assigned', value  = pd.np.nan, inplace = True)

#drop rows if Borough = NaN
df.dropna(subset = ['Borough'],inplace = True)
df.reset_index(drop = True, inplace = True)

#replace all (Neighborhood = NaN) to its Borough name
index_null = df.index[np.nonzero(pd.isnull(df.values).any(1))]
for index in index_null:
	df.iloc[index, df.columns.get_loc('Neighborhood')] = df.iloc[index, df.columns.get_loc('Borough')]

#group Postalcode and add Neighborhood with the same Postalcode together
df_group = df.groupby(['PostalCode','Borough'], sort = False)['Neighborhood'].apply(', '.join).reset_index()

df_group.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [38]:
df_group.shape

(103, 3)

## Download and Read file

In [39]:
!wget -q -O 'toronto_data.csv' http://cocl.us/Geospatial_data
print('Data downloaded!')
df_data = pd.read_csv('toronto_data.csv')
df_data.head()

Data downloaded!


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Join Dataframe

In [60]:
header1 = ['PostalCode', 'Latitude','Longitude']
df_data.columns = header1
df_toronto = df_group.set_index('PostalCode').join(df_data.set_index('PostalCode')).reset_index()

df_toronto.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


# Part 3 Clustering

## import libraries

In [41]:
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
import json
from pandas.io.json import json_normalize
import folium

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



## Use geopy library to get the latitude and longitude values of Toronto.

In [42]:
address = 'Toronto, Ontario, Canada'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


## Create Maps of Toronto

In [43]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, df_toronto in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighborhood']):
    label = '{}, {}'.format(df_toronto, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

# For this project I'm going to segment and cluster only the neighborhoods that's __not__ in Downtown Toronto.

so let's drop the Downtown Toronto rows

In [64]:
df_no_downtown = df_toronto[df_toronto['Borough'] != "Downtown Toronto"]
df_no_downtown.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
10,M6B,North York,Glencairn,43.709577,-79.445073
11,M9B,Etobicoke,"Cloverdale, Islington, Martin Grove, Princess ...",43.650943,-79.554724


In [65]:
# create map of Manhattan using latitude and longitude values
map_no_downtown = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_no_downtown['Latitude'], df_no_downtown['Longitude'], df_no_downtown['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_no_downtown)  
    
map_no_downtown

## Get Data from Foursquare

In [174]:
CLIENT_ID = '4WO4KRUANXC4FNWIUGCJ0HHDBEOMWGNNJD3NMGRJETF53GCH' # your Foursquare ID
CLIENT_SECRET = '410VRM3ALNW3T4HADHSNLFWFJDVZ3RZ0QVQMDVVZDCH2XJ2Z' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
radius = 500
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4WO4KRUANXC4FNWIUGCJ0HHDBEOMWGNNJD3NMGRJETF53GCH
CLIENT_SECRET:410VRM3ALNW3T4HADHSNLFWFJDVZ3RZ0QVQMDVVZDCH2XJ2Z


## Create function for getting category type

In [143]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

## Create function for getting venues

In [144]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Get Venues

In [145]:
no_downtown_venues = getNearbyVenues(names=df_no_downtown['Neighborhood'],
                                   latitudes=df_no_downtown['Latitude'],
                                   longitudes=df_no_downtown['Longitude']
                                  )

Parkwoods
Victoria Village
Lawrence Heights, Lawrence Manor
Queen's Park
Islington Avenue
Rouge, Malvern
Don Mills North
Woodbine Gardens, Parkview Hill
Glencairn
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Highland Creek, Rouge Hill, Port Union
Flemingdon Park, Don Mills South
Woodbine Heights
Humewood-Cedarvale
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
Guildwood, Morningside, West Hill
The Beaches
Caledonia-Fairbanks
Woburn
Leaside
Cedarbrae
Hillcrest Village
Bathurst Manor, Downsview North, Wilson Heights
Thorncliffe Park
Dovercourt Village, Dufferin
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Little Portugal, Trinity
East Birchmount Park, Ionview, Kennedy Park
Bayview Village
CFB Toronto, Downsview East
The Danforth West, Riverdale
Brockton, Exhibition Place, Parkdale Village
Clairlea, Golden Mile, Oakridge
Silver Hills, York Mills
Downsview West
The Beaches West, India Bazaar
Downsview

## Analyze the data

In [146]:
print(no_downtown_venues.shape)
no_downtown_venues.head()

(978, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [175]:
no_downtown_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",2,2,2,2,2,2
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",11,11,11,11,11,11
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Downsview North, Wilson Heights",18,18,18,18,18,18
...,...,...,...,...,...,...
Willowdale West,6,6,6,6,6,6
Woburn,3,3,3,3,3,3
"Woodbine Gardens, Parkview Hill",13,13,13,13,13,13
Woodbine Heights,9,9,9,9,9,9


In [148]:
print('There are {} uniques categories.'.format(len(no_downtown_venues['Venue Category'].unique())))

There are 206 uniques categories.


In [149]:
# one hot encoding
no_downtown_onehot = pd.get_dummies(no_downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
no_downtown_onehot['Neighborhood'] = no_downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [no_downtown_onehot.columns[-1]] + list(no_downtown_onehot.columns[:-1])
no_downtown_onehot = no_downtown_onehot[fixed_columns]

no_downtown_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,...,Trail,Transportation Service,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [150]:
no_downtown_onehot.shape

(978, 206)

In [151]:
no_downtown_grouped = no_downtown_onehot.groupby('Neighborhood').mean().reset_index()
no_downtown_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,...,Trail,Transportation Service,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
2,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
3,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
4,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,Willowdale West,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
78,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
79,"Woodbine Gardens, Parkview Hill",0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.076923,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0
80,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0


In [152]:
no_downtown_grouped.shape

(82, 206)

## Get most 10 Venues from each Neighborhood

In [176]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [177]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = no_downtown_grouped['Neighborhood']

for ind in np.arange(no_downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(no_downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Breakfast Spot,Skating Rink,Chinese Restaurant,Sandwich Place,Women's Store,Dog Run,Fast Food Restaurant,Farmers Market,Falafel Restaurant
1,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Park,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant,Electronics Store,Eastern European Restaurant
2,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Liquor Store,Fried Chicken Joint,Sandwich Place,Discount Store,Fast Food Restaurant,Pizza Place,Japanese Restaurant,Beer Store,Pharmacy
3,"Alderwood, Long Branch",Pizza Place,Pub,Skating Rink,Sandwich Place,Coffee Shop,Pharmacy,Gym,Creperie,Cuban Restaurant,Event Space
4,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Fried Chicken Joint,Restaurant,Sushi Restaurant,Diner,Middle Eastern Restaurant,Sandwich Place,Bank,Frozen Yogurt Shop,Deli / Bodega


## Now clustering

In [165]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

no_downtown_grouped_clustering = no_downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(no_downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_ 

array([2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 3, 2, 2, 2,
       4, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2,
       2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 1], dtype=int32)

In [166]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

no_downtown_merged = df_no_downtown

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
no_downtown_merged = no_downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
no_downtown_merged.head(20) # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,Food & Drink Shop,Park,Women's Store,Dog Run,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,2.0,Pizza Place,Portuguese Restaurant,Hockey Arena,Coffee Shop,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,2.0,Clothing Store,Furniture / Home Store,Event Space,Miscellaneous Shop,Coffee Shop,Boutique,Vietnamese Restaurant,Accessories Store,Falafel Restaurant,Farmers Market
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494,2.0,Coffee Shop,Burger Joint,Gym,Diner,Park,Chinese Restaurant,Seafood Restaurant,Portuguese Restaurant,Italian Restaurant,Yoga Studio
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,,,,,,,,,,,
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2.0,Print Shop,Fast Food Restaurant,Women's Store,Discount Store,Field,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant,Electronics Store
7,M3B,North York,Don Mills North,43.745906,-79.352188,2.0,Gym / Fitness Center,Café,Japanese Restaurant,Caribbean Restaurant,Women's Store,Drugstore,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937,2.0,Fast Food Restaurant,Pizza Place,Gym / Fitness Center,Gastropub,Pet Store,Café,Bus Line,Intersection,Bank,Pharmacy
10,M6B,North York,Glencairn,43.709577,-79.445073,2.0,Pizza Place,Pub,Metro Station,Japanese Restaurant,Discount Store,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant,Electronics Store
11,M9B,Etobicoke,"Cloverdale, Islington, Martin Grove, Princess ...",43.650943,-79.554724,3.0,Golf Course,Women's Store,Discount Store,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant,Electronics Store


In [167]:
no_downtown_merged.shape

(85, 16)

In [168]:
no_downtown_merged.isnull().sum()

PostalCode                0
Borough                   0
Neighborhood              0
Latitude                  0
Longitude                 0
Cluster Labels            3
1st Most Common Venue     3
2nd Most Common Venue     3
3rd Most Common Venue     3
4th Most Common Venue     3
5th Most Common Venue     3
6th Most Common Venue     3
7th Most Common Venue     3
8th Most Common Venue     3
9th Most Common Venue     3
10th Most Common Venue    3
dtype: int64

## Drop null rows

In [169]:
no_downtown_merged.dropna(inplace = True)
no_downtown_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1.0,Food & Drink Shop,Park,Women's Store,Dog Run,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,2.0,Pizza Place,Portuguese Restaurant,Hockey Arena,Coffee Shop,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,2.0,Clothing Store,Furniture / Home Store,Event Space,Miscellaneous Shop,Coffee Shop,Boutique,Vietnamese Restaurant,Accessories Store,Falafel Restaurant,Farmers Market
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494,2.0,Coffee Shop,Burger Joint,Gym,Diner,Park,Chinese Restaurant,Seafood Restaurant,Portuguese Restaurant,Italian Restaurant,Yoga Studio
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2.0,Print Shop,Fast Food Restaurant,Women's Store,Discount Store,Field,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant,Electronics Store


In [170]:
no_downtown_merged = no_downtown_merged.astype({"Cluster Labels": int})
no_downtown_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,1,Food & Drink Shop,Park,Women's Store,Dog Run,Field,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,Pizza Place,Portuguese Restaurant,Hockey Arena,Coffee Shop,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,2,Clothing Store,Furniture / Home Store,Event Space,Miscellaneous Shop,Coffee Shop,Boutique,Vietnamese Restaurant,Accessories Store,Falafel Restaurant,Farmers Market
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494,2,Coffee Shop,Burger Joint,Gym,Diner,Park,Chinese Restaurant,Seafood Restaurant,Portuguese Restaurant,Italian Restaurant,Yoga Studio
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,2,Print Shop,Fast Food Restaurant,Women's Store,Discount Store,Field,Farmers Market,Falafel Restaurant,Event Space,Empanada Restaurant,Electronics Store


In [171]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [172]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(no_downtown_merged['Latitude'], no_downtown_merged['Longitude'], no_downtown_merged['Neighborhood'], no_downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Thank you