## Import all necessary Python libraries

In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import folium
import requests
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors


## Load CSV file and create Pandas data frame

In [2]:
#csv_file = ('https://cocl.us/Geospatial_data')
#df_csv = pd.read_csv(csv_file)

## Load HTML code into BeautifulSoup object for scraping

In [3]:
# HTML code from wiki-page with Canadian Postal Codes
html=urlopen('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')

# Create Beautiful Soup Object with HTML code from Wiki-Page
bsObj=BeautifulSoup(html.read())

# Scrape the Postal Codes, Boroughs, and Neighborhoods out of Beautiful Soup Object.
data=(bsObj.tbody.findAll('td'))

In [4]:
# Initialize the three following lists for population with data from 'data' BS Object

Postal_Code=[]
Borough=[]
Neighborhood=[]

column_count=1


# The following loop will "parse" the Beautiful Soup Object into their respective columns for later 
# data frame creation.

for i in range (len(data)):
    if column_count==1:
        Postal_Code.append(data[i].get_text(strip=True))        
    elif column_count==2:
        Borough.append(data[i].get_text(strip=True))
    elif column_count==3:
        Neighborhood.append(data[i].get_text(strip=True))

    column_count+=1
    if column_count>3:
        column_count=1

## Create and clean data frame with columns scraped from Wiki-Page

In [5]:
df=pd.DataFrame(Postal_Code, columns=['Postal_Code'])
df['Borough']=Borough
df['Neighborhood Name']=Neighborhood

In [6]:
df.head()

Unnamed: 0,Postal_Code,Borough,Neighborhood Name
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [7]:
# replace "Not assigned" with NaN for later removal
df.replace("Not assigned", np.nan, inplace = True)
df.head(5)

Unnamed: 0,Postal_Code,Borough,Neighborhood Name
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [8]:
# Checking boolean values for null / no-null values

no_assign = df.isnull()
no_assign.head(5)

Unnamed: 0,Postal_Code,Borough,Neighborhood Name
0,False,True,True
1,False,True,True
2,False,False,False
3,False,False,False
4,False,False,False


In [9]:
# Checking null vs. no-null column value counts 

for column in no_assign.columns.values.tolist():
    print(column)
    print (no_assign[column].value_counts())
    print("")

Postal_Code
False    180
Name: Postal_Code, dtype: int64

Borough
False    103
True      77
Name: Borough, dtype: int64

Neighborhood Name
False    103
True      77
Name: Neighborhood Name, dtype: int64



In [10]:
# Drop the NAs in the data frame

df.dropna(axis=0, inplace=True)
df.reset_index(drop=True, inplace=True)


In [11]:
# Verify data frame has no NaN

no_assign_no_na = df.isnull()
no_assign_no_na.head(5)

for column in no_assign_no_na.columns.values.tolist():
    print(column)
    print (no_assign_no_na[column].value_counts())
    print("")

Postal_Code
False    103
Name: Postal_Code, dtype: int64

Borough
False    103
Name: Borough, dtype: int64

Neighborhood Name
False    103
Name: Neighborhood Name, dtype: int64



In [12]:
# Check newly formatted data frame after dropping NaNs

df.head()

Unnamed: 0,Postal_Code,Borough,Neighborhood Name
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## Load CSV file containing lat. and long. of postal code zones

In [13]:
# Load CSV file and create Pandas data frame

csv_file = ('https://cocl.us/Geospatial_data')
df_csv = pd.read_csv(csv_file)

In [14]:
df_csv.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Prepare both dataframes for later concatenating

In [15]:
# Sort values in geospatial data frame by postal code identifiers
df_csv.sort_values(by=['Postal Code'], inplace=True)

# Sort values in neighborhood / boroughs dataframe (scraped from wikipage) by postal code identifiers
df.sort_values(by=['Postal_Code'], inplace=True)

# reset BOTH indexes of each dataframe
df_csv.reset_index(drop=True, inplace=True)
df.reset_index(drop=True, inplace=True)

In [16]:
# Sanity check the geospatial dataframe against the neighborhood / boroughs dataframe in next cell below. both
# data frames should have the same number of rows, and have their respective postal code identifiers in ascending
# alphabetical order
df_csv

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [17]:
# both dataframes have 103 rows, and have postal code identifiers matching their respective row numbers. 
# NOTE: The column for the postal code columns in each dataframe are purposefully named uniquely. This is only
# for later comparison when the two dataframes are concatenated. 
df

Unnamed: 0,Postal_Code,Borough,Neighborhood Name
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [18]:
# Concatenate both dataframes into one single dataframe.
con_df=pd.concat([df, df_csv], axis=1)

In [19]:
# Sanity check the resulting dataframe. Again, note both postal code identifier columns. One will be dropped later.
con_df

Unnamed: 0,Postal_Code,Borough,Neighborhood Name,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
...,...,...,...,...,...,...
98,M9N,York,Weston,M9N,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,M9P,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",M9R,43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",M9V,43.739416,-79.588437


In [20]:
# Drop one of the postal code columns
con_df.drop('Postal Code',axis=1, inplace=True)

In [21]:
# Resulting dataframe after the combining of both the geospatial csv data file, and the web scraped wiki-page with
# the neighborhood, boroughs, and postal code identifiers for each.
con_df.head()

Unnamed: 0,Postal_Code,Borough,Neighborhood Name,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [22]:
# Create a separate dataframe from the concatenated dataframe above to isolate boroughs located in Toronto
Toronto_df=con_df[(con_df['Borough'].str.contains('Toronto'))]
Toronto_df.reset_index(drop=True, inplace=True)
Toronto_df.head()

Unnamed: 0,Postal_Code,Borough,Neighborhood Name,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [23]:
# Obtain geolocation parameters for folium 
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="Toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [24]:
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(Toronto_df['Latitude'], Toronto_df['Longitude'], Toronto_df['Neighborhood Name']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

## Assign credentials for foursquare.com API

In [25]:
CLIENT_ID = 'WR33UMRN2QKRJ5NQGSRSEY0VX3ZBOJ11WSKG04031GHWRI50' # your Foursquare ID
CLIENT_SECRET = 'WQJLYR2F14QBSDFCCKSH3IJDS1HNNB3OMJF12CA4IWCLVUMK' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: WR33UMRN2QKRJ5NQGSRSEY0VX3ZBOJ11WSKG04031GHWRI50
CLIENT_SECRET:WQJLYR2F14QBSDFCCKSH3IJDS1HNNB3OMJF12CA4IWCLVUMK


In [26]:
# 

neighborhood_latitude = Toronto_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Toronto_df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Toronto_df.loc[0, 'Neighborhood Name'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of The Beaches are 43.67635739999999, -79.2930312.


In [27]:
# Create url with necessary parameters and user credentials for foursquare API

LIMIT = 500 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
print (url) # display URL

results = requests.get(url).json()
print ('\n',results)

https://api.foursquare.com/v2/venues/explore?&client_id=WR33UMRN2QKRJ5NQGSRSEY0VX3ZBOJ11WSKG04031GHWRI50&client_secret=WQJLYR2F14QBSDFCCKSH3IJDS1HNNB3OMJF12CA4IWCLVUMK&v=20180605&ll=43.67635739999999,-79.2930312&radius=500&limit=500

 {'meta': {'code': 200, 'requestId': '5eefc514211536001b8c6911'}, 'response': {'headerLocation': 'The Beaches', 'headerFullLocation': 'The Beaches, Toronto', 'headerLocationGranularity': 'neighborhood', 'totalResults': 4, 'suggestedBounds': {'ne': {'lat': 43.680857404499996, 'lng': -79.28682091449052}, 'sw': {'lat': 43.67185739549999, 'lng': -79.29924148550948}}, 'groups': [{'type': 'Recommended Places', 'name': 'recommended', 'items': [{'reasons': {'count': 0, 'items': [{'summary': 'This spot is popular', 'type': 'general', 'reasonName': 'globalInteractionReason'}]}, 'venue': {'id': '4bd461bc77b29c74a07d9282', 'name': 'Glen Manor Ravine', 'location': {'address': 'Glen Manor', 'crossStreet': 'Queen St.', 'lat': 43.67682094413784, 'lng': -79.29394208780985,

In [28]:
print(results['response']['groups'][0]['items'][0]['venue']['name'])
print(results['response']['groups'][0]['items'][0]['venue']['id'])
print('Lat: ', (results['response']['groups'][0]['items'][0]['venue']['location']['lat']))
print('Long: ', (results['response']['groups'][0]['items'][0]['venue']['location']['lng']))
print(results['response']['groups'][0]['items'][0]['venue']['categories'])
print(results['response']['groups'][0]['items'][0]['venue']['categories'][0]['name'])



Glen Manor Ravine
4bd461bc77b29c74a07d9282
Lat:  43.67682094413784
Long:  -79.29394208780985
[{'id': '4bf58dd8d48988d159941735', 'name': 'Trail', 'pluralName': 'Trails', 'shortName': 'Trail', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/hikingtrail_', 'suffix': '.png'}, 'primary': True}]
Trail


## Loop / nested loop to call foursquare API and obtain venues from neighborhoods

In [29]:
LIMIT = 500 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# Intialize empty list to populate venues
venues_list=[]


# Outside loop calls API with lat and long of sequential neighborhoods
for row in range (Toronto_df.shape[0]):
    neighborhood_latitude = Toronto_df.loc[row, 'Latitude'] # neighborhood latitude value
    neighborhood_longitude = Toronto_df.loc[row, 'Longitude'] # neighborhood longitude value

    neighborhood_name = Toronto_df.loc[row, 'Neighborhood Name'] # neighborhood name


    
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        neighborhood_latitude, 
        neighborhood_longitude, 
        radius, 
        LIMIT)
            
    # make the GET request
    results = requests.get(url).json()
    

    # Inside (nested) loop to parse each neighborhood's venue name, location, and category
    try:
        for venue in range(len(results['response']['groups'][0]['items'])):
            try:
                name = results['response']['groups'][0]['items'][venue]['venue']['name']
                lat = results['response']['groups'][0]['items'][venue]['venue']['location']['lat']
                long = results['response']['groups'][0]['items'][venue]['venue']['location']['lng']
                cat = results['response']['groups'][0]['items'][venue]['venue']['categories'][0]['name']
                
                # Append parsed data to venues list
                venues_list.append([
                neighborhood_name,
                neighborhood_latitude,
                neighborhood_longitude,
                name, 
                lat, 
                long,              
                cat])
                
                print("Name: ", name, "\tLat: ", lat, "\tLong: ", long, "\tCategory: ", cat)
            
            except: 
                pass

            
    except:
        pass
    
    

    print ("\n")



Name:  Glen Manor Ravine 	Lat:  43.67682094413784 	Long:  -79.29394208780985 	Category:  Trail
Name:  The Big Carrot Natural Food Market 	Lat:  43.678879 	Long:  -79.297734 	Category:  Health Food Store
Name:  Grover Pub and Grub 	Lat:  43.679181434941015 	Long:  -79.29721535878515 	Category:  Pub
Name:  Upper Beaches 	Lat:  43.68056321147582 	Long:  -79.2928688743688 	Category:  Neighborhood


Name:  MenEssentials 	Lat:  43.677820068604575 	Long:  -79.35126543045044 	Category:  Cosmetics Shop
Name:  Pantheon 	Lat:  43.67762124481265 	Long:  -79.35143390043564 	Category:  Greek Restaurant
Name:  La Diperie 	Lat:  43.677702 	Long:  -79.352265 	Category:  Ice Cream Shop
Name:  Dolce Gelato 	Lat:  43.677772998450614 	Long:  -79.35118737317053 	Category:  Ice Cream Shop
Name:  Cafe Fiorentina 	Lat:  43.677743 	Long:  -79.350115 	Category:  Italian Restaurant
Name:  Mezes 	Lat:  43.6779622615441 	Long:  -79.35019606997218 	Category:  Greek Restaurant
Name:  Louis Cifer Brew Works 	Lat:  43.

Name:  Lawrence Park Ravine 	Lat:  43.72696303913755 	Long:  -79.39438246708775 	Category:  Park
Name:  Zodiac Swim School 	Lat:  43.72853205765438 	Long:  -79.3828602612317 	Category:  Swim School
Name:  TTC Bus #162 - Lawrence-Donway 	Lat:  43.72802605799448 	Long:  -79.38280527753858 	Category:  Bus Line


Name:  Sherwood Park 	Lat:  43.71655100307589 	Long:  -79.38777567141624 	Category:  Park
Name:  Summerhill Market North 	Lat:  43.71549914910689 	Long:  -79.39288125988016 	Category:  Food & Drink Shop
Name:  Homeway Restaurant & Brunch 	Lat:  43.71264120397444 	Long:  -79.39155655199944 	Category:  Breakfast Spot
Name:  Winners 	Lat:  43.71323606413252 	Long:  -79.3938728363979 	Category:  Department Store
Name:  Best Western Roehampton Hotel & Suites 	Lat:  43.7088783 	Long:  -79.3908798 	Category:  Hotel
Name:  Subway 	Lat:  43.7084743635371 	Long:  -79.39067425664052 	Category:  Sandwich Place
Name:  Gym 	Lat:  43.71312601210131 	Long:  -79.39353747528043 	Category:  Gym
Name

Name:  Rosedale Park 	Lat:  43.68232820227814 	Long:  -79.37893434347683 	Category:  Playground
Name:  Whitney Park 	Lat:  43.68203573063681 	Long:  -79.37378835021306 	Category:  Park
Name:  Alex Murray Parkette 	Lat:  43.678300240478954 	Long:  -79.38277328698108 	Category:  Park
Name:  Milkman's Lane 	Lat:  43.676352068015554 	Long:  -79.37384239440172 	Category:  Trail


Name:  Cranberries 	Lat:  43.6678427705951 	Long:  -79.36940687874281 	Category:  Diner
Name:  Kingyo Toronto 	Lat:  43.66589495808371 	Long:  -79.36841520638596 	Category:  Japanese Restaurant
Name:  F'Amelia 	Lat:  43.66753590663226 	Long:  -79.36861331485827 	Category:  Italian Restaurant
Name:  Murgatroid 	Lat:  43.66738149016019 	Long:  -79.36931129229457 	Category:  Restaurant
Name:  Merryberry Cafe + Bistro 	Lat:  43.66663020301851 	Long:  -79.36879184199822 	Category:  Café
Name:  Cabbagetown Brew 	Lat:  43.66692279890784 	Long:  -79.36928929560437 	Category:  Café
Name:  Butter Chicken Factory 	Lat:  43.66

Name:  Roselle Desserts 	Lat:  43.653446723052674 	Long:  -79.3620167174383 	Category:  Bakery
Name:  Tandem Coffee 	Lat:  43.65355870959944 	Long:  -79.36180945913513 	Category:  Coffee Shop
Name:  Cooper Koo Family YMCA 	Lat:  43.65324910177244 	Long:  -79.35800826343677 	Category:  Distribution Center
Name:  Body Blitz Spa East 	Lat:  43.65473505045365 	Long:  -79.35987433132891 	Category:  Spa
Name:  Dominion Pub and Kitchen 	Lat:  43.65691857501867 	Long:  -79.35896684476664 	Category:  Pub
Name:  Corktown Common 	Lat:  43.655617799749734 	Long:  -79.3562113397429 	Category:  Park
Name:  Impact Kitchen 	Lat:  43.65636850543279 	Long:  -79.35697968750694 	Category:  Restaurant
Name:  Morning Glory Cafe 	Lat:  43.653946942635294 	Long:  -79.36114884214422 	Category:  Breakfast Spot
Name:  The Extension Room 	Lat:  43.65331304337331 	Long:  -79.35972538072777 	Category:  Gym / Fitness Center
Name:  The Distillery Historic District 	Lat:  43.65024435658077 	Long:  -79.35932278633118 	

Name:  Gyu-Kaku Japanese BBQ 	Lat:  43.651422275497914 	Long:  -79.37504693687086 	Category:  Japanese Restaurant
Name:  GEORGE Restaurant 	Lat:  43.65334645635036 	Long:  -79.3744449051126 	Category:  Restaurant
Name:  Fahrenheit Coffee 	Lat:  43.65238358726612 	Long:  -79.37271903848271 	Category:  Coffee Shop
Name:  Terroni 	Lat:  43.650927 	Long:  -79.375602 	Category:  Italian Restaurant
Name:  Crepe TO 	Lat:  43.650063279511016 	Long:  -79.37458664523871 	Category:  Creperie
Name:  Hogtown Smoke 	Lat:  43.64928693557788 	Long:  -79.37468932637938 	Category:  Food Truck
Name:  Mystic Muffin 	Lat:  43.652483550499326 	Long:  -79.37265472845176 	Category:  Middle Eastern Restaurant
Name:  Aveda Institute Toronto 	Lat:  43.65009640955562 	Long:  -79.37362952211767 	Category:  Cosmetics Shop
Name:  Versus Coffee 	Lat:  43.65121343781903 	Long:  -79.37523564828548 	Category:  Coffee Shop
Name:  GoodLife Fitness Toronto 137 Yonge Street 	Lat:  43.651242 	Long:  -79.378068 	Category:  Gy

Name:  Jimmy's Coffee 	Lat:  43.65842123574496 	Long:  -79.38561319551111 	Category:  Coffee Shop
Name:  Tim Hortons 	Lat:  43.658569999999976 	Long:  -79.38512341104502 	Category:  Coffee Shop
Name:  Somethin' 2 Talk About 	Lat:  43.65839479027968 	Long:  -79.38533765920816 	Category:  Middle Eastern Restaurant
Name:  Hailed Coffee 	Lat:  43.65883296982352 	Long:  -79.38368351986598 	Category:  Coffee Shop
Name:  Neo Coffee Bar 	Lat:  43.66014 	Long:  -79.38587 	Category:  Coffee Shop
Name:  Mercatto 	Lat:  43.660390911898546 	Long:  -79.38766421192705 	Category:  Italian Restaurant
Name:  The Elm Tree Restaurant 	Lat:  43.65739749535259 	Long:  -79.38376054171513 	Category:  Modern European Restaurant
Name:  KAKA 	Lat:  43.65745745164475 	Long:  -79.38419169987876 	Category:  Japanese Restaurant
Name:  Bubble Bath & Spa 	Lat:  43.65904951746615 	Long:  -79.38534357912432 	Category:  Spa
Name:  Chatime 日出茶太 	Lat:  43.65554164147378 	Long:  -79.38468427043244 	Category:  Bubble Tea Sho

Name:  Harbourfront 	Lat:  43.639525632239106 	Long:  -79.38068838052389 	Category:  Neighborhood
Name:  Roundhouse Park 	Lat:  43.64174513889102 	Long:  -79.38427882922346 	Category:  Park
Name:  BeaverTails 	Lat:  43.639736 	Long:  -79.380068 	Category:  Dessert Shop
Name:  Lake Ontario 	Lat:  43.63894493157648 	Long:  -79.37966465950012 	Category:  Lake
Name:  Harbourfront Centre 	Lat:  43.63855578926675 	Long:  -79.38319016270458 	Category:  Performing Arts Venue
Name:  iQ Food Co 	Lat:  43.642851 	Long:  -79.382081 	Category:  Salad Place
Name:  Maple Leaf Square 	Lat:  43.64292522840183 	Long:  -79.38089182467687 	Category:  Plaza
Name:  Delta Hotels by Marriott Toronto 	Lat:  43.6428819 	Long:  -79.3839491 	Category:  Hotel
Name:  Real Sports Apparel 	Lat:  43.64285984835777 	Long:  -79.38018363195047 	Category:  Sporting Goods Shop
Name:  Natrel Pond/Rink 	Lat:  43.6384313677662 	Long:  -79.38252754335107 	Category:  Skating Rink
Name:  Sharetea 	Lat:  43.640175967763966 	Long:

Name:  Canoe 	Lat:  43.647452066183476 	Long:  -79.38132001815676 	Category:  Restaurant
Name:  Pilot Coffee Roasters 	Lat:  43.648835391245896 	Long:  -79.3809358125033 	Category:  Coffee Shop
Name:  Equinox Bay Street 	Lat:  43.64809974034856 	Long:  -79.37998869411526 	Category:  Gym
Name:  Adelaide Club Toronto 	Lat:  43.64927944291712 	Long:  -79.38192123284551 	Category:  Gym / Fitness Center
Name:  The Fairmont Royal York 	Lat:  43.64544914616651 	Long:  -79.3815076529019 	Category:  Hotel
Name:  Brick Street Bakery 	Lat:  43.6488151631615 	Long:  -79.38060468610949 	Category:  Bakery
Name:  Walrus Pub & Beer Hall 	Lat:  43.64737470920577 	Long:  -79.37951515363918 	Category:  Pub
Name:  DAVIDsTEA 	Lat:  43.64650585612388 	Long:  -79.38014477491379 	Category:  Tea Room
Name:  Maman 	Lat:  43.64830873804684 	Long:  -79.38225333898805 	Category:  Café
Name:  WVRST 	Lat:  43.64496809087762 	Long:  -79.38137631843854 	Category:  Beer Bar
Name:  Mos Mos Coffee 	Lat:  43.6481591975350

Name:  Adelaide Club Toronto 	Lat:  43.64927944291712 	Long:  -79.38192123284551 	Category:  Gym / Fitness Center
Name:  Canoe 	Lat:  43.647452066183476 	Long:  -79.38132001815676 	Category:  Restaurant
Name:  Pilot Coffee Roasters 	Lat:  43.648835391245896 	Long:  -79.3809358125033 	Category:  Coffee Shop
Name:  Mos Mos Coffee 	Lat:  43.64815919753501 	Long:  -79.37874450467258 	Category:  Café
Name:  Equinox Bay Street 	Lat:  43.64809974034856 	Long:  -79.37998869411526 	Category:  Gym
Name:  Hockey Hall Of Fame (Hockey Hall of Fame) 	Lat:  43.646974451431056 	Long:  -79.37732323942681 	Category:  Museum
Name:  Brick Street Bakery 	Lat:  43.6488151631615 	Long:  -79.38060468610949 	Category:  Bakery
Name:  Walrus Pub & Beer Hall 	Lat:  43.64737470920577 	Long:  -79.37951515363918 	Category:  Pub
Name:  DAVIDsTEA 	Lat:  43.64650585612388 	Long:  -79.38014477491379 	Category:  Tea Room
Name:  Maman 	Lat:  43.64830873804684 	Long:  -79.38225333898805 	Category:  Café
Name:  Café Plenty 

Name:  Rosalind's Garden Oasis 	Lat:  43.71218888050602 	Long:  -79.41197784736922 	Category:  Garden
Name:  Havergal College 	Lat:  43.712108 	Long:  -79.41168 	Category:  Music Venue
Name:  Menchie's St. Clair West 	Lat:  43.707664 	Long:  -79.414301 	Category:  Ice Cream Shop


Name:  Kay Gardner Beltline Trail 	Lat:  43.700726252588964 	Long:  -79.4101006611444 	Category:  Trail
Name:  Kay Gardner Beltline Trail 	Lat:  43.698445835574525 	Long:  -79.40687298774719 	Category:  Trail
Name:  TTC Bus #14 Glencairn 	Lat:  43.70022082088619 	Long:  -79.41027359709935 	Category:  Bus Line
Name:  Forest Hill Road Park 	Lat:  43.69794474434431 	Long:  -79.4066047668457 	Category:  Park
Name:  Nikko Sushi Japenese Restaurant 	Lat:  43.700443369880006 	Long:  -79.40795690464067 	Category:  Sushi Restaurant
Name:  Oliver jewelry 	Lat:  43.70037360267825 	Long:  -79.4076437784668 	Category:  Jewelry Store


Name:  Ezra's Pound 	Lat:  43.67515283323029 	Long:  -79.40585846415303 	Category:  Café

Name:  Billy Bishop Toronto City Airport (YTZ) (Billy Bishop Toronto City Airport) 	Lat:  43.63168259661481 	Long:  -79.3960334124689 	Category:  Airport
Name:  Porter Lounge 	Lat:  43.63068 	Long:  -79.395756 	Category:  Airport Lounge
Name:  Toronto Harbour 	Lat:  43.63304549594827 	Long:  -79.39648421332333 	Category:  Harbor / Marina
Name:  Billy Bishop Café 	Lat:  43.63113165872724 	Long:  -79.39613873448842 	Category:  Airport Food Court
Name:  Air Canada Check-In Counter 	Lat:  43.631225969080035 	Long:  -79.3959873701636 	Category:  Airport Terminal
Name:  Gate 8 	Lat:  43.63153617968721 	Long:  -79.39456989797382 	Category:  Airport Gate
Name:  Balzac’s Coffee Roasters 	Lat:  43.631392 	Long:  -79.395952 	Category:  Coffee Shop
Name:  Crew Room 	Lat:  43.63135993598351 	Long:  -79.39610738952823 	Category:  Airport Lounge
Name:  Want Passport 	Lat:  43.63148266310702 	Long:  -79.39607739313684 	Category:  Boutique
Name:  Porter Airlines Check-In Counter 	Lat:  43.631683 	Long:

Name:  Adelaide Club Toronto 	Lat:  43.64927944291712 	Long:  -79.38192123284551 	Category:  Gym / Fitness Center
Name:  Canoe 	Lat:  43.647452066183476 	Long:  -79.38132001815676 	Category:  Restaurant
Name:  Pilot Coffee Roasters 	Lat:  43.648835391245896 	Long:  -79.3809358125033 	Category:  Coffee Shop
Name:  Equinox Bay Street 	Lat:  43.64809974034856 	Long:  -79.37998869411526 	Category:  Gym
Name:  Brick Street Bakery 	Lat:  43.6488151631615 	Long:  -79.38060468610949 	Category:  Bakery
Name:  Pizzeria Libretto 	Lat:  43.64833354686235 	Long:  -79.38511050184168 	Category:  Pizza Place
Name:  Maman 	Lat:  43.64830873804684 	Long:  -79.38225333898805 	Category:  Café
Name:  Hy's Steakhouse 	Lat:  43.649504756476944 	Long:  -79.38291850488223 	Category:  Steakhouse
Name:  The Keg Steakhouse + Bar - York Street 	Lat:  43.64998659318569 	Long:  -79.38410336664538 	Category:  Restaurant
Name:  Sam James Coffee Bar (SJCB) 	Lat:  43.64788137014028 	Long:  -79.38433152836829 	Category: 

Name:  Fiesta Farms 	Lat:  43.66847077052224 	Long:  -79.42048512748114 	Category:  Grocery Store
Name:  Contra Cafe 	Lat:  43.669107144072974 	Long:  -79.42610471852032 	Category:  Café
Name:  Starbucks 	Lat:  43.67153 	Long:  -79.4214 	Category:  Coffee Shop
Name:  Vinny’s Panini 	Lat:  43.67067884412717 	Long:  -79.42614819830024 	Category:  Italian Restaurant
Name:  Scout and Cash Caffe 	Lat:  43.66735987720157 	Long:  -79.41993774586636 	Category:  Café
Name:  Universal Grill 	Lat:  43.670549820711926 	Long:  -79.42654054306477 	Category:  Diner
Name:  Actinolite 	Lat:  43.66785822046965 	Long:  -79.42805427320322 	Category:  Restaurant
Name:  Stubbe Chocolates 	Lat:  43.67156577026589 	Long:  -79.42128899001831 	Category:  Candy Store
Name:  Faema Caffe 	Lat:  43.6710456734398 	Long:  -79.41929697990417 	Category:  Café
Name:  Loblaws 	Lat:  43.671657 	Long:  -79.421364 	Category:  Grocery Store
Name:  Sobeys Dupont 	Lat:  43.670977 	Long:  -79.427573 	Category:  Grocery Store
Na

Name:  Lithuania Park 	Lat:  43.658666653470426 	Long:  -79.46303844451904 	Category:  Park
Name:  nodo 	Lat:  43.665303321627505 	Long:  -79.46562068004896 	Category:  Italian Restaurant
Name:  Hole in the Wall 	Lat:  43.66529624102518 	Long:  -79.46511783107383 	Category:  Bar
Name:  Indie Alehouse 	Lat:  43.66547472315272 	Long:  -79.46528973047275 	Category:  Gastropub
Name:  Junction Flea 	Lat:  43.665258286011984 	Long:  -79.462867677381 	Category:  Flea Market
Name:  SMASH 	Lat:  43.665496255078395 	Long:  -79.46553722469592 	Category:  Antique Shop
Name:  ARTiculations 	Lat:  43.6655499084009 	Long:  -79.46719380367311 	Category:  Arts & Crafts Store
Name:  famous last words 	Lat:  43.665181 	Long:  -79.468471 	Category:  Speakeasy
Name:  Mjölk 	Lat:  43.665432436685634 	Long:  -79.46796194112322 	Category:  Furniture / Home Store
Name:  Junction City Music Hall 	Lat:  43.66533428942121 	Long:  -79.46625298427486 	Category:  Music Venue
Name:  The Beet Organic Café 	Lat:  43.66

Name:  Rorschach Brewing Co. 	Lat:  43.6634831695922 	Long:  -79.31982368639481 	Category:  Brewery
Name:  Leslieville Farmers Market 	Lat:  43.664901398008624 	Long:  -79.31978354343948 	Category:  Farmers Market
Name:  The Sidekick 	Lat:  43.66448428095712 	Long:  -79.3251615270421 	Category:  Comic Shop
Name:  Chino Locos 	Lat:  43.664653279986126 	Long:  -79.32558448882219 	Category:  Burrito Place
Name:  Queen Margherita Pizza 	Lat:  43.664685391781596 	Long:  -79.32416396716876 	Category:  Pizza Place
Name:  Chick-n-Joy 	Lat:  43.66518127981666 	Long:  -79.32140349279904 	Category:  Fast Food Restaurant
Name:  Ashbridges Bay Skatepark 	Lat:  43.662547867049305 	Long:  -79.31563069104494 	Category:  Skate Park
Name:  The Green Wood 	Lat:  43.66472825054798 	Long:  -79.32411719351869 	Category:  Restaurant
Name:  East End Garden Centre & Hardware 	Lat:  43.664564 	Long:  -79.324471 	Category:  Garden Center
Name:  Amin Car Repair Garage 	Lat:  43.66354387979742 	Long:  -79.32013017

In [30]:
# View the venues list and formatting
venues_list


[['The Beaches',
  43.67635739999999,
  -79.2930312,
  'Glen Manor Ravine',
  43.67682094413784,
  -79.29394208780985,
  'Trail'],
 ['The Beaches',
  43.67635739999999,
  -79.2930312,
  'The Big Carrot Natural Food Market',
  43.678879,
  -79.297734,
  'Health Food Store'],
 ['The Beaches',
  43.67635739999999,
  -79.2930312,
  'Grover Pub and Grub',
  43.679181434941015,
  -79.29721535878515,
  'Pub'],
 ['The Beaches',
  43.67635739999999,
  -79.2930312,
  'Upper Beaches',
  43.68056321147582,
  -79.2928688743688,
  'Neighborhood'],
 ['The Danforth West, Riverdale',
  43.6795571,
  -79.352188,
  'MenEssentials',
  43.677820068604575,
  -79.35126543045044,
  'Cosmetics Shop'],
 ['The Danforth West, Riverdale',
  43.6795571,
  -79.352188,
  'Pantheon',
  43.67762124481265,
  -79.35143390043564,
  'Greek Restaurant'],
 ['The Danforth West, Riverdale',
  43.6795571,
  -79.352188,
  'La Diperie',
  43.677702,
  -79.352265,
  'Ice Cream Shop'],
 ['The Danforth West, Riverdale',
  43.6795571

In [31]:
# Create a dataframe with the previously obtained venues data from the foursquare API

df_neigh_venues=pd.DataFrame(venues_list)
df_neigh_venues.columns = ['Neighborhood Name', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']



In [32]:
df_neigh_venues

Unnamed: 0,Neighborhood Name,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,MenEssentials,43.677820,-79.351265,Cosmetics Shop
...,...,...,...,...,...,...,...
1618,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Olliffe On Queen,43.664503,-79.324768,Butcher
1619,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,TTC Stop #03049,43.664470,-79.325145,Light Rail Station
1620,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Greenwood Cigar & Variety,43.664538,-79.325379,Smoke Shop
1621,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Revolution Recording,43.662561,-79.326940,Recording Studio


In [33]:
# Group neighborhood data by neighborhoods
df_neigh_venues.groupby('Neighborhood Name').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,58,58,58,58,58,58
"Brockton, Parkdale Village, Exhibition Place",22,22,22,22,22,22
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",18,18,18,18,18,18
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",14,14,14,14,14,14
Central Bay Street,65,65,65,65,65,65
Christie,17,17,17,17,17,17
Church and Wellesley,76,76,76,76,76,76
"Commerce Court, Victoria Hotel",100,100,100,100,100,100
Davisville,35,35,35,35,35,35
Davisville North,8,8,8,8,8,8


In [34]:
# one hot encoding
toronto_onehot = pd.get_dummies(df_neigh_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood Name'] = df_neigh_venues['Neighborhood Name'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood Name,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
toronto_grouped = toronto_onehot.groupby('Neighborhood Name').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood Name,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.071429,0.071429,0.071429,0.142857,0.142857,0.071429,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.015385,0.0,0.0,0.015385,0.0,0.0,0.015385
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.013158,0.0,0.0,0.0,0.0,0.0,0.0,0.013158,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013158,0.0,0.026316
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,...,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [36]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood Name']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood Name'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05
2  Seafood Restaurant  0.03
3         Cheese Shop  0.03
4              Bakery  0.03


----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0            Café  0.14
1  Breakfast Spot  0.09
2     Coffee Shop  0.09
3             Gym  0.05
4    Intersection  0.05


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                venue  freq
0  Light Rail Station  0.11
1                Park  0.06
2    Recording Studio  0.06
3          Smoke Shop  0.06
4      Farmers Market  0.06


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
             venue  freq
0   Airport Lounge  0.14
1  Airport Service  0.14
2         Boutique  0.07
3  Harbor / Marina  0.07
4    Boat or Ferry  0.07


----Central Bay Street----
                 venue  freq
0          Coffee Shop  0

In [37]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [38]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood Name']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood Name'] = toronto_grouped['Neighborhood Name']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(10)

Unnamed: 0,Neighborhood Name,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Bakery,Restaurant,Café,Cheese Shop,Beer Bar,Clothing Store,Department Store
1,"Brockton, Parkdale Village, Exhibition Place",Café,Coffee Shop,Breakfast Spot,Grocery Store,Stadium,Burrito Place,Restaurant,Climbing Gym,Pet Store,Bakery
2,"Business reply mail Processing Centre, South C...",Light Rail Station,Pizza Place,Smoke Shop,Brewery,Burrito Place,Farmers Market,Fast Food Restaurant,Butcher,Restaurant,Recording Studio
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Boat or Ferry,Boutique,Rental Car Location,Coffee Shop,Harbor / Marina,Sculpture Garden,Airport Terminal,Airport Gate
4,Central Bay Street,Coffee Shop,Italian Restaurant,Sandwich Place,Café,Japanese Restaurant,Bubble Tea Shop,Middle Eastern Restaurant,Department Store,Thai Restaurant,Salad Place
5,Christie,Grocery Store,Café,Park,Baby Store,Nightclub,Italian Restaurant,Athletics & Sports,Diner,Candy Store,Restaurant
6,Church and Wellesley,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Gay Bar,Pub,Men's Store,Mediterranean Restaurant,Hotel,Yoga Studio
7,"Commerce Court, Victoria Hotel",Coffee Shop,Restaurant,Café,Hotel,Gym,American Restaurant,Italian Restaurant,Seafood Restaurant,Japanese Restaurant,Tea Room
8,Davisville,Pizza Place,Sandwich Place,Dessert Shop,Coffee Shop,Italian Restaurant,Gym,Café,Sushi Restaurant,Park,Pharmacy
9,Davisville North,Park,Gym,Pizza Place,Breakfast Spot,Hotel,Department Store,Food & Drink Shop,Sandwich Place,Yoga Studio,Diner


In [39]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood Name', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int32)

In [40]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = Toronto_df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood Name'), on='Neighborhood Name')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal_Code,Borough,Neighborhood Name,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Trail,Neighborhood,Pub,Health Food Store,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Yoga Studio
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,2,Greek Restaurant,Coffee Shop,Italian Restaurant,Bookstore,Frozen Yogurt Shop,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Pub,Pizza Place
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572,2,Sandwich Place,Fast Food Restaurant,Park,Fish & Chips Shop,Pet Store,Movie Theater,Pizza Place,Pub,Restaurant,Burrito Place
3,M4M,East Toronto,Studio District,43.659526,-79.340923,2,Café,Coffee Shop,Gastropub,Bakery,Brewery,American Restaurant,Yoga Studio,Comfort Food Restaurant,Seafood Restaurant,Sandwich Place
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Park,Swim School,Bus Line,Event Space,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [41]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood Name'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Cluster 1

In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,0,Trail,Neighborhood,Pub,Health Food Store,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Yoga Studio
4,Central Toronto,0,Park,Swim School,Bus Line,Event Space,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
23,Central Toronto,0,Trail,Park,Jewelry Store,Sushi Restaurant,Bus Line,Yoga Studio,Dessert Shop,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


## Cluster 2

In [43]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Central Toronto,1,Ice Cream Shop,Garden,Music Venue,Yoga Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


## Cluster 3

In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,2,Greek Restaurant,Coffee Shop,Italian Restaurant,Bookstore,Frozen Yogurt Shop,Ice Cream Shop,Furniture / Home Store,Yoga Studio,Pub,Pizza Place
2,East Toronto,2,Sandwich Place,Fast Food Restaurant,Park,Fish & Chips Shop,Pet Store,Movie Theater,Pizza Place,Pub,Restaurant,Burrito Place
3,East Toronto,2,Café,Coffee Shop,Gastropub,Bakery,Brewery,American Restaurant,Yoga Studio,Comfort Food Restaurant,Seafood Restaurant,Sandwich Place
5,Central Toronto,2,Park,Gym,Pizza Place,Breakfast Spot,Hotel,Department Store,Food & Drink Shop,Sandwich Place,Yoga Studio,Diner
6,Central Toronto,2,Clothing Store,Coffee Shop,Chinese Restaurant,Shoe Store,Seafood Restaurant,Salon / Barbershop,Restaurant,Rental Car Location,Café,Yoga Studio
7,Central Toronto,2,Pizza Place,Sandwich Place,Dessert Shop,Coffee Shop,Italian Restaurant,Gym,Café,Sushi Restaurant,Park,Pharmacy
9,Central Toronto,2,Pub,Coffee Shop,Bank,Sushi Restaurant,Bagel Shop,Sports Bar,Athletics & Sports,Fried Chicken Joint,Restaurant,Pizza Place
11,Downtown Toronto,2,Coffee Shop,Restaurant,Park,Café,Pub,Italian Restaurant,Bakery,Chinese Restaurant,Pizza Place,Convenience Store
12,Downtown Toronto,2,Coffee Shop,Sushi Restaurant,Japanese Restaurant,Restaurant,Gay Bar,Pub,Men's Store,Mediterranean Restaurant,Hotel,Yoga Studio
13,Downtown Toronto,2,Coffee Shop,Bakery,Pub,Park,Breakfast Spot,Restaurant,Café,Theater,Yoga Studio,Cosmetics Shop


## Cluster 4

In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,3,Park,Playground,Trail,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center


## Cluster 5

In [46]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Central Toronto,4,Playground,Tennis Court,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
