# Part one: Scraping Wikipedia & Creating Dataframe

In [58]:
#importing required libraries
import requests
from bs4 import BeautifulSoup

response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
webpage = response.content
print(response.status_code)
soup = BeautifulSoup(webpage, 'html.parser')

200


In [59]:
table_contents = []
table = soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text == 'Not assigned':
        pass
    else:
        cell['Postal Code'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')','')).strip(' ')
        table_contents.append(cell)
print(table_contents)
    

[{'Postal Code': 'M3A', 'Borough': 'North York', 'Neighborhood': 'Parkwoods'}, {'Postal Code': 'M4A', 'Borough': 'North York', 'Neighborhood': 'Victoria Village'}, {'Postal Code': 'M5A', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Regent Park, Harbourfront'}, {'Postal Code': 'M6A', 'Borough': 'North York', 'Neighborhood': 'Lawrence Manor, Lawrence Heights'}, {'Postal Code': 'M7A', 'Borough': "Queen's Park", 'Neighborhood': 'Ontario Provincial Government'}, {'Postal Code': 'M9A', 'Borough': 'Etobicoke', 'Neighborhood': 'Islington Avenue'}, {'Postal Code': 'M1B', 'Borough': 'Scarborough', 'Neighborhood': 'Malvern, Rouge'}, {'Postal Code': 'M3B', 'Borough': 'North York', 'Neighborhood': 'Don MillsNorth'}, {'Postal Code': 'M4B', 'Borough': 'East York', 'Neighborhood': 'Parkview Hill, Woodbine Gardens'}, {'Postal Code': 'M5B', 'Borough': 'Downtown Toronto', 'Neighborhood': 'Garden District, Ryerson'}, {'Postal Code': 'M6B', 'Borough': 'North York', 'Neighborhood': 'Glencairn'}, {'Postal

In [60]:
import pandas as pd
import numpy as np

df = pd.DataFrame(table_contents)
df['Borough'] = df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                       'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                       'EtobicokeNorthwest':'Etobicoke Northwest',
                                       'EastYorkEast Toronto':'East York/East Toronto',
                                       'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto Business,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [61]:
df.shape

(103, 3)

# Part two: Geospatial Dataset

In [62]:
!pip install geocoder

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [63]:
# import geocoder

# lat_lng_coords = None

# while(lat_lng_coords is None):
#     g = geocoder.google('Toronto, Ontario')
#     lat_lng_coords = g.latlng
    
# latitude = lat_lng_coords[0]
# longitude = lat_lng_coords[1]

In [64]:
import urllib.request

url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv'
file = 'Geospatial_Coordinates.csv'
urllib.request.urlretrieve(url,file)

df_geo = pd.read_csv(file)
df_geo

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [65]:
df_sorted = df.sort_values(by='Postal Code', ascending=False)
df_sorted

Unnamed: 0,Postal Code,Borough,Neighborhood
94,M9W,Etobicoke Northwest,"Clairville, Humberwood, Woodbine Downs, West H..."
89,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
77,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
70,M9P,Etobicoke,Westmount
64,M9N,York,Weston
...,...,...,...
26,M1H,Scarborough,Cedarbrae
22,M1G,Scarborough,Woburn
18,M1E,Scarborough,"Guildwood, Morningside, West Hill"
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"


In [66]:
dataset = pd.merge(df_sorted,df_geo,on='Postal Code')
dataset

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M9W,Etobicoke Northwest,"Clairville, Humberwood, Woodbine Downs, West H...",43.706748,-79.594054
1,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
2,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9N,York,Weston,43.706876,-79.518188
...,...,...,...,...,...
98,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
99,M1G,Scarborough,Woburn,43.770992,-79.216917
100,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
101,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497


# Part three: Data Analysis

### Exploring data via Foursquare

In [67]:
# required libraries

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!pip install folium
import folium # map rendering library

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [68]:
print('This dataset has {} boroughs and {} neighborhoods.'.format(
        len(dataset['Borough'].unique()),
        dataset.shape[0]
    )
)

This dataset has 15 boroughs and 103 neighborhoods.


#### Create a map of Toronto

In [69]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [70]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(dataset['Latitude'], dataset['Longitude'], dataset['Borough'], dataset['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='yellow',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [71]:
etobicoke_data = dataset[dataset['Borough'] == 'Etobicoke'].reset_index(drop=True)
etobicoke_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437
1,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
2,M9P,Etobicoke,Westmount,43.696319,-79.532242
3,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201
4,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724


In [72]:
address = 'Etobicoke, CA'

geolocator = Nominatim(user_agent="ca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Etobicoke are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Etobicoke are 43.6435559, -79.5656326.


In [73]:
map_etobicoke = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(etobicoke_data['Latitude'], etobicoke_data['Longitude'], dataset['Borough'], dataset['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='white',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_etobicoke)  
    
map_etobicoke

#### Connecting to Foursquare API

In [74]:
CLIENT_ID = 'WVSVXJZNSX2RTUYH3EN414GA25RS2LCBD20FKHPWCZZH41JT' # your Foursquare ID
CLIENT_SECRET = 'TSHIWJ4MHEDR4LILPO2XNIURO4F5UJQA5YESZFQEKAZFYS32' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

#### Explore first neighborhood

In [75]:
etobicoke_data.loc[2, 'Neighborhood']

'Westmount'

In [76]:
# westmounts latitude and longitude
westmount_latitude = etobicoke_data.loc[2, 'Latitude'] # neighborhood latitude value
westmount_longitude = etobicoke_data.loc[2, 'Longitude'] # neighborhood longitude value

neighborhood_name = etobicoke_data.loc[2, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               westmount_latitude, 
                                                               westmount_longitude))

Latitude and longitude values of Westmount are 43.696319, -79.53224240000002.


#### get the top 100 venues that are in Westmount within a radius of 500 meters.


In [77]:
limit = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID,CLIENT_SECRET,VERSION,westmount_latitude,westmount_longitude,radius,limit)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=WVSVXJZNSX2RTUYH3EN414GA25RS2LCBD20FKHPWCZZH41JT&client_secret=TSHIWJ4MHEDR4LILPO2XNIURO4F5UJQA5YESZFQEKAZFYS32&v=20180605&ll=43.696319,-79.53224240000002&radius=500&limit=100'

In [78]:
#get results
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60abc2f31a5bc765c24bec8e'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Humber Heights - Westmount',
  'headerFullLocation': 'Humber Heights - Westmount, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 9,
  'suggestedBounds': {'ne': {'lat': 43.700819004500005,
    'lng': -79.52603004751754},
   'sw': {'lat': 43.6918189955, 'lng': -79.5384547524825}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54089623498e19198cf69267',
       'name': 'Starbucks',
       'location': {'address': '1564 Royal York Road',
        'crossStreet': 'Royal York/The Westway',
        'lat': 43.696338,
        'lng': -79.533398,
        '

In [79]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [80]:
# clean json and create pandas dataframe

venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Starbucks,Coffee Shop,43.696338,-79.533398
1,Mayflower Chinese Food,Chinese Restaurant,43.692753,-79.531566
2,Pizza Hut Etobicoke,Pizza Place,43.696562,-79.533506
3,Subway,Sandwich Place,43.692927,-79.531471
4,Pizza Nova,Pizza Place,43.692817,-79.53157


In [81]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

9 venues were returned by Foursquare.


## Explore neighborhoods in Etobicoke

In [82]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [83]:
etobicoke_venues = getNearbyVenues(names=etobicoke_data['Neighborhood'],
                                  latitudes = etobicoke_data['Latitude'],
                                  longitudes = etobicoke_data['Longitude'])

etobicoke_venues.head()

South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens
Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens
Westmount
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Islington Avenue
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West
Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East
The Kingsway, Montgomery Road, Old Mill North
Alderwood, Long Branch
New Toronto, Mimico South, Humber Bay Shores


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,Shoppers Drug Mart,43.741685,-79.584487,Pharmacy
1,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,Popeyes Louisiana Kitchen,43.741209,-79.584332,Fried Chicken Joint
2,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,Subway,43.742645,-79.589643,Sandwich Place
3,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,The Beer Store,43.741694,-79.584373,Beer Store
4,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,Sheriff's No Frills,43.741696,-79.584379,Grocery Store


In [84]:
print(etobicoke_venues.shape)

(69, 7)


In [85]:
etobicoke_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood, Long Branch",9,9,9,9,9,9
"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",8,8,8,8,8,8
"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",4,4,4,4,4,4
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West",14,14,14,14,14,14
"New Toronto, Mimico South, Humber Bay Shores",12,12,12,12,12,12
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East",1,1,1,1,1,1
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",10,10,10,10,10,10
"The Kingsway, Montgomery Road, Old Mill North",1,1,1,1,1,1
"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",1,1,1,1,1,1
Westmount,9,9,9,9,9,9


In [86]:
print('There are {} unique categories.'.format(len(etobicoke_venues['Venue Category'].unique())))

There are 36 unique categories.


## Analyze each neighborhood

In [87]:
# one hot encoding
etobicoke_onehot = pd.get_dummies(etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
etobicoke_onehot['Neighborhood'] = etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [etobicoke_onehot.columns[-1]] + list(etobicoke_onehot.columns[:-1])
etobicoke_onehot = etobicoke_onehot[fixed_columns]

etobicoke_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Baseball Field,Beer Store,Burger Joint,Bus Line,Café,Chinese Restaurant,Coffee Shop,...,Pub,Restaurant,River,Sandwich Place,Skating Rink,Social Club,Supplement Shop,Tanning Salon,Thrift / Vintage Store,Wings Joint
0,"South Steeles, Silverstone, Humbergate, Jamest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"South Steeles, Silverstone, Humbergate, Jamest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"South Steeles, Silverstone, Humbergate, Jamest...",0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,"South Steeles, Silverstone, Humbergate, Jamest...",0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"South Steeles, Silverstone, Humbergate, Jamest...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [88]:
etobicoke_onehot.shape

(69, 37)

In [89]:
etobicoke_grouped = etobicoke_onehot.groupby('Neighborhood').mean().reset_index()
etobicoke_grouped

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Baseball Field,Beer Store,Burger Joint,Bus Line,Café,Chinese Restaurant,Coffee Shop,...,Pub,Restaurant,River,Sandwich Place,Skating Rink,Social Club,Supplement Shop,Tanning Salon,Thrift / Vintage Store,Wings Joint
0,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,...,0.111111,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0.0,0.0,0.0,0.125,0.0,0.0,0.125,0.0,0.125,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Kingsview Village, St. Phillips, Martin Grove ...",0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
3,"Mimico NW, The Queensway West, South of Bloor,...",0.0,0.071429,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.071429,0.0,0.071429,0.071429,0.071429,0.071429,0.071429
4,"New Toronto, Mimico South, Humber Bay Shores",0.083333,0.083333,0.0,0.0,0.0,0.0,0.083333,0.0,0.166667,...,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Old Mill South, King's Mill Park, Sunnylea, Hu...",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"South Steeles, Silverstone, Humbergate, Jamest...",0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0
7,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"West Deane Park, Princess Gardens, Martin Grov...",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Westmount,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.111111,...,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0


In [90]:
etobicoke_grouped.shape

(10, 37)

In [91]:
num_top_venues = 5

for hood in etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = etobicoke_grouped[etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood, Long Branch----
          venue  freq
0   Pizza Place  0.22
1           Pub  0.11
2      Pharmacy  0.11
3           Gym  0.11
4  Skating Rink  0.11


----Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood----
          venue  freq
0      Pharmacy  0.12
1   Pizza Place  0.12
2    Beer Store  0.12
3  Liquor Store  0.12
4          Café  0.12


----Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens----
               venue  freq
0  Mobile Phone Shop  0.25
1        Pizza Place  0.25
2           Bus Line  0.25
3     Sandwich Place  0.25
4         Restaurant  0.00


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West----
            venue  freq
0     Wings Joint  0.07
1  Sandwich Place  0.07
2          Bakery  0.07
3  Hardware Store  0.07
4             Gym  0.07


----New Toronto, Mimico South, Humber Bay Shores----
                  venue  freq
0           Coffee Shop  0.17
1   American Restaurant  

In [92]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [93]:
# display top ten venues in neighborhood

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = etobicoke_grouped['Neighborhood']

for ind in np.arange(etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood, Long Branch",Pizza Place,Pool,Pharmacy,Skating Rink,Sandwich Place,Coffee Shop,Pub,Gym,Fried Chicken Joint,Fast Food Restaurant
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Liquor Store,Café,Park,Pharmacy,Pizza Place,Convenience Store,Coffee Shop,Beer Store,Bus Line,Chinese Restaurant
2,"Kingsview Village, St. Phillips, Martin Grove ...",Mobile Phone Shop,Sandwich Place,Bus Line,Pizza Place,Wings Joint,Convenience Store,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Discount Store
3,"Mimico NW, The Queensway West, South of Bloor,...",Wings Joint,Burger Joint,Grocery Store,Thrift / Vintage Store,Fast Food Restaurant,Discount Store,Convenience Store,Gym,Hardware Store,Sandwich Place
4,"New Toronto, Mimico South, Humber Bay Shores",Coffee Shop,Liquor Store,Pizza Place,Bakery,Café,Fast Food Restaurant,Gym,Mexican Restaurant,Pharmacy,American Restaurant


## Cluster Neighborhoods

In [94]:
# set number of clusters
kclusters = 6

etobicoke_grouped_clustering = etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 4, 5, 0, 4, 2, 0, 1, 3, 4], dtype=int32)

In [96]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

etobicoke_merged = etobicoke_data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
etobicoke_merged = etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

etobicoke_merged.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,0.0,Grocery Store,Liquor Store,Fried Chicken Joint,Beer Store,Fast Food Restaurant,Sandwich Place,Discount Store,Pharmacy,Pizza Place,Convenience Store
1,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,5.0,Mobile Phone Shop,Sandwich Place,Bus Line,Pizza Place,Wings Joint,Convenience Store,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Discount Store
2,M9P,Etobicoke,Westmount,43.696319,-79.532242,4.0,Pizza Place,Coffee Shop,Chinese Restaurant,Middle Eastern Restaurant,Discount Store,Playground,Intersection,Sandwich Place,Bus Line,Café
3,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,4.0,Liquor Store,Café,Park,Pharmacy,Pizza Place,Convenience Store,Coffee Shop,Beer Store,Bus Line,Chinese Restaurant
4,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,3.0,Bakery,Wings Joint,Convenience Store,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Discount Store,Coffee Shop


In [97]:
# check for missing values
etobicoke_merged.isnull()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True
6,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [100]:
# drop row with missing values
etobicoke_merged.dropna(inplace=True)

In [101]:
# recheck
etobicoke_merged.isnull()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
10,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [104]:
# set values as integer to prepare data for visualization
etobicoke_merged['Cluster Labels'] = etobicoke_merged['Cluster Labels'].astype(int)

## Visualize Cluster

In [105]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(etobicoke_merged['Latitude'], etobicoke_merged['Longitude'], etobicoke_merged['Neighborhood'], etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

### Cluster 1

In [106]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 0, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,0,Grocery Store,Liquor Store,Fried Chicken Joint,Beer Store,Fast Food Restaurant,Sandwich Place,Discount Store,Pharmacy,Pizza Place,Convenience Store
6,Etobicoke,0,Wings Joint,Burger Joint,Grocery Store,Thrift / Vintage Store,Fast Food Restaurant,Discount Store,Convenience Store,Gym,Hardware Store,Sandwich Place


### Cluster 2

In [107]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 1, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Etobicoke,1,River,Wings Joint,Coffee Shop,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Discount Store,Convenience Store,Chinese Restaurant


### Cluster 3

In [108]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 2, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Etobicoke,2,Baseball Field,Wings Joint,Convenience Store,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Discount Store,Coffee Shop


### Cluster 4

In [109]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 3, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Etobicoke,3,Bakery,Wings Joint,Convenience Store,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Discount Store,Coffee Shop


### Cluster 5

In [110]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 4, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Etobicoke,4,Pizza Place,Coffee Shop,Chinese Restaurant,Middle Eastern Restaurant,Discount Store,Playground,Intersection,Sandwich Place,Bus Line,Café
3,Etobicoke,4,Liquor Store,Café,Park,Pharmacy,Pizza Place,Convenience Store,Coffee Shop,Beer Store,Bus Line,Chinese Restaurant
9,Etobicoke,4,Pizza Place,Pool,Pharmacy,Skating Rink,Sandwich Place,Coffee Shop,Pub,Gym,Fried Chicken Joint,Fast Food Restaurant
10,Etobicoke,4,Coffee Shop,Liquor Store,Pizza Place,Bakery,Café,Fast Food Restaurant,Gym,Mexican Restaurant,Pharmacy,American Restaurant


### Cluster 6

In [111]:
etobicoke_merged.loc[etobicoke_merged['Cluster Labels'] == 5, etobicoke_merged.columns[[1] + list(range(5, etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Etobicoke,5,Mobile Phone Shop,Sandwich Place,Bus Line,Pizza Place,Wings Joint,Convenience Store,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Discount Store


### Disclaimer: <br> This notebook is for practice and study purposes! <br>--- John Oxales 05/24/2021