# Explore and cluster the neighborhoods in Toronto.

In [1]:
import pandas as pd

# Download data

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
dfs = pd.read_html(url)

print(len(dfs))

3


In [3]:
print(dfs[0])

    Postal Code           Borough  \
0           M1A      Not assigned   
1           M2A      Not assigned   
2           M3A        North York   
3           M4A        North York   
4           M5A  Downtown Toronto   
..          ...               ...   
175         M5Z      Not assigned   
176         M6Z      Not assigned   
177         M7Z      Not assigned   
178         M8Z         Etobicoke   
179         M9Z      Not assigned   

                                         Neighbourhood  
0                                         Not assigned  
1                                         Not assigned  
2                                            Parkwoods  
3                                     Victoria Village  
4                            Regent Park, Harbourfront  
..                                                 ...  
175                                       Not assigned  
176                                       Not assigned  
177                                       

# Convert to datafram

In [4]:
dfs = pd.read_html(url)                                                                                                         
df = dfs[0]
dataframe = df[['Postal Code','Borough','Neighbourhood']]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
df=df[df.Borough != 'Not assigned']
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [6]:
df.shape

(103, 3)

# Add Latitude and Longitude to the dataframe

In [7]:
!pip3 install geocoder

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 9.1 MB/s  eta 0:00:01
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [8]:
import geocoder

In [9]:
latitude=[]
longitude=[]
for code in df['Postal Code']:
    g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
    print(code, g.latlng)
    while (g.latlng is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(code))
        print(code, g.latlng)
    latlng = g.latlng
    latitude.append(latlng[0])
    longitude.append(latlng[1])

M3A [43.75245000000007, -79.32990999999998]
M4A [43.73057000000006, -79.31305999999995]
M5A [43.65512000000007, -79.36263999999994]
M6A [43.72327000000007, -79.45041999999995]
M7A [43.66253000000006, -79.39187999999996]
M9A [43.662630000000036, -79.52830999999998]
M1B [43.811390000000074, -79.19661999999994]
M3B [43.74923000000007, -79.36185999999998]
M4B [43.70718000000005, -79.31191999999999]
M5B [43.65739000000008, -79.37803999999994]
M6B [43.70687000000004, -79.44811999999996]
M9B [43.65034000000003, -79.55361999999997]
M1C [43.78574000000003, -79.15874999999994]
M3C [43.72168000000005, -79.34351999999996]
M4C [43.68970000000007, -79.30681999999996]
M5C [43.65215000000006, -79.37586999999996]
M6C [43.69211000000007, -79.43035999999995]
M9C [43.64857000000006, -79.57824999999997]
M1E [43.765750000000025, -79.17469999999997]
M4E [43.67709000000008, -79.29546999999997]
M5E [43.64536000000004, -79.37305999999995]
M6E [43.68784000000005, -79.45045999999996]
M1G [43.76812000000007, -79.2

# Explore and Cluster Toronto

In [11]:
!pip3 install folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 4.0 MB/s  eta 0:00:01
[?25hCollecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [12]:
import folium

In [13]:
toronto_df = df[df['Borough'].str.contains('Toronto')]
toronto_df

Unnamed: 0,Postal Code,Borough,Neighbourhood
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
13,M5B,Downtown Toronto,"Garden District, Ryerson"
22,M5C,Downtown Toronto,St. James Town
30,M4E,East Toronto,The Beaches
31,M5E,Downtown Toronto,Berczy Park
40,M5G,Downtown Toronto,Central Bay Street
41,M6G,Downtown Toronto,Christie
49,M5H,Downtown Toronto,"Richmond, Adelaide, King"
50,M6H,West Toronto,"Dufferin, Dovercourt Village"


In [14]:
toronto_df.shape

(39, 3)

# import libraries

In [15]:

import requests # library to handle requests
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
import pandas as pd 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values
from pandas.io.json import json_normalize 

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


# Access Foursquare

In [16]:

CLIENT_ID = 'ZRMZZGCXZLQBD0HE52LWZQ1JKLTZFTQF545O0SYT213KFBOK' # your Foursquare ID
CLIENT_SECRET = 'TULJH33RBUTZ3E4NV4LVRHC4O4JE425WUIQP2FVG41N4P04P' # your Foursquare Secret
ACCESS_TOKEN = 'RTHUEBRBCVGK4EZDSF051F4MTWRUMWUCAVZIAW0YORDHQLMP' # your FourSquare Access Token
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZRMZZGCXZLQBD0HE52LWZQ1JKLTZFTQF545O0SYT213KFBOK
CLIENT_SECRET:TULJH33RBUTZ3E4NV4LVRHC4O4JE425WUIQP2FVG41N4P04P


#  From Lawrence Park

In [17]:
address = 'Lawrence Park, Toronto'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.729199 -79.4032525


In [20]:
search_query = 'coffee'
radius = 500
print(search_query + ' Found!')

coffee Found!


In [21]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&oauth_token={}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude,ACCESS_TOKEN, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=ZRMZZGCXZLQBD0HE52LWZQ1JKLTZFTQF545O0SYT213KFBOK&client_secret=TULJH33RBUTZ3E4NV4LVRHC4O4JE425WUIQP2FVG41N4P04P&ll=43.729199,-79.4032525&oauth_token=RTHUEBRBCVGK4EZDSF051F4MTWRUMWUCAVZIAW0YORDHQLMP&v=20180604&query=coffee&radius=500&limit=30'

# Get results

In [22]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5fe1966f54a4277f8f4a1ce3'},
 'notifications': [{'type': 'notificationTray', 'item': {'unreadCount': 0}}],
 'response': {'venues': [{'id': '519e6c44498eda6c74ccd15b',
    'name': 'Starbucks',
    'location': {'address': '3050 Yonge St',
     'crossStreet': 'at Lawrence Ave W',
     'lat': 43.724878,
     'lng': -79.40249,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.724878,
       'lng': -79.40249}],
     'distance': 484,
     'postalCode': 'M4N 2K4',
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['3050 Yonge St (at Lawrence Ave W)',
      'Toronto ON M4N 2K4',
      'Canada']},
    'categories': [{'id': '4bf58dd8d48988d1e0931735',
      'name': 'Coffee Shop',
      'pluralName': 'Coffee Shops',
      'shortName': 'Coffee Shop',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/coffeeshop_',
       'suffix': '.png'},
      'primary': True}],
    're

In [23]:
venues = results['response']['venues']
dataframe = json_normalize(venues)
dataframe.head()

  from ipykernel import kernelapp as app


Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress
0,519e6c44498eda6c74ccd15b,Starbucks,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1608619631,False,3050 Yonge St,at Lawrence Ave W,43.724878,-79.40249,"[{'label': 'display', 'lat': 43.724878, 'lng':...",484,M4N 2K4,CA,Toronto,ON,Canada,"[3050 Yonge St (at Lawrence Ave W), Toronto ON..."
