# Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

Lets download all dependencies that we need for the project

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    openssl-1.1.1e             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    certifi-2019.11.28         |   py36h9f0ad1d_1         149 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1

Loading the data.

In [3]:
# for webscraping import Beautiful Soup 
from bs4 import BeautifulSoup

import xml

In [4]:
link = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
bsoup = BeautifulSoup(link,'lxml')

In [5]:
table_post = bsoup.find('table')
fields = table_post.find_all('td')

postcode = []
borough = []
neighbourhood = []

for i in range(0, len(fields), 3):
    postcode.append(fields[i].text.strip()[:3])
    borough.append(fields[i+1].text.strip()[3:])
    neighbourhood.append(fields[i+2].text.strip())
        
df_pc = pd.DataFrame(data=[postcode, borough, neighbourhood]).transpose()
df_pc.columns = ['Postcode', 'Borough', 'Neighbourhood']
df_pc.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,M3ANorth York(Parkwoods)
1,M4A,Downtown Toronto(Regent Park / Harbourfront),M6ANorth York(Lawrence Manor / Lawrence Heights)
2,M7A,Not assigned,M9AEtobicoke(Islington Avenue)
3,M1B,Not assigned,M3BNorth York(Don Mills)North
4,M4B,"Downtown Toronto(Garden District, Ryerson)",M6BNorth York(Glencairn)


Replacing the Not assigned value with np.nan and drop them from the data

In [6]:
df_pc['Borough'].replace('Not assigned', np.nan, inplace=True)
df_pc.dropna(subset=['Borough'], inplace=True)

df_pc.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M4A,Downtown Toronto(Regent Park / Harbourfront),M6ANorth York(Lawrence Manor / Lawrence Heights)
4,M4B,"Downtown Toronto(Garden District, Ryerson)",M6BNorth York(Glencairn)
7,M4C,Downtown Toronto(St. James Town),M6CYork(Humewood-Cedarvale)
10,M4E,Downtown Toronto(Berczy Park),M6EYork(Caledonia-Fairbanks)
13,M4G,Downtown Toronto(Central Bay Street),M6GDowntown Toronto(Christie)


In [7]:

df_pcn = df_pc.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()
df_pcn.columns = ['Postcode', 'Borough', 'Neighbourhood']
df_pcn

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1H,North York(Hillcrest Village),M3HNorth York(Bathurst Manor / Wilson Heights ...
1,M1J,North York(Fairview / Henry Farm / Oriole),M3JNorth York(Northwood Park / York University)
2,M1K,North York(Bayview Village),M3KNorth York(Downsview)East (CFB Toronto)
3,M1L,North York(York Mills / Silver Hills),M3LNorth York(Downsview)West
4,M1M,North York(Willowdale / Newtonbrook),M3MNorth York(Downsview)Central
5,M1N,North York(Willowdale)South,M3NNorth York(Downsview)Northwest
6,M1P,North York(York Mills West),M3PNot assigned
7,M1R,North York(Willowdale)West,M3RNot assigned
8,M4A,Downtown Toronto(Regent Park / Harbourfront),M6ANorth York(Lawrence Manor / Lawrence Heights)
9,M4B,"Downtown Toronto(Garden District, Ryerson)",M6BNorth York(Glencairn)


Uploading geospatial data

In [8]:

df_geo = pd.read_csv('http://cocl.us/Geospatial_data')
df_geo.columns = ['Postcode', 'Latitude', 'Longitude']

df_pos = pd.merge(df_pcn, df_geo, on=['Postcode'], how='inner')

df_tor = df_pos[['Borough', 'Neighbourhood', 'Postcode', 'Latitude', 'Longitude']].copy()

df_tor.head()

Unnamed: 0,Borough,Neighbourhood,Postcode,Latitude,Longitude
0,North York(Hillcrest Village),M3HNorth York(Bathurst Manor / Wilson Heights ...,M1H,43.773136,-79.239476
1,North York(Fairview / Henry Farm / Oriole),M3JNorth York(Northwood Park / York University),M1J,43.744734,-79.239476
2,North York(Bayview Village),M3KNorth York(Downsview)East (CFB Toronto),M1K,43.727929,-79.262029
3,North York(York Mills / Silver Hills),M3LNorth York(Downsview)West,M1L,43.711112,-79.284577
4,North York(Willowdale / Newtonbrook),M3MNorth York(Downsview)Central,M1M,43.716316,-79.239476


To find the geographical coordinate:

In [9]:
address = 'Toronto, Canada'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the of Toronto are {}, {}.'.format(latitude, longitude))

  from ipykernel import kernelapp as app


The geograpical coordinate of the of Toronto are 43.653963, -79.387207.


creating map

In [10]:
# creating map of New York using latitude, longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# adding markers to map
for lat, lng, borough, neighborhood in zip(df_tor['Latitude'], df_tor['Longitude'], df_tor['Borough'], df_tor['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Exploring toronto neighbourhoods

In [11]:

CLIENT_ID = 'My Name' # your Foursquare ID
CLIENT_SECRET = 'My Secret' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: My Name
CLIENT_SECRET:My Secret


selecting neighbour

In [12]:
df_t4 = df_tor[df_tor['Borough'].str.contains('Toronto')]

to_data = df_t4.reset_index(drop=True)
to_data

Unnamed: 0,Borough,Neighbourhood,Postcode,Latitude,Longitude
0,Downtown Toronto(Regent Park / Harbourfront),M6ANorth York(Lawrence Manor / Lawrence Heights),M4A,43.725882,-79.315572
1,"Downtown Toronto(Garden District, Ryerson)",M6BNorth York(Glencairn),M4B,43.706397,-79.309937
2,Downtown Toronto(St. James Town),M6CYork(Humewood-Cedarvale),M4C,43.695344,-79.318389
3,Downtown Toronto(Berczy Park),M6EYork(Caledonia-Fairbanks),M4E,43.676357,-79.293031
4,Downtown Toronto(Central Bay Street),M6GDowntown Toronto(Christie),M4G,43.70906,-79.363452
5,Downtown Toronto(Richmond / Adelaide / King),M6HWest Toronto(Dufferin / Dovercourt Village),M4H,43.705369,-79.349372
6,Downtown Toronto(Harbourfront East / Union Sta...,M6JWest Toronto(Little Portugal / Trinity),M4J,43.685347,-79.338106
7,Downtown Toronto(Toronto Dominion Centre / Des...,M6KWest Toronto(Brockton / Parkdale Village / ...,M4K,43.679557,-79.352188
8,Downtown Toronto(Commerce Court / Victoria Hotel),M6LNorth York(North Park / Maple Leaf Park / U...,M4L,43.668999,-79.315572
9,Central Toronto(Roselawn),M6NYork(Runnymede / The Junction North),M4N,43.72802,-79.38879


Map of toronto neighbour

In [13]:
map_tohood = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(to_data['Latitude'], to_data['Longitude'], to_data['Borough'], to_data['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
        parse_html=False).add_to(map_tohood)  
    
map_tohood

First neighbourhood

In [14]:
to_data.loc[0, 'Neighbourhood']

'M6ANorth York(Lawrence Manor / Lawrence Heights)'

In [15]:
neighbourhood_latitude = to_data.loc[0, 'Latitude'] # neighbourhood latitude value
neighbourhood_longitude = to_data.loc[0, 'Longitude'] # neighbourhood longitude value

neighbourhood_name = to_data.loc[0, 'Neighbourhood'] # neighbourhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of M6ANorth York(Lawrence Manor / Lawrence Heights) are 43.725882299999995, -79.31557159999998.


In [16]:
LIMIT = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=My Name&client_secret=My Secret&v=20180605&ll=43.725882299999995,-79.31557159999998&radius=500&limit=100'

In [17]:

results = requests.get(url).json()
results

{'meta': {'code': 400,
  'errorType': 'invalid_auth',
  'errorDetail': 'Missing access credentials. See https://developer.foursquare.com/docs/api/configuration/authentication for details.',
  'requestId': '5e7a28b3216785001bf981fc'},
 'response': {}}

In [18]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']