# Segmenting and Clustering Neighborhoods in Toronto¶

## Import Libraries

In [1]:

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# for webscraping import Beautiful Soup 
from bs4 import BeautifulSoup

import xml

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

usage: conda-script.py [-h] [-V] command ...
conda-script.py: error: unrecognized arguments: # uncomment this line if you haven't completed the Foursquare API lab


Libraries imported.


usage: conda-script.py [-h] [-V] command ...
conda-script.py: error: unrecognized arguments: # uncomment this line if you haven't completed the Foursquare API lab


## Load data from previous Assignment

In [5]:
df = pd.read_csv('TorantoNeighborhoodGeo.csv')
df.drop('Unnamed: 0', axis=1,inplace=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Get Geographical cordinates of Toranto

In [7]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent='toranto_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of the City of Toronto are 43.6534817, -79.3839347.


## Create a map of Toranto with neighborhoods superimposed on top

In [10]:
# create map object
toranto_map = folium.Map(location=[latitude,longitude], zoom_start=12)

# add markers to map
for lat, lon, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toranto_map)

toranto_map

# Explore Toraoto neighborhoods

In [11]:
CLIENT_ID = 'Private' # your Foursquare ID
CLIENT_SECRET = 'Private' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: NE4Q4G1XUSVVI0MWFQCJOENARJX0SFHTBHD3WCRJEVF3IG43
CLIENT_SECRET:YJE50LKGFPBIRF0CNJVAOIGV4YULI5SC33WTHAYANMDQXGTR


## Select only Toronto Neighborhoods

In [14]:
toronto_neighborhoods = df[df['Borough'].str.contains('Toronto')]
toronto_neighborhoods.reset_index(inplace=True, drop=True)
toronto_neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


## Map of Toronto Neighbourhoods only

In [17]:
# create map object
torontoHoods_map = folium.Map(location=[latitude,longitude], zoom_start=12)

for lat, lon, borough, neighborhood in zip(toronto_neighborhoods['Latitude'], toronto_neighborhoods['Longitude'], toronto_neighborhoods['Borough'], toronto_neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lon],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3199cc',
    fill_opacity=0.3,
    parse_html=False).add_to(torontoHoods_map)

torontoHoods_map

## Explore the first neighborhood

## Get its info

In [18]:
hood_latitude = toronto_neighborhoods.loc[0, 'Latitude']
hood_longitude = toronto_neighborhoods.loc[0, 'Longitude']
hood_name = toronto_neighborhoods.loc[0, 'Neighborhood']

print('Latitude and longitude values of {} are {}, {}.'.format(hood_name, hood_latitude, hood_longitude))

Latitude and longitude values of The Beaches are 43.67635739999999, -79.2930312.


## Top 100 venues are within a radius from the centroid of 1000 meters

In [19]:
limit = 100
radius = 1000

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    hood_latitude, 
    hood_longitude, 
    radius, 
    limit)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=NE4Q4G1XUSVVI0MWFQCJOENARJX0SFHTBHD3WCRJEVF3IG43&client_secret=YJE50LKGFPBIRF0CNJVAOIGV4YULI5SC33WTHAYANMDQXGTR&v=20180605&ll=43.67635739999999,-79.2930312&radius=1000&limit=100'

In [21]:
results = requests.get(url).json()

In [22]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

## Clean json and structure into a pandas dataframe

In [23]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,Tori's Bakeshop,Vegetarian / Vegan Restaurant,43.672114,-79.290331
2,The Beech Tree,Gastropub,43.680493,-79.288846
3,Ed's Real Scoop,Ice Cream Shop,43.67263,-79.287993
4,Beaches Bake Shop,Bakery,43.680363,-79.289692


## How many venues have returned from Foursquare

In [24]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

78 venues were returned by Foursquare.


## Venues Map

In [31]:
# create map object
torontoVenues_map = folium.Map(location=[hood_latitude,hood_longitude], zoom_start=14)

for lat, lon, name, category in zip(nearby_venues['lat'], nearby_venues['lng'], nearby_venues['name'], nearby_venues['categories']):
    label = '{}, {}'.format(name, category)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lon],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3199cc',
    fill_opacity=0.3,
    parse_html=False).add_to(torontoVenues_map)

torontoVenues_map