### Capstone Project Segmentation & Clustering - Question 3 - Exploring neighborhoods

#### Replicate Postal Codes, Latitude and Longitude

In [32]:
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
# Download the Wikipedia page that lists the postal codes starting with "M".
# A timeout bounds the wait, and raise_for_status() reports a failed download
# here instead of letting an error page reach the HTML parser.
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0]  # first <table> element on the page

# Converting the HTML table into a pandas DataFrame (parsed once)
df = pd.read_html(str(table))[0]
df.columns = ['Postcode', 'Borough','Neighbourhood']
# Drop the first parsed row.
# NOTE(review): presumably a header row parsed as data -- confirm against the live page.
df = df.drop(df.index[0])

# Cleaning up the data
# 1. discard rows whose borough is 'Not assigned'
df1 = df[df.Borough != 'Not assigned']
# 2. one row per (Postcode, Borough), neighbourhood names joined with ', '
can_postalcode = df1.groupby(['Postcode','Borough'],as_index=False).agg(', '.join)
# 3. where the neighbourhood is 'Not assigned', fall back to the borough name
unnamed = can_postalcode['Neighbourhood'] == 'Not assigned'
can_postalcode.loc[unnamed, 'Neighbourhood'] = can_postalcode['Borough']

# Latitude and Longitude (the key column is renamed to match can_postalcode)
lat_long = pd.read_csv("https://cocl.us/Geospatial_data").rename(columns={'Postal Code':'Postcode'})

# Merge on the shared Postcode column
neighborhoods = pd.merge(can_postalcode, lat_long, on='Postcode')
neighborhoods.tail()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437
102,M9W,Etobicoke,Northwest,43.706748,-79.594054


#### Question 3 - Exploring neighborhoods

In [10]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [14]:
address = 'Toronto, Ontario'

# Look up the coordinates of the address through the Nominatim geocoder
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; stop with a clear message
# rather than failing on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Ontario are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Ontario are 43.653963, -79.387207.


#### Explore the Neighborhoods

In [26]:
# Base map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of `neighborhoods`; the popup text is the borough
marker_rows = zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'])
for row_lat, row_lng, row_borough in marker_rows:
    marker = folium.CircleMarker(
        [row_lat, row_lng],
        radius=5,
        popup=folium.Popup(str(row_borough), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

# Last expression of the cell: render the map
map_toronto

#### Foursquare Details

In [27]:
# Foursquare credentials come from environment variables so that secrets are
# never stored in the notebook source or in its saved output.
# Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded here has been published
# and should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Report only whether each value is present; never echo the values themselves.
print('Your credentails:')
print('CLIENT_ID: ' + ('[REDACTED]' if CLIENT_ID else '[NOT SET]'))
print('CLIENT_SECRET:' + ('[REDACTED]' if CLIENT_SECRET else '[NOT SET]'))
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: Foursquare credentials are missing; the Foursquare requests below cannot be authenticated.')

Your credentails:
CLIENT_ID: [REDACTED]
CLIENT_SECRET:[REDACTED]


#### Explore the first neighborhood in one borough (York)

In [48]:
# Keep only the rows whose Borough is 'York' and renumber them from 0
is_york = neighborhoods['Borough'].eq('York')
york_data = neighborhoods.loc[is_york].reset_index(drop=True)
york_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M6C,York,Humewood-Cedarvale,43.693781,-79.428191
1,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512
2,M6M,York,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",43.691116,-79.476013
3,M6N,York,"The Junction North, Runnymede",43.673185,-79.487262
4,M9N,York,Weston,43.706876,-79.518188


In [49]:
# Scalar at row 0, column 2 of york_data (displayed as the cell output)
york_data.iat[0, 2]

'Humewood-Cedarvale'

In [50]:
# Read the first York row's fields by column label rather than by position,
# so the lookup does not depend on the column order of the merged frame.
neighborhood_latitude = york_data['Latitude'].iloc[0] # neighborhood latitude value
neighborhood_longitude = york_data['Longitude'].iloc[0] # neighborhood longitude value

neighborhood_name = york_data['Neighbourhood'].iloc[0] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Humewood-Cedarvale are 43.6937813, -79.42819140000002.


#### Let's get the top 100 venues that are in Humewood-Cedarvale within a radius of 500 meters.

In [53]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
# (the URL embeds CLIENT_SECRET, so avoid displaying it in cell output)
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

# Bound the wait with a timeout and surface HTTP errors before decoding,
# so a rejected request fails here rather than in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [54]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row, or None.

    Parameters
    ----------
    row : mapping-like (for example a dict or a pandas Series)
        Provides the category list under 'categories' or, when that key is
        absent, under 'venue.categories'. Each category is read via its
        'name' key.

    Returns
    -------
    The 'name' of the first category, or None when the category value is
    empty or is not a list/tuple (for example a missing value).

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present in `row`.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors propagate.
        categories_list = row['venue.categories']

    # A missing value (None / NaN) has no len(); treat it like an empty list.
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [56]:
# Venue items of the first group in the Foursquare explore response
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON into dotted column names. pd.json_normalize is used
# because no bare `json_normalize` name is imported in this notebook.
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the part after the last dot ('venue.location.lat' -> 'lat')
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


In [57]:
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Cedarvale Park,Field,43.692535,-79.428705
1,Phil White Arena,Hockey Arena,43.691303,-79.431761
2,Cedarvale Ravine,Trail,43.690188,-79.426106
3,Prince's Parkette,Park,43.697385,-79.424704
