### Import the important Library

In [116]:
import numpy as np # Library to handle data in a vectorized manner
import pandas as pd # Library for Data analysis

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)

import json # Library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests #library to handle requests
from pandas.io.json import json_normalize #transform JSON file into a pandas datframe

#Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda insatll -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported')

Libraries imported


## Part 1 (Scraping Webpage)

In [136]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
extracting_data = requests.get(url).text
wiki_data = BeautifulSoup(extracting_data, 'lxml')

In [140]:
table_contents=[]
table=wiki_data.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

# print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [142]:
df.describe()

Unnamed: 0,PostalCode,Borough,Neighborhood
count,103,103,103
unique,103,15,103
top,M9R,North York,"Alderwood, Long Branch"
freq,1,24,1


### Data Cleaning

In [145]:
df = df.dropna()
empty = 'Not assigned'
df = df[(df.PostalCode != empty) & (df.Borough != empty) & (df.Neighborhood != empty)]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


## Part 2 (Adding Geographical Location)

In [147]:
!pip install geocoder

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [148]:
import geocoder

In [149]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [155]:
print(df.shape)
df.describe()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighborhood
count,103,103,103
unique,103,15,103
top,M9R,North York,"Alderwood, Long Branch"
freq,1,24,1


In [166]:
import os
os.chdir('C:/Users/Aniket/')

In [177]:
os.getcwd()

'C:\\Users\\Aniket'

In [182]:
cor = pd.read_csv('C:/Users/Aniket/Desktop/IBM Data Science/10- Capstone/Data/Geospatial_Coordinates.csv')
cor.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [185]:
dataframe = pd.merge(df, cor, on = 'PostalCode')
dataframe.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494


## Part 3 ( Visualization)

In [187]:
from geopy.geocoders import Nominatim #convert an address into latitude and longitude values
import folium # For Map rendering

In [194]:
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Tokyo City are {}, {}.'.format(latitude, longitude))

The geographical coordinate of Tokyo City are 43.6534817, -79.3839347.


#### Create a map of New York with neighborhoods superimposed on top.

In [195]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)
#map_newyork

# add markers to map
for lat, lng, borough, neighborhood in zip(dataframe['Latitude'], dataframe['Longitude'], dataframe['Borough'], 
                                           dataframe['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_newyork)
    
map_newyork

### Discover "North York" Borough of Toronto

In [203]:
north_data = dataframe[dataframe['Borough'] == 'North York'].reset_index(drop=True)
north_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills North,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


In [204]:
address = 'North York,Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geographical coordinate of Manhattan are 43.7543263, -79.44911696639593.


In [206]:
# create map of Manhattan using latitude and longitude values
map_north = folium.Map(location=[latitude, longitude], zoom_start=11)

#add markers to map
for lat, lng, label in zip(north_data['Latitude'], north_data['Longitude'], north_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lng],
    radius=5,
    popup=label,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_north)
    
map_north

#### Define Foursquare Credentials and Version

In [196]:
CLIENT_ID = 'C3LVNGQRL22PJZZXFWBL0QNOU5RS5SKRZDXDSE4F5PGXNK5M' # your Foursquare ID
CLIENT_SECRET = 'OD0Q2OSACWDV2ESKZSYJ2CFLB1SN3BT12ZZ1RKNHBIDMCDNO' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: C3LVNGQRL22PJZZXFWBL0QNOU5RS5SKRZDXDSE4F5PGXNK5M
CLIENT_SECRET:OD0Q2OSACWDV2ESKZSYJ2CFLB1SN3BT12ZZ1RKNHBIDMCDNO


#### Let's explore the first neighborhood in our dataframe.

In [207]:
north_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills North,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


In [208]:
north_data.loc[0, 'Neighborhood']

'Parkwoods'

Get the neighborhood's latitude and longitude values.


In [210]:
neighborhood_latitude = north_data.loc[0, 'Latitude'] #neighborbood latitude value
neighborhood_longitude = north_data.loc[0, 'Longitude'] #neighborhood longitude value
neighborhood_name = north_data.loc[0, 'Neighborhood'] #neighborhood name

print('Latitude and Longitude values of {} are {}, {}.'.format(neighborhood_name,
                                                             neighborhood_latitude,
                                                             neighborhood_longitude))

Latitude and Longitude values of Parkwoods are 43.7532586, -79.3296565.


In [211]:
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

#create url
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
CLIENT_ID,
CLIENT_SECRET,
VERSION,
neighborhood_latitude,
neighborhood_longitude,
radius,
LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=C3LVNGQRL22PJZZXFWBL0QNOU5RS5SKRZDXDSE4F5PGXNK5M&client_secret=OD0Q2OSACWDV2ESKZSYJ2CFLB1SN3BT12ZZ1RKNHBIDMCDNO&v=20180605&ll=43.7532586,-79.3296565&radius=500&limit=100'

In [212]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60ebf2943ad67664d26e2cbf'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c

In [213]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [214]:
venues = results['response']['groups'][0]['items']
#venues
nearby_venues = json_normalize(venues) #flatten JSON
#nearby_venues

#filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

#clean columns
nearby_venues.columns = [col.split('.')[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,KFC,Fast Food Restaurant,43.754387,-79.333021
2,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [215]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.
