# Part 1 - Creating Initial Dataframe

Importing all the libraries

In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import geocoder
from geopy.geocoders import Nominatim
import folium
from pandas.io.json import json_normalize 

Making a beautifulsoup object given the site link.

In [2]:
URL = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&direction=prev&oldid=926287641'
page = requests.get(URL)

soup = BeautifulSoup(page.content, 'html.parser')

Finding the table in the html code.

In [3]:
table=soup.find('table')

Creating an inital rough table with all the details.

In [4]:
table_contents=[]
for row in table.findAll('tr')[1:]:
    cell = {}
    if row.findAll('td')[1:2][0].text=='Not assigned':     ## If 'Borough' is 'Not assigned' then we simply ignore the row
        pass
    else:
        cell['PostalCode'] = row.td.text[:3]
        cell['Borough'] = row.findAll('td')[1:2][0].text.strip("\n")
        
        ## If 'Neighborhood' is 'Not assigned' then we give it the value from 'Borough'
        if row.findAll('td')[2:3][0].text == 'Not assigned\n':
            cell['Neighborhood'] = row.findAll('td')[1:2][0].text.strip("\n")
        else:
            cell['Neighborhood'] = row.findAll('td')[2:3][0].text.strip("\n")
        
        ## Appending it to the list
        table_contents.append(cell)

Converting list to dataframes.

In [5]:
df=pd.DataFrame(table_contents)

Combining cells with same 'Postal Code'.

In [6]:
df['Neighborhood'] = df.groupby(['PostalCode', 'Borough'])['Neighborhood'].transform(lambda x: ', '.join(x))
df = df.drop_duplicates()
df = df.reset_index()
df.drop(['index'], axis=1, inplace=True)
print(df.head(10))

  PostalCode           Borough                      Neighborhood
0        M3A        North York                         Parkwoods
1        M4A        North York                  Victoria Village
2        M5A  Downtown Toronto         Harbourfront, Regent Park
3        M6A        North York  Lawrence Heights, Lawrence Manor
4        M7A      Queen's Park                      Queen's Park
5        M9A         Etobicoke                  Islington Avenue
6        M1B       Scarborough                    Rouge, Malvern
7        M3B        North York                   Don Mills North
8        M4B         East York   Woodbine Gardens, Parkview Hill
9        M5B  Downtown Toronto          Ryerson, Garden District


Printing the shape.

In [7]:
print(df.shape)

(103, 3)


# Part 2 - Finding Coordinates

Creating a function to get latitude and longitude value given postal code.

In [8]:
geoSpatial = pd.read_csv('Geospatial_Coordinates.csv')
print(geoSpatial.head(5))

  Postal Code   Latitude  Longitude
0         M1B  43.806686 -79.194353
1         M1C  43.784535 -79.160497
2         M1E  43.763573 -79.188711
3         M1G  43.770992 -79.216917
4         M1H  43.773136 -79.239476


Creating two lists with information regarding the coordinates. So that we can add the lists as columns to the existing table.

In [9]:
Latitude = []
Longitude = []

for index, row in df.iterrows():
    postal_code = row['PostalCode']
    Latitude.append(geoSpatial.loc[geoSpatial['Postal Code']==postal_code]['Latitude'].iloc[0])    
    Longitude.append(geoSpatial.loc[geoSpatial['Postal Code']==postal_code]['Longitude'].iloc[0])

In [10]:
df['Longitude'] = Longitude
df['Latitude'] = Latitude

Printing Final Table

In [11]:
print(df.head(10))

  PostalCode           Borough                      Neighborhood  Longitude  \
0        M3A        North York                         Parkwoods -79.329656   
1        M4A        North York                  Victoria Village -79.315572   
2        M5A  Downtown Toronto         Harbourfront, Regent Park -79.360636   
3        M6A        North York  Lawrence Heights, Lawrence Manor -79.464763   
4        M7A      Queen's Park                      Queen's Park -79.389494   
5        M9A         Etobicoke                  Islington Avenue -79.532242   
6        M1B       Scarborough                    Rouge, Malvern -79.194353   
7        M3B        North York                   Don Mills North -79.352188   
8        M4B         East York   Woodbine Gardens, Parkview Hill -79.309937   
9        M5B  Downtown Toronto          Ryerson, Garden District -79.378937   

    Latitude  
0  43.753259  
1  43.725882  
2  43.654260  
3  43.718518  
4  43.662301  
5  43.667856  
6  43.806686  
7  43.7459

# Part 3 - Analysis for Toronto

Use geopy library to get the latitude and longitude values of Toronto.

In [12]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
toronto_location = geolocator.geocode(address)
toronto_latitude = toronto_location.latitude
toronto_longitude = toronto_location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(toronto_latitude, toronto_longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


Create a map of New York with neighborhoods superimposed on top.

In [13]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[toronto_latitude, toronto_longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Define Foursquare Credentials and Version

In [14]:
CLIENT_ID = '1E0AOV3BCRV0ZGWDR5EAPWFO1ARCZCXCVYTYLIFNZLAEATII' # Foursquare ID
CLIENT_SECRET = 'EYVEW53WWRSYGNSY1ZD24K5DU21PIBTMAF5QLZOTYQZ1IA1F' # Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 1E0AOV3BCRV0ZGWDR5EAPWFO1ARCZCXCVYTYLIFNZLAEATII
CLIENT_SECRET:EYVEW53WWRSYGNSY1ZD24K5DU21PIBTMAF5QLZOTYQZ1IA1F


**Let's explore the first neighborhood in our dataframe.**
Get the neighborhood's name.

In [15]:
df.loc[0, 'Neighborhood']

'Parkwoods'

Get the neighborhood's latitude and longitude values.

In [16]:
neighborhood_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


**Now, let's get the top 100 venues that are in Parkwoods within a radius of 500 meters.**
First, let's create the GET request URL. Name your URL url.

In [17]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
print(url) # display URL

https://api.foursquare.com/v2/venues/explore?&client_id=1E0AOV3BCRV0ZGWDR5EAPWFO1ARCZCXCVYTYLIFNZLAEATII&client_secret=EYVEW53WWRSYGNSY1ZD24K5DU21PIBTMAF5QLZOTYQZ1IA1F&v=20180605&ll=43.7532586,-79.3296565&radius=500&limit=100


Send the GET request and examine the resutls

In [18]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '608db989c835ba38935a1aa0'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 3,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c

Let's borrow the get_category_type function from the Foursquare lab.

In [19]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Clean the json and structure it into a pandas dataframe

In [20]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,KFC,Fast Food Restaurant,43.754387,-79.333021
2,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [21]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

3 venues were returned by Foursquare.
