In [1]:
# Load Libraries

import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
#!conda install -c conda-forge folium --yes
import folium # plotting library
print('Folium installed')


# for webscraping import Beautiful Soup 
from bs4 import BeautifulSoup
import xml

# library to handle JSON files
import json 
print('Libraries imported.')


Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Folium installed
Libraries imported.


# Scrape Wikipedia

In [2]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(url,'lxml')

In [3]:
table_post = soup.find('table')
fields = table_post.find_all('td')

postcode = []
borough = []
neighbourhood = []

for i in range(0, len(fields), 3):
    postcode.append(fields[i].text.strip())
    borough.append(fields[i+1].text.strip())
    neighbourhood.append(fields[i+2].text.strip())
        
df_wk = pd.DataFrame(data=[postcode, borough, neighbourhood]).transpose()
df_wk.columns = ['Postcode', 'Borough', 'Neighbourhood']
df_wk

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


# Remove "Not assigned"

In [4]:
df_wk['Borough'].replace('Not assigned', np.nan, inplace=True)
df_wk.dropna(subset=['Borough'], inplace=True)
df_wk

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


#  Combine Neighbourhood if same Postcode

In [5]:
df_proc = df_wk.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()
df_proc.columns = ['Postcode', 'Borough', 'Neighbourhood']
df_proc

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


# Assign Borough to Neignbourhood if 'Not assigned'

In [6]:
for index, row in df_proc.iterrows():
    if row['Neighbourhood']  == 'Not assigned':
        row['Neighbourhood']= row['Borough']
        
df_proc

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


# Read Geo data

In [7]:
df_gd = pd.read_csv('http://cocl.us/Geospatial_data')
df_gd.columns = ['Postcode', 'Latitude', 'Longitude']

# merge 
df_toronto = pd.merge(df_proc, df_gd, on=['Postcode'], how='inner')
df_toronto


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [8]:
address = 'Toronto, Canada'
geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(latitude, longitude))

  from ipykernel import kernelapp as app


The geograpical coordinate of the City of Toronto are 43.653963, -79.387207.


In [9]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_opacity=0.3,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

# Toronto Neighbourhoods

In [9]:
# Foursquare Credentials and Version
CLIENT_ID = '0DQ4LUMQN20AFAPMASVGGGSDPWGM0WZQZZZXL52VQTCZETTY' # your Foursquare ID
CLIENT_SECRET = 'CB1JW2GE3TXXV1XNUDAZMILI4QACAAKYBHGB5X3RUR2D5E0W' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: 0DQ4LUMQN20AFAPMASVGGGSDPWGM0WZQZZZXL52VQTCZETTY
CLIENT_SECRET:CB1JW2GE3TXXV1XNUDAZMILI4QACAAKYBHGB5X3RUR2D5E0W


# Get only Toronto Neighbourhoods

In [10]:
# get Borough with 'Toronto'
df_t_neighbour = df_toronto[df_toronto['Borough'].str.contains('Toronto')]
to_data = df_t_neighbour.reset_index(drop=True)
to_data

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


# Show Toronto Neighbourhoods only in the map

In [11]:
# create map of Toronto using latitude and longitude values
map_neighbors= folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(to_data['Latitude'], to_data['Longitude'], to_data['Borough'], to_data['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=4,
        popup=label,
        color='blue',
        fill=True,
        fill_opacity=0.3,
        parse_html=False).add_to(map_neighbors)  
    
map_neighbors

# Explore the first 5 neighbourhood

In [12]:
to_data.loc[:4, 'Neighbourhood']

0                       The Beaches
1      The Danforth West, Riverdale
2    The Beaches West, India Bazaar
3                   Studio District
4                     Lawrence Park
Name: Neighbourhood, dtype: object

# To visit friends in Lawrence Park, get latitude and longitude of the neighbourhood 

In [13]:
neighbourhood_latitude = to_data.loc[4, 'Latitude'] # neighbourhood latitude value
neighbourhood_longitude = to_data.loc[4, 'Longitude'] # neighbourhood longitude value

neighbourhood_name = to_data.loc[4, 'Neighbourhood'] # neighbourhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Lawrence Park are 43.7280205, -79.3887901.


# Like to check some art gallaries around the area. Get the top 50 venus within 500 meters radius

In [14]:
# check venus within 500 meters radius
search_query = 'Art Gallery'
radius = 500
LIMIT = 50

In [15]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=0DQ4LUMQN20AFAPMASVGGGSDPWGM0WZQZZZXL52VQTCZETTY&client_secret=CB1JW2GE3TXXV1XNUDAZMILI4QACAAKYBHGB5X3RUR2D5E0W&ll=43.653963,-79.387207&v=20180605&query=Art Gallery&radius=500&limit=50'

In [16]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d47a5908fbe780023af6643'},
 'response': {'venues': [{'id': '4ad4c05ef964a520daf620e3',
    'name': 'Art Gallery of Ontario',
    'location': {'address': '317 Dundas St W',
     'crossStreet': 'at Beverley St',
     'lat': 43.654002860337386,
     'lng': -79.39292172707437,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.654002860337386,
       'lng': -79.39292172707437}],
     'distance': 460,
     'postalCode': 'M5T 1G4',
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['317 Dundas St W (at Beverley St)',
      'Toronto ON M5T 1G4',
      'Canada']},
    'categories': [{'id': '4bf58dd8d48988d1e2931735',
      'name': 'Art Gallery',
      'pluralName': 'Art Galleries',
      'shortName': 'Art Gallery',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/arts_entertainment/artgallery_',
       'suffix': '.png'},
      'primary': True}],
    'venuePage': {'id':

In [17]:
# relevant part of JSON to venues
venues = results['response']['venues']

# tranform into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '4bf58dd8d48988d1e2931735', 'name': 'A...",False,4ad4c05ef964a520daf620e3,317 Dundas St W,CA,Toronto,Canada,at Beverley St,460,"[317 Dundas St W (at Beverley St), Toronto ON ...","[{'label': 'display', 'lat': 43.65400286033738...",43.654003,-79.392922,M5T 1G4,ON,Art Gallery of Ontario,v-1564976528,33853777.0
1,"[{'id': '4bf58dd8d48988d1e2931735', 'name': 'A...",False,4ae47067f964a520989a21e3,334 Dundas St West,CA,Toronto,Canada,,430,"[334 Dundas St West, Toronto ON M5T 1G5, Canada]","[{'label': 'display', 'lat': 43.65422714439051...",43.654227,-79.392536,M5T 1G5,ON,Art Square Gallery & Cafe,v-1564976528,
2,"[{'id': '4bf58dd8d48988d127951735', 'name': 'A...",False,4adf3c01f964a5208f7821e3,74 McCaul St,CA,Toronto,Canada,Dundas St.,333,"[74 McCaul St (Dundas St.), Toronto ON M5T 3K2...","[{'label': 'display', 'lat': 43.65264564807000...",43.652646,-79.390925,M5T 3K2,ON,Aboveground Art Supplies,v-1564976528,
3,"[{'id': '4bf58dd8d48988d124941735', 'name': 'O...",False,4c17f2a74ff90f47a5ae0d49,,CA,,Canada,,63,[Canada],"[{'label': 'display', 'lat': 43.653834, 'lng':...",43.653834,-79.387977,,,The Arthritis Society,v-1564976528,
4,"[{'id': '4bf58dd8d48988d1ae941735', 'name': 'U...",False,4ad4c064f964a52065f820e3,100 McCaul St,CA,Toronto,Canada,at Dundas St W,337,"[100 McCaul St (at Dundas St W), Toronto ON M5...","[{'label': 'display', 'lat': 43.65280251171013...",43.652803,-79.391074,M5T 1W1,ON,Ontario College of Art and Design University (...,v-1564976528,


In [18]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,Art Gallery of Ontario,Art Gallery,317 Dundas St W,CA,Toronto,Canada,at Beverley St,460,"[317 Dundas St W (at Beverley St), Toronto ON ...","[{'label': 'display', 'lat': 43.65400286033738...",43.654003,-79.392922,M5T 1G4,ON,4ad4c05ef964a520daf620e3
1,Art Square Gallery & Cafe,Art Gallery,334 Dundas St West,CA,Toronto,Canada,,430,"[334 Dundas St West, Toronto ON M5T 1G5, Canada]","[{'label': 'display', 'lat': 43.65422714439051...",43.654227,-79.392536,M5T 1G5,ON,4ae47067f964a520989a21e3
2,Aboveground Art Supplies,Arts & Crafts Store,74 McCaul St,CA,Toronto,Canada,Dundas St.,333,"[74 McCaul St (Dundas St.), Toronto ON M5T 3K2...","[{'label': 'display', 'lat': 43.65264564807000...",43.652646,-79.390925,M5T 3K2,ON,4adf3c01f964a5208f7821e3
3,The Arthritis Society,Office,,CA,,Canada,,63,[Canada],"[{'label': 'display', 'lat': 43.653834, 'lng':...",43.653834,-79.387977,,,4c17f2a74ff90f47a5ae0d49
4,Ontario College of Art and Design University (...,University,100 McCaul St,CA,Toronto,Canada,at Dundas St W,337,"[100 McCaul St (at Dundas St W), Toronto ON M5...","[{'label': 'display', 'lat': 43.65280251171013...",43.652803,-79.391074,M5T 1W1,ON,4ad4c064f964a52065f820e3
5,artisan security,Residential Building (Apartment / Condo),"152 St Patrick St, Toronto, ON M5T",CA,Toronto,Canada,,75,"[152 St Patrick St, Toronto, ON M5T, Toronto O...","[{'label': 'display', 'lat': 43.65388698599722...",43.653887,-79.388133,M5T,ON,4f480f59e4b0186352a801b9
6,Art Bike Installation,,,CA,Toronto,Canada,,168,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65504131399523...",43.655041,-79.388683,,ON,4dff6f15e4cdf724607880c2
7,Art Framing N Copy Co.,Miscellaneous Shop,189 Dundas St. W,CA,Toronto,Canada,,167,"[189 Dundas St. W, Toronto ON M5G 1C7, Canada]","[{'label': 'display', 'lat': 43.65545281866891...",43.655453,-79.38689,M5G 1C7,ON,4fca7001e4b00e68465ddd13
8,Bau-Xi Gallery,Art Gallery,340 Dundas St West,CA,Toronto,Canada,McCaul,437,"[340 Dundas St West (McCaul), Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65410383852814...",43.654104,-79.39263,,ON,4c0ac194a1b32d7faeed99f0
9,Chada Import Gallery,Miscellaneous Shop,25 Baldwin Street,CA,Toronto,Canada,,546,"[25 Baldwin Street, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.656149, 'lng':...",43.656149,-79.39328,,ON,4c59f4e667ac0f477895054c


# Map the result

In [19]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=16) 


# add the Italian restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):

    
    ##    folium.features.CircleMarker(
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

# Heard 'Gallery 260' from a friend, so let's try.

In [20]:
venue_id = '4eeaab3f0e01182cafa5f01f' # ID of FGallery 260
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url

'https://api.foursquare.com/v2/venues/4eeaab3f0e01182cafa5f01f?client_id=0DQ4LUMQN20AFAPMASVGGGSDPWGM0WZQZZZXL52VQTCZETTY&client_secret=CB1JW2GE3TXXV1XNUDAZMILI4QACAAKYBHGB5X3RUR2D5E0W&v=20180605'

In [21]:
result = requests.get(url).json()
print(result['response']['venue'].keys())
result['response']['venue']

dict_keys(['id', 'name', 'contact', 'location', 'canonicalUrl', 'categories', 'verified', 'stats', 'likes', 'dislike', 'ok', 'allowMenuUrlEdit', 'beenHere', 'specials', 'photos', 'reasons', 'hereNow', 'createdAt', 'tips', 'shortUrl', 'timeZone', 'listed', 'pageUpdates', 'inbox', 'attributes'])


{'id': '4eeaab3f0e01182cafa5f01f',
 'name': 'Gallery 260',
 'contact': {'phone': '4167770260', 'formattedPhone': '(416) 777-0260'},
 'location': {'address': '260 Richmond Str E',
  'crossStreet': '(between Sherbourne & George Str)',
  'lat': 43.6532332800695,
  'lng': -79.37021865121656,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.6532332800695,
    'lng': -79.37021865121656}],
  'postalCode': 'M5A-1P4',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['260 Richmond Str E ((between Sherbourne & George Str))',
   'Toronto ON M5A-1P4',
   'Canada']},
 'canonicalUrl': 'https://foursquare.com/v/gallery-260/4eeaab3f0e01182cafa5f01f',
 'categories': [{'id': '4bf58dd8d48988d1e2931735',
   'name': 'Art Gallery',
   'pluralName': 'Art Galleries',
   'shortName': 'Art Gallery',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/arts_entertainment/artgallery_',
    'suffix': '.png'},
   'primary': True}],
 'verified': False,


# Get the venue's overall rating

In [22]:
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

This venue has not been rated yet.


# That's ok, just plot in the map

In [23]:
address = '260 Richmond Str E, Toronto'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.6532447 -79.3703095


In [102]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=16) # generate map centred around the Conrad Hotel

# add a red circle marker to represent Feheley Fine Arts
##folium.features.CircleMarker(
folium.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Feheley Fine Arts',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Italian restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
##    folium.features.CircleMarker(
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map