# Part 1: explore neighbourhood

Import libraries

In [1]:
!pip install BeautifulSoup4
!pip install requests



Download data

In [2]:
#imports
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

#get html from wiki page and create soup object
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(source.text, 'lxml')

#using soup object, iterate the .wikitable to get the data from the HTML page and store it into a list
data = []
columns = []
table = soup.find(class_='wikitable')
for index, tr in enumerate(table.find_all('tr')):
    section = []
    for td in tr.find_all(['th','td']):
        section.append(td.text.rstrip())
    
    #First row of data is the header
    if (index == 0):
        columns = section
    else:
        data.append(section)

#convert list into Pandas DataFrame
canada_df = pd.DataFrame(data = data,columns = columns)
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


Clean data

In [3]:
canada_df = canada_df[canada_df['Neighborhood'] != 'Not assigned']
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Group by postal code

In [4]:
canada_df["Neighborhood"] = canada_df.groupby("Postal Code")["Neighborhood"].transform(lambda neigh: ', '.join(neigh))

#remove duplicates
canada_df = canada_df.drop_duplicates()

#update index to be postcode if it isn't already
if(canada_df.index.name != 'Postal Code'):
    canada_df = canada_df.set_index('Postal Code')
    
canada_df.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


Importing coordinates

In [5]:
df=pd.read_csv('http://cocl.us/Geospatial_data')
df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Concatenate tables

In [6]:
df = df.set_index('Postal Code')
df_combined = pd.concat([canada_df, df], axis=1, join='inner')

df_combined.index.name = 'Postal Code'
df_combined.reset_index(inplace=True)

df_combined.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [7]:
df_combined.shape

(103, 5)

Import libraries

In [8]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import requests
from pandas.io.json import json_normalize
import json

import requests

from bs4 import BeautifulSoup

from geopy.geocoders import Nominatim


import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

print('Libraries imported!')

Libraries imported!


In [9]:
!pip install folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 6.9MB/s ta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [10]:
import folium

Set co-ordinates for Toronto

In [11]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="tl-toronto-neigh")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The co-ordinates of Toronto are {}, {}.'.format(latitude, longitude))

The co-ordinates of Toronto are 43.6534817, -79.3839347.


Create a folium map of Toronto

In [12]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, post, borough, neigh in zip(df_combined['Latitude'], df_combined['Longitude'], df_combined['Postal Code'], df_combined['Borough'], df_combined['Neighborhood']):
    label = "{} ({}): {}".format(borough, post, neigh)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto

# Part 2: check hotels in the neighbourhood

Set account credentials

In [13]:
CLIENT_ID = '3HSNMTNOB02MKCLT5PJR4ZUKFJQX5NRE42GDZZZIU2LVGGU1' # your Foursquare ID
CLIENT_SECRET = 'HPPDIJIDWVYKR1HKDQCJDTLE4I2WSBCULOSI0JKUTQJ1RQGO' # your Foursquare Secret
VERSION = '20200515' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 3HSNMTNOB02MKCLT5PJR4ZUKFJQX5NRE42GDZZZIU2LVGGU1
CLIENT_SECRET:HPPDIJIDWVYKR1HKDQCJDTLE4I2WSBCULOSI0JKUTQJ1RQGO


Define the corresponding URL and mine the hotels data

In [14]:
radius = 1000
LIMIT = 200
search_query = 'hotel'
venues = []

for lat, long, post, borough, neighborhood in zip(df_combined['Latitude'], df_combined['Longitude'], df_combined['Postal Code'], df_combined['Borough'], df_combined['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&query={}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        search_query,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [15]:
venues_df = pd.DataFrame(venues)
venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
venues_df = venues_df[venues_df['VenueCategory'] == 'Hotel']
print(venues_df.shape)
venues_df.head()

(438, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
1,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Residence & Conference Centre,43.65304,-79.35704,Hotel
3,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Filmores Hotel Toronto,43.658019,-79.370326,Hotel
5,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,The Hazelton Hotel,43.670764,-79.393257,Hotel
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Windsor Arms Hotel,43.668781,-79.39085,Hotel
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,DoubleTree by Hilton,43.654608,-79.385942,Hotel


In [16]:
class_n = pd.DataFrame(venues_df['Neighborhood'].value_counts())
class_n.rename(columns = {'Neighborhood':'NofHotels'}, inplace=True)
class_n.head()

Unnamed: 0,NofHotels
"Richmond, Adelaide, King",49
"First Canadian Place, Underground city",44
"Toronto Dominion Centre, Design Exchange",43
"Commerce Court, Victoria Hotel",39
St. James Town,38


Define the corresponding URL and mine the cafes data

In [17]:
radius = 500
LIMIT = 100
search_query = 'cafe'
cafes = []

for lat, long, post, borough, neighborhood in zip(df_combined['Latitude'], df_combined['Longitude'], df_combined['Postal Code'], df_combined['Borough'], df_combined['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&query={}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        search_query,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        cafes.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [18]:
cafes_df = pd.DataFrame(cafes)
cafes_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
print(cafes_df.shape)
cafes_df.head()

(504, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Café
1,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Café
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Starbucks,43.651613,-79.364917,Café
3,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,Caffe Furbo,43.64997,-79.358849,Café
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,ODIN Cafe + Bar,43.656739,-79.356503,Café


In [19]:
class_nn = pd.DataFrame(cafes_df['Neighborhood'].value_counts())
class_nn.rename(columns = {'Neighborhood':'NofCafes'}, inplace=True)
class_nn.head()

Unnamed: 0,NofCafes
"Commerce Court, Victoria Hotel",51
"First Canadian Place, Underground city",46
St. James Town,42
"Toronto Dominion Centre, Design Exchange",41
"Richmond, Adelaide, King",37


Combine data and find best Neighborhood to start a cafeteria

In [22]:
class_fin = pd.concat([class_n, class_nn], axis=1, join='inner')
a=pd.DataFrame(class_fin['NofHotels']-class_fin['NofCafes'])
a.columns = ['OptIndex']
b=pd.DataFrame(class_fin['NofHotels']+a['OptIndex'])
c = pd.concat([class_fin, a, b], axis=1, join='inner')
c.reset_index(inplace=True)
c.columns = ['Neighborhood','NofHotels', 'NofCafes','NofHotels-NofCafes', 'OptIndex']
c.sort_values('OptIndex', ascending=False,inplace=True)
c.reset_index(inplace=True)
c.drop('index', axis=1, inplace=True)
c.OptIndex = -c.OptIndex.min()+c.OptIndex
print("Neighborhood rating to start a cafeteria")
c

Neighborhood rating to start a cafeteria


Unnamed: 0,Neighborhood,NofHotels,NofCafes,NofHotels-NofCafes,OptIndex
0,"Richmond, Adelaide, King",49,37,12,76
1,"Toronto Dominion Centre, Design Exchange",43,41,2,60
2,"First Canadian Place, Underground city",44,46,-2,57
3,"Garden District, Ryerson",34,27,7,56
4,"Harbourfront East, Union Station, Toronto Islands",27,14,13,55
5,Stn A PO Boxes,31,23,8,54
6,Berczy Park,23,9,14,52
7,St. James Town,38,42,-4,49
8,"Commerce Court, Victoria Hotel",39,51,-12,42
9,Church and Wellesley,22,17,5,42


In [35]:
df_location = df_combined.set_index('Neighborhood')
bv = c.set_index('Neighborhood')
bv_combined = pd.concat([bv, df_location], axis=1, join='inner')
bv_combined.reset_index(inplace=True)
bv_combined = bv_combined.head()
print("List of best 5 locations to start a cafeteria:")
bv_combined

List of best 5 locations to start a cafeteria:


Unnamed: 0,Neighborhood,NofHotels,NofCafes,NofHotels-NofCafes,OptIndex,Postal Code,Borough,Latitude,Longitude
0,"Richmond, Adelaide, King",49,37,12,76,M5H,Downtown Toronto,43.650571,-79.384568
1,"Toronto Dominion Centre, Design Exchange",43,41,2,60,M5K,Downtown Toronto,43.647177,-79.381576
2,"First Canadian Place, Underground city",44,46,-2,57,M5X,Downtown Toronto,43.648429,-79.38228
3,"Garden District, Ryerson",34,27,7,56,M5B,Downtown Toronto,43.657162,-79.378937
4,"Harbourfront East, Union Station, Toronto Islands",27,14,13,55,M5J,Downtown Toronto,43.640816,-79.381752


In [31]:
map_toronto_f = folium.Map(location=[latitude, longitude], zoom_start=15)

for lat, long, post, neigh in zip(bv_combined['Latitude'], bv_combined['Longitude'], bv_combined['Postal Code'], bv_combined['Neighborhood']):
    label = "{} ({})".format(neigh, post)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_f)
    
map_toronto_f