<h4> Import the Libraries </h4>

In [11]:
import os
from io import StringIO

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder

# Lift pandas' display truncation so frames render every column and row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

<h4> Download the web page and parse it with lxml </h4>

In [12]:
# Download the Wikipedia page listing the "M" postal codes and parse it with lxml.
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# A timeout keeps the cell from hanging on a stalled connection.
page_response = requests.get(WIKI_URL, timeout=30)
# Fail here on an HTTP error instead of silently parsing an error page.
page_response.raise_for_status()

# Despite its name, `website_url` holds the page's HTML text; the name is kept
# so any cell that refers to it keeps working.
website_url = page_response.text
soup = BeautifulSoup(website_url, 'lxml')

<h4> Get the required table and read it to a dataframe </h4>

In [13]:
# The postal-code table is the first <table> element on the page.
table = soup.find_all('table')[0]

# pandas deprecated passing literal HTML to read_html; wrapping the markup in
# StringIO hands it over as a file-like object instead.
df_tor = pd.read_html(StringIO(str(table)))[0]
df_tor.columns = ['Postal Code', 'Borough', 'Neighbourhood']

# Drop the first parsed row, then turn the 'Not assigned' placeholder into NaN.
# replace() returns a new frame, so nothing is assigned into an iloc slice
# (the original boolean-mask assignment could raise SettingWithCopyWarning).
# NOTE(review): the first row was presumably a header parsed as data by the
# pandas version originally used - confirm it is still safe to discard.
df_tor = df_tor.iloc[1:].replace('Not assigned', np.nan)

<h4> Convert the Postal Code column to string </h4>

In [14]:
# Cast only the 'Postal Code' column to str; the other columns keep their dtypes.
df_tor = df_tor.astype({'Postal Code': 'str'})

<h4> Remove the missing values in Borough column </h4>

In [15]:
# Keep only the rows that have a Borough value.
df_tor = df_tor.dropna(subset=['Borough'])

<h4> Assign missing values in Neighbourhood column as the Borough </h4>

In [16]:
# Where Neighbourhood is missing, fall back to the row's Borough.
neighbourhood_filled = df_tor['Neighbourhood'].fillna(df_tor['Borough'])
df_tor = df_tor.assign(Neighbourhood=neighbourhood_filled)

<h4> Group the rows with the same Postal Code </h4>

In [17]:
# Collapse to one row per postal code: keep the first Borough seen and join
# every Neighbourhood of that code into a single comma-separated string.
aggregations = {'Borough': 'first', 'Neighbourhood': ', '.join}
by_postal_code = df_tor.groupby('Postal Code')
df_tor = by_postal_code.agg(aggregations).reset_index()
df_tor.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


<h4> Number of Rows in the DataFrame </h4>

In [18]:
# Number of rows (one per postal code after grouping).
len(df_tor)

103

<h4> Import the Locations CSV </h4>

In [19]:
# Load the postal-code coordinates, then cast the three columns in one step.
coordinate_dtypes = {'Postal Code': str, 'Latitude': float, 'Longitude': float}
locations = pd.read_csv('Geospatial_Coordinates.csv').astype(coordinate_dtypes)
locations.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


<h4> Merge the two tables  </h4>

In [20]:
# Left join: every postal-code row is kept and gains Latitude/Longitude where
# the coordinates table has a matching 'Postal Code'.
df_tor = pd.merge(df_tor, locations, how='left', on='Postal Code')
df_tor.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


<h5> Get the location of Toronto </h5>

In [22]:
# Look up the coordinates used to centre the Toronto maps.
address = 'Toronto'

geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; raise a clear error here
    # instead of an AttributeError on the attribute access below.
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


<h5> Create a Map of Toronto </h5>

In [23]:
# Base map centred on the coordinates geocoded above.
map_tr = folium.Map(location=[latitude, longitude], zoom_start=10)

# Styling shared by every marker (same keyword arguments as before, gathered once).
marker_style = {
    'radius': 5,
    'color': 'blue',
    'fill': True,
    'fill_color': '#3186cc',
    'fill_opacity': 0.7,
    'parse_html': False,
}

# One circle per postal-code row, with a "neighbourhood, borough" popup.
marker_rows = zip(df_tor['Latitude'], df_tor['Longitude'], df_tor['Borough'], df_tor['Neighbourhood'])
for lat, lng, borough, neighborhood in marker_rows:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_tr)

In [24]:
# `toronto_data` is a second name for the same DataFrame object as `df_tor`
# (plain assignment, not a copy).
toronto_data = df_tor
toronto_data.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


<h5> Draw the Map using Folium </h5>

In [25]:
# Closer-zoom map centred on the same geocoded coordinates.
map_etr = folium.Map(location=[latitude, longitude], zoom_start=11)

# Styling shared by every marker (same keyword arguments as before, gathered once).
marker_style = {
    'radius': 5,
    'color': 'blue',
    'fill': True,
    'fill_color': '#3186cc',
    'fill_opacity': 0.7,
    'parse_html': False,
}

# One circle per row; the popup shows the row's Neighbourhood text.
marker_rows = zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood'])
for lat, lng, label in marker_rows:
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_etr)

map_etr

<h5> Declare the variables for FourSquare API </h5>

In [61]:
# Foursquare credentials come from the environment so they are never stored in
# the notebook source or its saved output. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180604'  # sent as the API's `v` parameter
LIMIT = 100  # sent as the API's `limit` parameter

# Report only whether the credentials are present - never echo their values.
missing_credentials = [
    env_name
    for env_name, value in (
        ('FOURSQUARE_CLIENT_ID', CLIENT_ID),
        ('FOURSQUARE_CLIENT_SECRET', CLIENT_SECRET),
    )
    if not value
]
if missing_credentials:
    print('Missing Foursquare credentials; set: ' + ', '.join(missing_credentials))
else:
    print('Foursquare credentials loaded from the environment.')

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [50]:
# Report the number of distinct boroughs and the number of rows in the frame.
borough_count = toronto_data['Borough'].unique().size
row_count = len(toronto_data)
summary_template = 'The dataframe has {} boroughs and {} neighborhoods.'
print(summary_template.format(borough_count, row_count))

The dataframe has 11 boroughs and 103 neighborhoods.


<h5> Get the location of Scarborough </h5>

In [51]:
# Look up the coordinates used to centre the Scarborough map.
# Note: this rebinds `address`, `geolocator`, `location`, `latitude` and
# `longitude`, which were set earlier for Toronto.
address = 'Scarborough, Toronto'

geolocator = Nominatim(user_agent="scar_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; raise a clear error here
    # instead of an AttributeError on the attribute access below.
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [52]:
# Rows whose Borough is exactly 'Scarborough', re-indexed from 0.
in_scarborough = toronto_data['Borough'].eq('Scarborough')
scar_data = toronto_data.loc[in_scarborough].reset_index(drop=True)

<h5> Create a map of Scarborough using Folium </h5>

In [53]:
# Map centred on the most recently geocoded coordinates.
map_scar = folium.Map(location=[latitude, longitude], zoom_start=11)

# Styling shared by every marker (same keyword arguments as before, gathered once).
marker_style = {
    'radius': 5,
    'color': 'blue',
    'fill': True,
    'fill_color': '#3186cc',
    'fill_opacity': 0.7,
    'parse_html': False,
}

# One circle per Scarborough row; the popup shows the row's Neighbourhood text.
marker_rows = zip(scar_data['Latitude'], scar_data['Longitude'], scar_data['Neighbourhood'])
for lat, lng, label in marker_rows:
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_scar)

map_scar

<h5> Create a function to get Top 100 venues </h5>

In [60]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    """Collect Foursquare "explore" venues around each named location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Parallel sequences describing the locations to query. They are zipped
        together, so iteration stops at the shortest one.
    radius : int, default 1000
        Forwarded as the API's ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue is returned. 'Venue Category' is
        None for a venue that lists no categories.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Pass the query as params so requests URL-encodes every value rather
        # than interpolating credentials into the URL by hand.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # A timeout avoids hanging on a stalled connection; raise_for_status
        # surfaces HTTP errors instead of a KeyError on an error payload.
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()

        groups = response.json().get('response', {}).get('groups') or []
        items = groups[0].get('items', []) if groups else []

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            # Indexing categories[0] on an empty list would raise IndexError.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # Passing the columns to the constructor keeps the schema when `rows` is
    # empty; assigning seven names to a column-less frame raises ValueError.
    return pd.DataFrame(rows, columns=columns)

# Query venues around every Scarborough row, using the function's default radius.
scar_venues = getNearbyVenues(
    names=scar_data['Neighbourhood'],
    latitudes=scar_data['Latitude'],
    longitudes=scar_data['Longitude'],
)



Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge


<h5> Get number of unique venue categories </h5>

In [75]:
# Number of distinct venue categories (dropna=False counts a missing value as
# one distinct value, exactly as len(unique()) does).
scar_venues['Venue Category'].nunique(dropna=False)

117

<h5> Get number of restaurant venues </h5>

In [None]:
# Keep venues whose category name contains the literal text "Restaurant".
# na=False treats a missing category as "not a restaurant"; without it a
# missing value leaves the mask non-boolean and the row selection raises.
is_restaurant = scar_venues['Venue Category'].str.contains('Restaurant', regex=False, na=False)
scar_restaurants = scar_venues[is_restaurant]
print('{} restaurants were returned by Foursquare.'.format(scar_restaurants.shape[0]))

<h5> EDA for Neighbourhoods </h5>

In [67]:
# Restaurant rows per neighbourhood: count() reports the non-null values of
# every remaining column within each group.
restaurants_by_neighborhood = scar_restaurants.groupby(by='Neighborhood')
restaurants_by_neighborhood.count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,22,22,22,22,22,22
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",10,10,10,10,10,10
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,9,9,9,9,9,9
"Clairlea, Golden Mile, Oakridge",4,4,4,4,4,4
"Clarks Corners, Sullivan, Tam O'Shanter",12,12,12,12,12,12
"Cliffcrest, Cliffside, Scarborough Village West",3,3,3,3,3,3
"Dorset Park, Scarborough Town Centre, Wexford Heights",12,12,12,12,12,12
"East Birchmount Park, Ionview, Kennedy Park",6,6,6,6,6,6
"Guildwood, Morningside, West Hill",3,3,3,3,3,3
