This is the notebook for the course capstone project - Week 3.

In [1]:
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from urllib.request import urlopen

# Get the data from the Wikipedia page

link: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

## Use BeautifulSoup to get the information

In [2]:
# Download and parse the Wikipedia "List of postal codes of Canada: M" page.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Use a context manager so the HTTP response is closed as soon as the
# document has been parsed, instead of leaving the connection open.
with urlopen(url) as html:
    soup = BeautifulSoup(html, 'html.parser')

# NOTE: from this point `soup` is the list of <table> elements found in the
# document, not the parsed document itself; later code indexes it as soup[0].
soup = soup.find_all('table')

## Create lists to hold the data we extract

In [3]:
# Parallel column accumulators, filled in row order from the first table
# on the page.
postal_codes = []
boroughs = []
neighborhoods = []

table = soup[0]
rows = table.find_all('tr')

for row in rows:
    cells = row.find_all('td')

    # Three cells are read below, so require at least three. The earlier
    # `len(cells) > 1` guard let a two-cell row through: cells[2] then raised
    # IndexError after the first two lists had already been appended to,
    # leaving the three lists with different lengths. Rows with fewer <td>
    # cells (presumably the header row) are skipped as before.
    if len(cells) < 3:
        continue

    postal_code, borough, neighborhood = (cell.text.strip() for cell in cells[:3])
    postal_codes.append(postal_code)
    boroughs.append(borough)
    neighborhoods.append(neighborhood)

## convert to DataFrame

In [4]:
# Assemble the three parallel lists into one frame in a single step; the
# dict's key order determines the column order.
df = pd.DataFrame({
    'PostalCode': postal_codes,
    'Borough': boroughs,
    'Neighborhood': neighborhoods,
})

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


## Drop the rows whose borough is "Not assigned"

In [5]:
# Keep only the rows whose borough is assigned. A boolean filter replaces the
# earlier "overwrite matching rows with None, then dropna" approach, which
# would also have discarded any row that had a missing value for an unrelated
# reason. The original row index is preserved.
# .copy() yields an independent frame so that later column assignments do not
# raise SettingWithCopyWarning.
df = df[df['Borough'] != 'Not assigned'].copy()

## check if there are postal codes with multiple neighborhoods  

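When the data is scraped from the web page, a postal code that has multiple neighborhoods already has them combined into a single cell, separated by ','. The check below confirms that each postal code appears on only one row.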

In [6]:
# True when no postal code appears on more than one row.
df['PostalCode'].is_unique

True

## print the number of rows 

In [7]:
# len() of a DataFrame is its number of rows.
print('Number of rows:', len(df))

Number of rows: 103


In [8]:
## insert latitude and longitude 

In [9]:
import geocoder # import geocoder

# initialize your variable to None
# NOTE(review): neither `geocoder` nor `lat_lng_coords` is referenced again in
# the visible part of this notebook; the coordinates are read from a CSV file
# further down instead. Confirm nothing else relies on them before removing.
lat_lng_coords = None

In [10]:
# Peek at the postal code stored on the second row (positional index 1).
postal_code = df['PostalCode'].iat[1]
postal_code

'M4A'

In [11]:
# Coordinates lookup table, read from a CSV file in the working directory.
GEO_CSV_PATH = 'Geospatial_Coordinates.csv'
df_geo = pd.read_csv(GEO_CSV_PATH)
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Attach coordinates by postal code (merge's default inner join), then discard
# the duplicate key column contributed by the coordinates table.
df = (
    df.merge(df_geo, left_on='PostalCode', right_on='Postal Code')
      .drop(columns=['Postal Code'])
)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## create a map

In [13]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [14]:
# Look up the coordinates used to centre the maps below.
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; fail here with a
# clear message rather than with an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [15]:
import folium # map rendering library

In [16]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Find all boroughs with "Toronto" in their name

In [17]:
# Boolean mask: True where the borough name contains the substring "Toronto".
toronto_mask = df['Borough'].str.contains("Toronto")
df_Borough_toronto = df.loc[toronto_mask]

df_Borough_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [18]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_Borough_toronto['Latitude'], df_Borough_toronto['Longitude'], df_Borough_toronto['Borough'], df_Borough_toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto