In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
# Download the Wikipedia page listing Canadian postal codes that start with "M"
# and parse it into `page_data_soup` for the scraping cell below.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# A timeout keeps the cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)

# Fail loudly on anything other than HTTP 200: merely printing a message would
# leave `page_data_soup` undefined (or stale from an earlier run), and the
# following cells would then break in a much less obvious way.
if response.status_code != 200:
    raise RuntimeError(
        "Error requesting this URL: {} returned HTTP {}".format(url, response.status_code)
    )

# Peek at the first 500 bytes as a quick check that HTML came back.
print(response.content[0:500])
page_data_soup = BeautifulSoup(response.content, 'lxml')

b'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>List of postal codes of Canada: M - Wikipedia</title>\n<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"d050a60c-aba9-489d-8cf'


In [3]:
# Scrape the first "wikitable sortable" table of the parsed Wikipedia page
# into a DataFrame.
import pandas as pd

postal_table = page_data_soup.find('table', class_="wikitable sortable")

# One list of cell texts per <tr>: header cells (<th>) first, then data cells
# (<td>), with newlines and commas stripped from every cell.
table_rows = [
    [
        cell.get_text().replace("\n", '').replace(",", '')
        for cell in table_row.find_all('th') + table_row.find_all('td')
    ]
    for table_row in postal_table.find_all('tr')
]

scrap_data = pd.DataFrame(table_rows)
scrap_data.columns = scrap_data.iloc[0]            # first scraped row holds the column names
# Remove that header row from the data, then rebuild the index
# (the previous index is kept as an 'index' column).
scrap_data = scrap_data.drop(scrap_data.index[0]).reset_index()
scrap_data

Unnamed: 0,index,Postal Code,Borough,Neighbourhood
0,1,M1A,Not assigned,Not assigned
1,2,M2A,Not assigned,Not assigned
2,3,M3A,North York,Parkwoods
3,4,M4A,North York,Victoria Village
4,5,M5A,Downtown Toronto,Regent Park Harbourfront
...,...,...,...,...
175,176,M5Z,Not assigned,Not assigned
176,177,M6Z,Not assigned,Not assigned
177,178,M7Z,Not assigned,Not assigned
178,179,M8Z,Etobicoke,Mimico NW The Queensway West South of Bloor Ki...


In [4]:
# Keep only the rows whose borough has actually been assigned.
scrap_data = scrap_data[(scrap_data.Borough != 'Not assigned')]

# Collapse rows that share a postal code into a single row per code:
#   * Borough       -> the first value in the group. The previous identity
#                      lambda was not a reduction, so it only worked while
#                      every postal code had exactly one row.
#   * Neighbourhood -> all values in the group joined with ", ".
group_data = scrap_data.groupby('Postal Code').agg({
    'Borough': 'first',
    'Neighbourhood': lambda x: ', '.join(x)
})
group_data

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,Malvern Rouge
M1C,Scarborough,Rouge Hill Port Union Highland Creek
M1E,Scarborough,Guildwood Morningside West Hill
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,Kingsview Village St. Phillips Martin Grove Ga...
M9V,Etobicoke,South Steeles Silverstone Humbergate Jamestown...


In [5]:
# Sanity check: select any rows whose neighbourhood is still 'Not assigned'.
# The selection comes back empty, so no such rows remain.
group_data.loc[group_data['Neighbourhood'] == 'Not assigned']

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1


In [6]:
# Report the dimensions of the grouped frame as (rows, columns) via its .shape attribute.
group_data.shape

(103, 2)

In [7]:
# !pip install geocoder
import geocoder # import geocoder

In [8]:
# Geocode a postal code, retrying a bounded number of times.

def get_postal_code(postal_code, max_retries=10, delay=1.0):
    """Look up the (latitude, longitude) of a Toronto postal code.

    Despite its name, this function returns coordinates, not a postal code.
    It queries ``geocoder.google`` with ``"<postal_code>, Toronto, Ontario"``
    and retries while the response carries no coordinates. The number of
    attempts is bounded so the call can no longer loop forever when the
    service keeps returning nothing.

    Parameters
    ----------
    postal_code : str
        Postal code to geocode, e.g. ``'M5G'``.
    max_retries : int, default 10
        Maximum number of geocoding attempts; must be at least 1.
    delay : float, default 1.0
        Seconds to wait between two consecutive attempts.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` taken from the first response that
        contains coordinates.

    Raises
    ------
    ValueError
        If ``max_retries`` is smaller than 1.
    RuntimeError
        If no attempt returned coordinates.
    """
    import time  # local import so this cell does not depend on another cell's imports

    if max_retries < 1:
        raise ValueError("max_retries must be at least 1, got {}".format(max_retries))

    query = '{}, Toronto, Ontario'.format(postal_code)

    for attempt in range(1, max_retries + 1):
        g = geocoder.google(query)
        lat_lng_coords = g.latlng
        if lat_lng_coords is not None:
            latitude = lat_lng_coords[0]
            longitude = lat_lng_coords[1]
            return (latitude, longitude)
        # Pause before retrying, but not after the final attempt.
        if attempt < max_retries:
            time.sleep(delay)

    raise RuntimeError(
        "No coordinates returned for '{}' after {} attempt(s)".format(query, max_retries)
    )

# get_postal_code('M5G') # took too long to work.

In [9]:
import pandas as pd
import numpy as np

# Load the postal-code coordinate table from the CSV file in the working
# directory and preview its first five rows.
Geospatial_Coordinates = pd.read_csv(filepath_or_buffer='Geospatial_Coordinates.csv')
Geospatial_Coordinates.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Attach the coordinates of each postal code by joining group_data with
# Geospatial_Coordinates on 'Postal Code'.
#
# group_data is reassigned here, so re-running the cell would otherwise merge
# into an already-merged frame and create suffixed duplicates (Latitude_x,
# Latitude_y, ...). Dropping any coordinate columns left from a previous run
# first makes the cell safe to execute repeatedly; on the first run there is
# nothing to drop and errors='ignore' makes that a no-op.
coordinate_columns = Geospatial_Coordinates.columns.difference(['Postal Code'])
group_data = (
    group_data
    .drop(columns=coordinate_columns, errors='ignore')
    .merge(Geospatial_Coordinates, on='Postal Code', how='inner')
)

In [11]:
# Display group_data after the coordinate merge (the rendered output elides the middle rows).
group_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,Malvern Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill Port Union Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood Morningside West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,Kingsview Village St. Phillips Martin Grove Ga...,43.688905,-79.554724
101,M9V,Etobicoke,South Steeles Silverstone Humbergate Jamestown...,43.739416,-79.588437
