In [2]:
# The previous notebook built a dataframe of the postal code of each neighborhood along with the borough name and neighborhood name.
# In order to utilize the Foursquare location data, we now need the latitude and longitude coordinates of each neighborhood.

# In this notebook, I will:
# - Recreate the table as made in the previous notebook
# - Download the CSV containing latitude & longitude
# - Merge them into a single dataframe

import pandas as pd
from bs4 import BeautifulSoup
import requests

In [5]:
# - Recreate the table as made in the previous notebook
#
# Scrape the postal-code table from Wikipedia and reduce it to one row per
# (Postal Code, Borough) pair, with the neighborhoods joined by ", ".

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Use a timeout and check the HTTP status so a network problem fails loudly
# here instead of hanging or silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
source = response.text

wiki_page = BeautifulSoup(source, "lxml")
table = wiki_page.find("table", class_="wikitable sortable")
if table is None:
    raise RuntimeError(f"No 'wikitable sortable' table found at {url}")

# "tr" marks a new row and "td" a new cell. Rows without any "td" (such as the
# header row, which previously became an all-None first row) are skipped here
# rather than dropped by position afterwards.
rows = []
for tr in table.find_all("tr"):
    cells = [td.text.strip() for td in tr.find_all("td")]  # strip every cell, not just Neighborhood
    if cells:
        rows.append(cells)

tbl = pd.DataFrame(rows, columns=["Postal Code", "Borough", "Neighborhood"])

# Ignore cells whose borough is not assigned.
tbl = tbl[tbl["Borough"] != "Not assigned"].copy()

# A neighborhood that is "Not assigned" takes the name of its borough. This is
# done before grouping so the placeholder can never end up inside a joined
# neighborhood list.
tbl["Neighborhood"] = tbl["Neighborhood"].where(
    tbl["Neighborhood"] != "Not assigned", tbl["Borough"]
)

# Combine all neighborhoods sharing a postal code into one comma-separated row.
tbl = (
    tbl.groupby(["Postal Code", "Borough"])["Neighborhood"]
    .apply(", ".join)
    .reset_index()
)

tbl.head(5)


Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
# - Download the CSV containing latitude & longitude
# Load it straight from the URL, then show the first rows followed by the
# (rows, columns) shape as a quick sanity check.

LatLon = pd.read_csv("https://cocl.us/Geospatial_data")
print(LatLon.head(), LatLon.shape, sep="\n")

  Postal Code   Latitude  Longitude
0         M1B  43.806686 -79.194353
1         M1C  43.784535 -79.160497
2         M1E  43.763573 -79.188711
3         M1G  43.770992 -79.216917
4         M1H  43.773136 -79.239476
(103, 3)


In [9]:
# - Merge them into a single dataframe
# Both frames appear to be in the same order, but joining explicitly on the
# "Postal Code" column avoids relying on row position.

df = tbl.merge(LatLon, how="inner", on="Postal Code")
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
