### 1. Import the necessary packages

In [None]:
from bs4 import BeautifulSoup
import requests
from itertools import zip_longest
import pandas as pd

### 2. Scrape the Wikipedia page to load the table

In [53]:
# Fetch the raw HTML of the Wikipedia page listing the "M" postal codes.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops here on an HTTP error instead of silently parsing an error page.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
response.raise_for_status()
html_content = response.text

# Parse the HTML content
soup = BeautifulSoup(html_content, features="html.parser")

# The postal-code table is taken to be the first table with the "wikitable" class.
postal_table = soup.find("table", attrs={"class": "wikitable"})
if postal_table is None:
    raise ValueError("No table with class 'wikitable' was found on the page")

# Use the <tbody> when the markup has one; otherwise search the table itself
# so a missing <tbody> does not raise AttributeError.
postal_table_data = (postal_table.tbody or postal_table).find_all("tr")
if not postal_table_data:
    raise ValueError("The postal code table contains no rows")

# Column headings come from the <th> cells of the first row.
headings = [
    th.text.replace("\n", "").strip()
    for th in postal_table_data[0].find_all("th")
]

# Body rows only: a row without <td> cells (such as the header row) is skipped,
# so no placeholder rows have to be sliced off the DataFrame afterwards.
data = []
for tr in postal_table_data:
    cells = [td.text.replace("\n", "").strip() for td in tr.find_all("td")]
    if cells:
        data.append(cells)

# Convert the rows into a DataFrame. pandas raises ValueError here if the
# widest row does not match the number of headings.
df = (
    pd.DataFrame(data, columns=headings)
    .rename(columns={"Postal code": "Postal Code"})
)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


### 3. Clean the dataset

In [61]:
# Keep only the postal codes that have been assigned to a borough.
# A boolean mask on the "Borough" column (the second column, previously read
# by position as row[1]) replaces the row-by-row iterrows() loop; positional
# integer lookups on a label-indexed Series are deprecated in pandas.
# Rebinding df instead of dropping in place keeps the cell safe to re-run.
df = df[df["Borough"] != "Not assigned"].reset_index(drop=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


### 4. Shape of the dataset

In [55]:
# Report the dimensions of df as a (rows, columns) tuple.
df.shape

(103, 3)

### 5. Add the coordinate columns to the data

In [59]:
# Load the coordinates table from a CSV file; the relative path is resolved
# against the notebook's current working directory.
df_coor=pd.read_csv("Geospatial_Coordinates.csv")
# Preview the first rows to confirm the file loaded as expected.
df_coor.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [64]:
# Combine the coordinates with the borough/neighborhood table.
# This is an inner join on the shared "Postal Code" column, so a postal code
# missing from either frame does not appear in the result.
df_new = df_coor.merge(df, how="inner", on="Postal Code")
df_new

Unnamed: 0,Postal Code,Latitude,Longitude,Borough,Neighborhood
0,M1B,43.806686,-79.194353,Scarborough,Malvern / Rouge
1,M1C,43.784535,-79.160497,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,43.763573,-79.188711,Scarborough,Guildwood / Morningside / West Hill
3,M1G,43.770992,-79.216917,Scarborough,Woburn
4,M1H,43.773136,-79.239476,Scarborough,Cedarbrae
...,...,...,...,...,...
98,M9N,43.706876,-79.518188,York,Weston
99,M9P,43.696319,-79.532242,Etobicoke,Westmount
100,M9R,43.688905,-79.554724,Etobicoke,Kingsview Village / St. Phillips / Martin Grov...
101,M9V,43.739416,-79.588437,Etobicoke,South Steeles / Silverstone / Humbergate / Jam...
