## Step 0: Import libraries

In [1]:
# Importing the essential libraries
import requests    # to use get method of requests library to send a GET request
import pandas as pd     # to organize the data into dataframes
from bs4 import BeautifulSoup    # to parse html web pages

## Step 1: Send a GET request to the Wikipedia page

In [2]:
# Download the Wikipedia page that lists the Canadian postal codes starting with 'M'.
# A timeout stops the notebook from hanging forever on a stalled connection, and
# raise_for_status() fails right here on an HTTP error (4xx/5xx) instead of letting
# an error page flow into the parsing step.
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(WIKI_URL, timeout=30)
response.raise_for_status()

## Step 2: Parse the returned HTML page

In [3]:
# Parse the downloaded page with BeautifulSoup using the 'html.parser' backend
complete_soup = BeautifulSoup(response.content, 'html.parser')

# Extract the Wikipedia postal code table
table = complete_soup.find('table', class_ = 'wikitable sortable')
if table is None:
    # find() returns None when nothing matches; fail with a clear message instead of
    # an AttributeError on the next line
    raise ValueError("No table with class 'wikitable sortable' was found in the downloaded page")

# Collect the column names from the 'th' tags, stripping surrounding whitespace
table_headings = table.find_all('th')
indexes = [heading.text.strip() for heading in table_headings]

# Find all the rows using 'tr' tags
table_rows = table.find_all('tr')

# Collect the text of every 'td' cell, one list per table row
table_data_array = []
for tr in table_rows:
    td = tr.find_all('td')
    if not td:
        # A row without 'td' cells (e.g. the heading row, which only has 'th' cells)
        # carries no data; skipping it avoids an all-missing row in the dataframe
        continue
    row = [t.text.strip() for t in td]
    table_data_array.append(row)

# Create a dataframe from the collected rows, using the headings as column names
df = pd.DataFrame(table_data_array, columns=indexes)

## Step 3: Cleaning the dataframe

In [4]:
# Keep only the rows whose borough is something other than 'Not assigned',
# then discard every remaining row that contains a missing (NA) value
has_borough = df['Borough'].ne('Not assigned')
filtered_df = df.loc[has_borough].dropna()

In [5]:
# Rebuild the index after the cleaning: drop=True discards the old index labels
# instead of keeping them as an extra column
filtered_df = filtered_df.reset_index(drop=True)

In [6]:
# If a row has a borough but a 'Not assigned' neighbourhood, the neighbourhood becomes the borough.
# Series.mask replaces the values where the condition is True with the value from the same
# row of 'Borough'; Series.replace is not meant for this row-by-row substitution from another Series.
neighbourhood_missing = filtered_df['Neighbourhood'] == 'Not assigned'
filtered_df['Neighbourhood'] = filtered_df['Neighbourhood'].mask(neighbourhood_missing, filtered_df['Borough'])

## Step 4: Checking the shape of the dataframe

In [7]:
# Display the (rows, columns) shape of the cleaned dataframe as the cell output
filtered_df.shape

(103, 3)