**Import libraries that will be used.**

In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

**Assign the wiki link to a variable and send a request to access data from the website.**   
**Assign the data into a variable.**

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# Bound the wait so a stalled connection cannot hang the notebook forever.
r = requests.get(url, timeout=30)
# Fail here on a 4xx/5xx response instead of parsing an error page later.
r.raise_for_status()
# Raw response body (bytes) handed to the HTML parser in the next cell.
data = r.content

**Parse the data.**

In [3]:
# Build the parse tree from the downloaded bytes using Python's built-in HTML parser.
s = BeautifulSoup(markup=data, features='html.parser')

**Find the table.**

In [4]:
# First <table> whose class attribute is exactly "wikitable sortable";
# find() returns None when nothing matches.
table = s.find(name='table', attrs={'class': 'wikitable sortable'})

**Create a function to clean the data and assign the cleaned data into a dataframe.**

In [5]:
def dataframe(table, column='Borough', placeholder='Not assigned'):
    """Convert a parsed HTML table into a cleaned pandas DataFrame.

    Header text is read from every ``<th>`` element and cell text from every
    ``<td>`` element of ``table``; the flat list of cells is then folded into
    rows of ``len(headers)`` columns. Rows whose ``column`` value equals
    ``placeholder`` are removed and the index is renumbered from 0.

    Parameters
    ----------
    table : object
        Any object exposing ``find_all('th')`` and ``find_all('td')`` whose
        results expose ``get_text()`` (in this notebook, the element returned
        by ``s.find('table', ...)``).
    column : str, default 'Borough'
        Name of the column inspected for the placeholder value.
    placeholder : str, default 'Not assigned'
        Rows where ``column`` equals this value are dropped.

    Returns
    -------
    pandas.DataFrame
        One row per table row that survived the filter, with a fresh
        0-based index.

    Raises
    ------
    ValueError
        If ``table`` is None, has no header cells, or its number of data
        cells is not a multiple of the number of header cells.
    KeyError
        If ``column`` is not one of the table's headers.
    """
    # A missed lookup upstream yields None; report that directly instead of
    # failing with an AttributeError on ``None.find_all``.
    if table is None:
        raise ValueError('table is None: the HTML table was not found')

    col_list = [col.get_text().strip() for col in table.find_all('th')]
    data_list = [cell.get_text().strip() for cell in table.find_all('td')]

    num_cols = len(col_list)
    if num_cols == 0:
        # Previously this surfaced as a ZeroDivisionError.
        raise ValueError('table has no header (<th>) cells')

    # Integer arithmetic: a remainder means the flat cell list cannot be
    # folded into complete rows (e.g. merged or missing cells).
    num_rows, leftover = divmod(len(data_list), num_cols)
    if leftover:
        raise ValueError(
            'cannot arrange {} cells into rows of {} columns'.format(
                len(data_list), num_cols
            )
        )

    row_array = np.reshape(np.array(data_list), (num_rows, num_cols))
    df = pd.DataFrame(data=row_array, columns=col_list)

    # Boolean-mask filtering returns a new frame, so nothing is mutated in place.
    keep = df[column] != placeholder
    return df[keep].reset_index(drop=True)

**Check to see the data has been cleaned correctly.**

In [6]:
# Build the cleaned frame from the scraped table, then preview its first five rows.
df = dataframe(table)
df.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


**The output below represents the example shown in the course project.**

In [7]:
# Postal codes to pull out for comparison with the course project's example.
example_postal_codes = [
    'M5G', 'M2H', 'M4B', 'M1J', 'M4G', 'M4M',
    'M1R', 'M9V', 'M9L', 'M5V', 'M1B', 'M5A',
]
# isin() performs the membership test in one pass instead of twelve chained
# equality comparisons; rows keep their original order and index labels.
part_df = df.loc[df['Postal Code'].isin(example_postal_codes)]
part_df

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M1B,Scarborough,"Malvern, Rouge"
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
23,M4G,East York,Leaside
24,M5G,Downtown Toronto,Central Bay Street
27,M2H,North York,Hillcrest Village
32,M1J,Scarborough,Scarborough Village
50,M9L,North York,Humber Summit
54,M4M,East Toronto,Studio District
71,M1R,Scarborough,"Wexford, Maryvale"


**Confirm all rows that have "Not assigned" values under column "Borough" have been removed.**

In [8]:
# Count the matching rows by summing the boolean mask rather than measuring a filtered frame.
print('Column "Borough" has {} "Not assigned" values'.format(
    int((df['Borough'] == 'Not assigned').sum())
))

Column "Borough" has 0 "Not assigned" values


**Confirm Postal Code "M5A" has two neighbourhood locations separated by a comma.**

In [9]:
# Show the row(s) whose postal code is M5A.
df.loc[df['Postal Code'].eq('M5A')]

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"


**Confirm there are no "Not assigned" values in column "Neighbourhood".**

In [10]:
# Count the matching rows by summing the boolean mask rather than measuring a filtered frame.
print('Column "Neighbourhood" has {} "Not assigned" values'.format(
    int((df['Neighbourhood'] == 'Not assigned').sum())
))

Column "Neighbourhood" has 0 "Not assigned" values


**Print the shape of the dataframe.**

In [11]:
# (number of rows, number of columns) of the cleaned frame.
print((len(df.index), len(df.columns)))

(103, 3)
