**Import libraries that will be used.**

In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import pickle

**Assign the wiki link to a variable and send a request to access data from the website.**   
**Assign the data into a variable.**

In [2]:
# Wikipedia article that holds the Toronto ("M") postal-code table.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# requests applies no timeout unless one is given, so bound the wait instead of
# letting the cell hang on an unresponsive server.
r = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx reply rather than parsing an error page further down.
r.raise_for_status()
# Raw response bytes; these are handed to BeautifulSoup in the next cell.
data = r.content

**Parse the data.**

In [3]:
# Build the parse tree from the downloaded bytes with the 'html.parser' backend.
s = BeautifulSoup(markup=data, features='html.parser')

**Find the table.**

In [4]:
# A multi-class string passed through attrs matches only that exact value of
# the class attribute, so try it first to keep the original selection.
table = s.find('table', attrs = {'class': "wikitable sortable"})
if table is None:
    # Fall back to a CSS selector, which accepts the two classes in any order
    # and tolerates extra classes on the element.
    table = s.select_one('table.wikitable.sortable')
if table is None:
    # Fail in this cell with a clear message instead of passing None onwards.
    raise ValueError('No table with classes "wikitable" and "sortable" was found on the page.')

**Create a function to clean the data and assign the cleaned data into a dataframe.**

In [5]:
def dataframe(table):
    """Turn a scraped HTML table into a cleaned pandas DataFrame.

    The text of every ``th`` cell becomes a column name and the text of every
    ``td`` cell is laid out row by row under those columns. The column
    'Neighbourhood' is renamed to 'Neighborhood', rows whose 'Borough' equals
    'Not assigned' are removed, and the index is renumbered from zero.

    Parameters
    ----------
    table : object
        Any object exposing ``find_all(tag_name)`` that returns cells with a
        ``get_text()`` method (for example a BeautifulSoup table tag).

    Returns
    -------
    pandas.DataFrame
        One row per table row that has an assigned borough.

    Raises
    ------
    ValueError
        If ``table`` is None, has no header cells, or its number of data cells
        is not a multiple of the number of header cells.
    KeyError
        If the header row has no 'Borough' column.
    """
    if table is None:
        raise ValueError('table is None: the postal-code table was not found')

    col_list = [header.get_text().strip() for header in table.find_all('th')]
    if not col_list:
        raise ValueError('table has no <th> header cells to use as columns')

    data_list = [cell.get_text().strip() for cell in table.find_all('td')]

    # Integer arithmetic keeps the row count exact and exposes a ragged table
    # before the reshape is attempted.
    num_rows, leftover = divmod(len(data_list), len(col_list))
    if leftover:
        raise ValueError(
            'cannot arrange {} data cells into rows of {} columns'.format(
                len(data_list), len(col_list)))

    row_array = np.reshape(np.array(data_list), (num_rows, len(col_list)))

    df = pd.DataFrame(data=row_array, columns=col_list)
    df = df.rename(columns={'Neighbourhood': 'Neighborhood'})
    # Keep only rows with an assigned borough, then renumber from zero.
    df = df[df['Borough'] != 'Not assigned'].reset_index(drop=True)

    return df

**Check to see the data has been cleaned correctly.**

In [6]:
# Run the cleaning function once; the following cells all inspect `df`.
df = dataframe(table)
# Preview the first twelve rows.
df.head(n=12)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


**The output below reproduces the example rows shown in the course project.**

In [7]:
# Postal codes of the course-project example, in the order they should appear.
example_postal_codes = ['M5G', 'M2H', 'M4B', 'M1J', 'M4G', 'M4M',
                        'M1R', 'M9V', 'M9L', 'M5V', 'M1B', 'M5A']

# Select each code separately and concatenate the pieces so the rows follow
# the list order above rather than their order in `df`.
part_df = pd.concat(
    [df[df['Postal Code'] == code] for code in example_postal_codes]
).reset_index(drop=True)
part_df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Parkview Hill, Woodbine Gardens"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Wexford, Maryvale"
7,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har..."


**Confirm all rows that have "Not assigned" values under column "Borough" have been removed.**

In [8]:
# Summing the boolean mask counts the rows that still carry the placeholder.
print('Column "Borough" has {} "Not assigned" values'.format(
    df['Borough'].eq('Not assigned').sum()))

Column "Borough" has 0 "Not assigned" values


**Confirm Postal Code "M5A" has two neighbourhood locations separated by a comma.**

In [9]:
# Show the row(s) whose postal code is M5A.
df.loc[df['Postal Code'].eq('M5A')]

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"


**Confirm there are no "Not assigned" values in column "Neighborhood."**

In [10]:
# Summing the boolean mask counts the rows that still carry the placeholder.
print('Column "Neighborhood" has {} "Not assigned" values'.format(
    df['Neighborhood'].eq('Not assigned').sum()))

Column "Neighborhood" has 0 "Not assigned" values


**Print the shape of the dataframe.**

In [11]:
# Print the (rows, columns) tuple of the cleaned dataframe.
print(df.shape)

(103, 3)


**Save dataframe into a pickle file.**

In [12]:
# Write the cleaned dataframe to a pickle file at this relative path.
# Unpickling can execute arbitrary code, so only load this file back from a trusted location.
df.to_pickle('Toronto Neighborhood.pkl')