In [247]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

Below we get the HTML with requests, and cut all of the HTML around the first table out of the variable.
We're assuming that the table we want is the first table on the page

In [248]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source, 'lxml')
#print(soup.prettify())
soup = str(soup)
start = soup.find('<tbody')
end = soup.find('</tbody>')
table = soup[start:end]
print(table)

<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
</td></tr>
<tr>
<td>M6A</td>
<td><a href="/wiki/North_York" tit

We split the rows of the table text into a list and coerce this into a dataframe.
We're assuming the table has a header row, and that each table row is denoted by a <tr> tag, as well as each cell in a row being denoted by a <td> tag.

In [249]:
dataframe = []
table = table[table.find('</th>'):]
table = table[table.find('</tr>')+4:]
append = table.split('<tr>')
dataframe = [i.split('<td>') for i in append]



df = pd.DataFrame(dataframe)        

We need to clean up the data frame a little. We're just finding/replacing and slicing out a bunch of the HTML surrounding dataframe values

In [250]:
df.rename(columns={1: 'PostalCode', 2:'Borough', 3:'Neighborhood'}, inplace = True)
df.drop(columns=0, inplace=True)

df['PostalCode'] = df['PostalCode'].str[:3]

df.drop([0], inplace = True)

for i, j in df.iterrows():
    j.Borough = j.Borough.replace('</a>', '')
    j.Borough = j.Borough.replace('</td>', '')
    j.Borough = j.Borough.replace('\n', '')
    if j.Borough.find('<a') > -1:
        j.Borough = j.Borough[j.Borough.find('>')+1:]
    j.Neighborhood = j.Neighborhood.replace('</td></tr>', '')
    j.Neighborhood = j.Neighborhood.replace('</a>', '')
    j.Neighborhood = j.Neighborhood.replace('\n', '')
    if j.Neighborhood.find('<a') > -1:
        j.Neighborhood = j.Neighborhood[j.Neighborhood.find('>')+1:]
    
df = df.loc[df['Borough'] != 'Not assigned']
df.reset_index()

Unnamed: 0,index,PostalCode,Borough,Neighborhood
0,3,M3A,North York,Parkwoods
1,4,M4A,North York,Victoria Village
2,5,M5A,Downtown Toronto,Harbourfront
3,6,M5A,Downtown Toronto,Regent Park
4,7,M6A,North York,Lawrence Heights
5,8,M6A,North York,Lawrence Manor
6,9,M7A,Queen's Park,Not assigned
7,11,M9A,Etobicoke,Islington Avenue
8,12,M1B,Scarborough,Rouge
9,13,M1B,Scarborough,Malvern


To consolidate duplicates, we'll group by postal code and borough, and aggregate the neighborhood values.

In [252]:
df_grouped = pd.DataFrame(df.groupby(['PostalCode', 'Borough'])['Neighborhood'].aggregate(lambda x: ', '.join(x)))
df_grouped.reset_index(inplace=True)
print(df_grouped[:50])
df_grouped.shape

   PostalCode          Borough  \
0         M1B      Scarborough   
1         M1C      Scarborough   
2         M1E      Scarborough   
3         M1G      Scarborough   
4         M1H      Scarborough   
5         M1J      Scarborough   
6         M1K      Scarborough   
7         M1L      Scarborough   
8         M1M      Scarborough   
9         M1N      Scarborough   
10        M1P      Scarborough   
11        M1R      Scarborough   
12        M1S      Scarborough   
13        M1T      Scarborough   
14        M1V      Scarborough   
15        M1W      Scarborough   
16        M1X      Scarborough   
17        M2H       North York   
18        M2J       North York   
19        M2K       North York   
20        M2L       North York   
21        M2M       North York   
22        M2N       North York   
23        M2P       North York   
24        M2R       North York   
25        M3A       North York   
26        M3B       North York   
27        M3C       North York   
28        M3H 

(103, 3)