In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

An alternative way to get the table, using the `wikipedia` package together with `pandas.read_html`:
```python
# Alternative approach: fetch the page through the `wikipedia` package and let
# pandas parse the HTML tables directly (relies on `pd` from the imports cell).
import wikipedia as wp
html = wp.page("List of postal codes of Canada: M").html().encode("UTF-8") # get the page's HTML source, encoded as UTF-8 bytes
df = pd.read_html(html)[0]       # read_html returns a list of tables; keep the first one
df.to_csv('beautifulsoup_pandas.csv',header=0,index=False) # save the table to CSV without the index column (header=0 is falsy, so presumably no column-name row is written -- confirm)
new_header = df.iloc[0]          # grab the first row to use as the header
df = df[1:]                      # keep the data without that first row
df.columns = new_header          # use the saved first row as the column names
df.head()
```

Read the HTML source of the web page and create a BeautifulSoup (`soup`) object.

In [2]:
# Download the Wikipedia page listing the postal codes that start with "M".
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,  # requests has no default timeout; without one a stalled connection hangs the cell
)
response.raise_for_status()  # stop on a 4xx/5xx response instead of parsing an error page

# NOTE: despite its name, `url` holds the page's HTML text, not the address.
# The name is kept so that any later cell referring to it keeps working.
url = response.text
soup = BeautifulSoup(url,'lxml')
# Uncomment to inspect the full parsed document:
#print(soup.prettify())

Find the table with class `wikitable sortable` in the HTML.

In [3]:
# Locate the postal-code table by its CSS class attribute.
table = soup.find('table', class_='wikitable sortable')
if table is None:
    # Fail here with a clear message rather than with an AttributeError
    # on `None` in a later cell.
    raise ValueError("No <table class='wikitable sortable'> found in the page")
table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
</td></tr>
<tr>
<td>M6A</td>

Each row starts with **tr** and ends with **/tr**. Each cell starts with **td** and ends with **/td**.
Get the header of the table.

In [4]:
# Collect the text of every header cell (<th>) in the table,
# with surrounding whitespace removed.
headings = []
for header_cell in table.find_all('th'):
    headings.append(header_cell.text.strip())
headings

['Postcode', 'Borough', 'Neighbourhood']

Extract the cell values of each row and append them to a list.

In [5]:
# Build one list of stripped cell texts per table row (<tr>).
# The header row contains only <th> cells, so its entry is an empty list;
# it is kept here because the next cell skips it with `lists[1:]`.
lists = [
    [cell.text.strip() for cell in row.find_all("td")]
    for row in table.find_all("tr")
]

Convert the list to a DataFrame.

In [6]:
# Build the DataFrame from the data rows; lists[0] is the empty entry produced
# by the header row, so it is skipped. Passing `columns=` up front names the
# columns in one step and also yields a correctly labelled (empty) frame when
# there are no data rows.
df = pd.DataFrame(lists[1:], columns=headings)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Drop the rows whose Borough column is 'Not assigned'.

In [7]:
# Keep only the rows whose Borough has actually been assigned.
has_borough = df['Borough'].ne('Not assigned')
df = df[has_borough]

Merge rows that share the same Postcode and Borough, joining their neighbourhoods with commas.

In [8]:
def _join_names(names):
    """Return the group's neighbourhood names as one comma-separated string."""
    return ','.join(names.astype(str))

# One output row per (Postcode, Borough) pair, with its neighbourhoods combined.
neighbourhoods_by_code = df.groupby(['Postcode','Borough'])['Neighbourhood']
df = neighbourhoods_by_code.apply(_join_names).reset_index()
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


Replace a 'Not assigned' neighbourhood with the name of its borough.

In [9]:
# Where the neighbourhood is 'Not assigned', substitute the borough name;
# every other neighbourhood value is left unchanged.
not_assigned = df['Neighbourhood'] == 'Not assigned'
df['Neighbourhood'] = df['Neighbourhood'].mask(not_assigned, df['Borough'])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
