## 1. Import libreries

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## 2. Import the data from Wikipedia

In [2]:
# import data
data = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [3]:
soup = BeautifulSoup(data ,'html.parser')

In [4]:
# create list to store the data 
PostalCode = []
borough = []
neighborhood =[]

In [5]:
for row in soup.find('table').find_all('tr'):
    cell = row.find_all('td')
    if (len(cell)) > 0:
        PostalCode.append(cell[0].text)
        borough.append(cell[1].text)
        neighborhood.append(cell[2].text) 

In [6]:
Toronto = pd.DataFrame({'PostalCode' : PostalCode,
                       'borough' : borough,
                       'neighborhood': neighborhood})
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1A\n,Not assigned\n,\n
1,M2A\n,Not assigned\n,\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


## 3. Drop the last 2 letters (/n) from the column: postal_code and borough

In [7]:
Toronto['PostalCode'] = Toronto['PostalCode'].map(lambda x: str(x)[:-1])
Toronto['borough'] = Toronto['borough'].map(lambda x: str(x)[:-1])
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
0,M1A,Not assigned,\n
1,M2A,Not assigned,\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,"Regent Park, Harbourfront\n"


## 4. Drop the rows with postal_code not assigned

In [8]:
Toronto.drop(Toronto[Toronto['borough']=='Not assigned'].index, inplace=True)
Toronto.head()

Unnamed: 0,PostalCode,borough,neighborhood
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,"Regent Park, Harbourfront\n"
5,M6A,North York,"Lawrence Manor, Lawrence Heights\n"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government\n"


## 5. Group neighborhood with the same borough

In [9]:
Toronto = Toronto.groupby(['PostalCode','borough'], sort=False).agg( ', '.join)
Toronto.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,neighborhood
PostalCode,borough,Unnamed: 2_level_1
M3A,North York,Parkwoods\n
M4A,North York,Victoria Village\n
M5A,Downtown Toronto,"Regent Park, Harbourfront\n"
M6A,North York,"Lawrence Manor, Lawrence Heights\n"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government\n"


## 6. If Not assigned neighborhood, then neighborhood = borough

In [10]:
for index, row in Toronto.iterrows():
    if row['neighborhood'] == 'Not assigned':
        row['neighborhood'] == Toronto['borough']
Toronto.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,neighborhood
PostalCode,borough,Unnamed: 2_level_1
M3A,North York,Parkwoods\n
M4A,North York,Victoria Village\n
M5A,Downtown Toronto,"Regent Park, Harbourfront\n"
M6A,North York,"Lawrence Manor, Lawrence Heights\n"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government\n"


## 7. Shape

In [11]:
Toronto.shape

(103, 1)