# Importing the necessary packages

In [4]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests

## Getting data from the Wikipedia page

In [5]:
# Download the Wikipedia page listing the Canadian postal codes that start with "M".
# The timeout keeps this cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops the notebook here on an HTTP error instead of
# handing an error page to the parsing cells below.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
data = response.text

## Parsing the HTML text into a BeautifulSoup object

In [7]:
# Build the parse tree from the downloaded markup with the 'html.parser' backend.
bsObject = BeautifulSoup(markup=data, features='html.parser')

### Creating empty lists for storing data 

In [13]:
# One accumulator per output column; each name is bound to its own fresh list.
postalCodeList, boroughList, neighborhoodList = [], [], []

### Populating the data into the lists

In [14]:
# Walk the first table on the page and collect one entry per row whose
# borough is assigned.
table = bsObject.find('table')
if table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError('No <table> element found in the downloaded page')

# Start from empty lists so that re-running this cell does not append duplicates.
for column in (postalCodeList, boroughList, neighborhoodList):
    column.clear()

for row in table.find_all('tr'):
    cells = row.find_all('td')
    # Rows without at least three <td> cells (e.g. a header row) cannot be
    # indexed safely, so they are skipped.
    if len(cells) < 3:
        continue
    # Strip surrounding whitespace from every cell so that a trailing newline
    # cannot defeat the 'Not assigned' comparison or leak into stored values.
    postal_code, borough, neighborhood = (cell.text.strip() for cell in cells[:3])
    if borough != 'Not assigned':
        postalCodeList.append(postal_code)
        boroughList.append(borough)
        neighborhoodList.append(neighborhood)

### Creating the DataFrame

In [15]:
# Map each column label to the list collected for it, then build the frame
# from that mapping in one step.
column_data = dict(
    PostalCode=postalCodeList,
    Borough=boroughList,
    Neighborhood=neighborhoodList,
)
df = pd.DataFrame(data=column_data)

### Grouping the DataFrame using the PostalCode and Borough

In [20]:
# Collapse rows that share a postal code and borough into a single row,
# joining the remaining column's values into one comma-separated string.
group_keys = ["PostalCode", "Borough"]
rows_by_code = df.groupby(group_keys, as_index=False)
df_grouped = rows_by_code.agg(", ".join)
df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Assigning the value of Borough to the Neighborhood if the value of Neighborhood is 'Not assigned'

In [21]:
# Replace the 'Not assigned' placeholder in Neighborhood with the row's Borough.
# Writing to the `row` yielded by iterrows() only changes a temporary copy and
# leaves the frame untouched, so the update is applied to the frame itself
# through a boolean mask and .loc. Re-running the cell is harmless: once the
# placeholders are gone the mask selects nothing.
unassigned = df_grouped["Neighborhood"] == "Not assigned"
df_grouped.loc[unassigned, "Neighborhood"] = df_grouped.loc[unassigned, "Borough"]

df_grouped.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Printing the shape of the DataFrame

In [22]:
# Display the (rows, columns) dimensions of the grouped frame.
df_grouped.shape

(103, 3)