## Importing all necessary libraries

In [221]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

## Obtaining the content of the webpage with Requests and BeautifulSoup

In [222]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Fail fast: the timeout stops the cell from hanging on a stalled connection,
# and raise_for_status() turns an HTTP error into an exception instead of
# letting an error page be parsed as if it were the article.
site = requests.get(url, timeout=30)
site.raise_for_status()
doc = BeautifulSoup(site.text, "lxml")
table = doc.find("table", class_="wikitable sortable")
if table is None:
    raise ValueError("No 'wikitable sortable' table found at " + url)
postcode = []
borough = []
neighborhood = []
rows = table.find_all("tr")
# The first row is the describing header, so it is skipped up front instead
# of being appended and deleted from the lists afterwards.
for row in rows[1:]:
    # Read each cell on its own rather than splitting the row text on
    # newlines, so a short or multi-line row cannot shift the columns.
    cells = [
        cell.get_text().strip()
        for cell in row.find_all(["td", "th"], recursive=False)
    ]
    if len(cells) < 3:
        continue  # not a (postal code, borough, neighborhood) row
    code, boro, hood = cells[:3]
    if boro != "Not assigned":  # Skip all rows with a "Not assigned" as borough
        postcode.append(code)
        borough.append(boro)
        neighborhood.append(hood)

## Creating a data frame from the three derived lists

In [223]:
# Assemble the three parallel lists into a single frame, one column per list
df = pd.DataFrame(
    {
        "PostalCode": postcode,
        "Borough": borough,
        "Neighborhood": neighborhood,
    }
)
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Not assigned
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


## Assigning the borough name as the neighborhood in rows whose "Neighborhood" value is "Not assigned"

In [224]:
# Wherever no neighborhood was assigned, reuse the borough name instead.
# The update goes through a boolean mask and .loc so that it is written to df
# itself; the rows yielded by iterrows() may be copies, in which case
# assigning to them never reaches the frame.
unassigned = df["Neighborhood"] == "Not assigned"
df.loc[unassigned, "Neighborhood"] = df.loc[unassigned, "Borough"]
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Queen's Park
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


## Merging the value of the neighborhood in rows with the same postal code and borough

In [225]:
# One row per (PostalCode, Borough) pair; the neighborhoods belonging to the
# pair are gathered into an array of distinct values.
df_gr = (
    df.groupby(["PostalCode", "Borough"])["Neighborhood"]
    .unique()
    .reset_index()
)
df_gr.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"[Rouge, Malvern]"
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]
5,M1J,Scarborough,[Scarborough Village]
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]"
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]"
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]"
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]"


## Joining the list elements in the column "Neighborhood" into a single string with "," as the delimiter

In [226]:
# Collapse each collection of neighborhood names into one comma-separated
# string. Values that are already strings are left untouched, so running this
# cell a second time does not insert a comma between every character.
df_gr["Neighborhood"] = df_gr["Neighborhood"].map(
    lambda names: names if isinstance(names, str) else ",".join(names)
)
df_gr.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


## Checking the updated shape of the data frame

In [227]:
# Dimensions of the grouped frame as a (rows, columns) tuple
df_gr.shape

(103, 3)