### Import Dependencies

In [28]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Scrape from Wikipedia and Create Dataframe

In [29]:
page = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(page.content, 'html.parser')

table = soup.find('tbody')
rows = table.select('tr')
row = [r.get_text() for r in rows]
df = pd.DataFrame(row)
df.head()

Unnamed: 0,0
0,\nPostcode\nBorough\nNeighbourhood\n
1,\nM1A\nNot assigned\nNot assigned\n
2,\nM2A\nNot assigned\nNot assigned\n
3,\nM3A\nNorth York\nParkwoods\n
4,\nM4A\nNorth York\nVictoria Village\n


### Clean Dataframe

In [30]:
df = df[0].str.split('\n', expand=True)
df.rename(columns=df.iloc[0], inplace=True)
df.drop(df.index[0], inplace=True)
df.head()

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
1,,M1A,Not assigned,Not assigned,
2,,M2A,Not assigned,Not assigned,
3,,M3A,North York,Parkwoods,
4,,M4A,North York,Victoria Village,
5,,M5A,Downtown Toronto,Harbourfront,


### Drop Rows with Borough as Not Assigned

In [31]:
df.drop(df[df.Borough == 'Not assigned'].index, inplace=True)
df.head()

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
3,,M3A,North York,Parkwoods,
4,,M4A,North York,Victoria Village,
5,,M5A,Downtown Toronto,Harbourfront,
6,,M6A,North York,Lawrence Heights,
7,,M6A,North York,Lawrence Manor,


### Combine Neighbourhoods with Same Postcode

In [32]:
df = df.groupby(['Postcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index()
df.columns = ['Postcode', 'Borough', 'Neighbourhood']
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### If Borough Exists, but Neighbourhood not assigned

In [33]:
df.loc[df['Neighbourhood'] == "Not assigned", "Neighbourhood"] = df["Borough"]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [35]:
df.shape

(103, 3)