### Load Libraries 

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

### Obtain Data from Wikipedia

In [2]:
# Wikipedia article listing Canadian postal codes that begin with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [3]:
# Download the page. The timeout keeps the cell from hanging indefinitely on a
# stalled connection, and raise_for_status() surfaces HTTP errors (4xx/5xx)
# instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.text

# Parse the HTML with the standard-library backed parser.
soup = BeautifulSoup(data, 'html.parser')

In [4]:
# Take the first <table> element in the document.
# NOTE(review): assumes the postal-code grid is the first table on the page.
tables = soup.find('table')
if tables is None:
    # find() returns None when nothing matches; fail here with a clear message
    # rather than with an AttributeError in a later cell.
    raise ValueError("No <table> element found in the downloaded page")

### Build a list of the raw text entries, one per postal-code cell

In [5]:
# Collect the text of every <p> element found inside each table row,
# in document order.
neighborhood_details = [
    cell.text
    for table_row in tables.find_all('tr')
    for cell in table_row.find_all('p')
]
neighborhood_details

['M1ANot assigned\n',
 'M2ANot assigned\n',
 'M3ANorth York(Parkwoods)\n',
 'M4ANorth York(Victoria Village)\n',
 'M5ADowntown Toronto(Regent Park / Harbourfront)\n',
 'M6ANorth York(Lawrence Manor / Lawrence Heights)\n',
 "M7AQueen's Park(Ontario Provincial Government)\n",
 'M8ANot assigned\n',
 'M9AEtobicoke(Islington Avenue)\n',
 'M1BScarborough(Malvern / Rouge)\n',
 'M2BNot assigned\n',
 'M3BNorth York(Don Mills)North\n',
 'M4BEast York(Parkview Hill / Woodbine Gardens)\n',
 'M5BDowntown Toronto(Garden District, Ryerson)\n',
 'M6BNorth York(Glencairn)\n',
 'M7BNot assigned\n',
 'M8BNot assigned\n',
 'M9BEtobicoke(West Deane Park / Princess Gardens / Martin Grove / Islington / Cloverdale)\n',
 'M1CScarborough(Rouge Hill / Port Union / Highland Creek)\n',
 'M2CNot assigned\n',
 'M3CNorth York(Don Mills)South(Flemingdon Park)\n',
 'M4CEast York(Woodbine Heights)\n',
 'M5CDowntown Toronto(St. James Town)\n',
 'M6CYork(Humewood-Cedarvale)\n',
 'M7CNot assigned\n',
 'M8CNot assigned\n',


### Create a dataframe and add data into the dataframe with required modifications in data format
1. Skip postal codes whose entry is marked "Not assigned".
2. Reformat entries where more than one neighborhood is allocated to a single postal code, separating the neighborhoods with commas.
3. Replace malformed borough names with cleaned-up ones.

In [8]:
# Parse each scraped entry into a (PostalCode, Borough, Neighborhood) record.
# Records are collected in a list and converted to a DataFrame once:
# DataFrame.append was removed in pandas 2.0, and appending row by row copies
# the whole frame on every iteration.
records = []
for entry in neighborhood_details:
    # Entries marked "Not assigned" are left out of the result.
    if 'Not assigned' in entry:
        continue

    postalcode = entry[:3]  # first three characters, e.g. 'M3A'

    # Text before the first '(' is the borough; text between the first '('
    # and the next ')' is the neighborhood list.
    borough, paren, remainder = entry[3:].partition('(')
    if not paren:
        # No parenthesised neighborhood list (this used to raise IndexError):
        # keep the row with an empty Neighborhood and drop the trailing newline.
        borough = borough.rstrip('\n')
    # ' /' separators between neighborhoods become commas.
    neighborhood = remainder.split(')')[0].replace(' /', ',')

    records.append({'PostalCode': postalcode, 'Borough': borough, 'Neighborhood': neighborhood})

# Passing `columns` keeps the expected schema even when no record was parsed.
df = pd.DataFrame(records, columns=['PostalCode', 'Borough', 'Neighborhood'])

# Some entries have extra text fused onto the borough name; map those exact
# strings to readable borough names.
df['Borough'] = df['Borough'].replace({
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
})
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto Business,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


### DataFrame Shape

In [7]:
# (number of rows, number of columns) of the cleaned DataFrame.
df.shape

(103, 3)