# DataFrame creation using WebScraping Methods of BeautifulSoup4

### Import the required libraries

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
from urllib.request import urlopen

#### Set the URL for the website

In [2]:
# Wikipedia page to scrape (per its title: list of Canadian postal codes, letter M)
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

#### Use the BeautifulSoup library to parse the page and find its tables

In [3]:
# Download the page and parse it. The `with` block closes the HTTP response
# as soon as parsing is finished instead of leaving the connection open for
# the lifetime of the notebook kernel.
with urlopen(url) as html:
    soup = BeautifulSoup(html, 'html.parser')

In [4]:
# NOTE: this rebinds `soup` from the parsed document to the list of all
# <table> elements found in it; later cells index into that list (soup[0]).
soup = soup.find_all('table')

#### Get the column names of the table data

In [5]:
# Header cells (<th>) of the first table on the page
index_vals = soup[0].find_all('th')

## DataFrame Creation

#### Create DataFrame

In [6]:
# Start with an empty DataFrame (no rows, no columns); the following cell
# assigns the scraped rows to `df`.
df = pd.DataFrame()

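#### Fill the DataFrame from the table rows: skip rows whose borough is 'Not assigned', use the borough as the neighborhood when the neighborhood is empty, and merge the neighborhoods of rows that share a postal code into one comma-separated entry.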

In [7]:
# Collect the rows of the first table. Index 0 is skipped (presumably the
# header row, whose text is already captured in `headings`).
rows = soup[0].find_all('tr')
headings = [j.text.strip() for j in index_vals]
all_rows = [[cell.text.strip() for cell in tr.find_all('td')] for tr in rows[1:]]

# Build one record per postal code (first cell of each row) in a dict.
# Dicts keep insertion order, so records come out in first-seen order, and
# the "already seen?" check is O(1) instead of scanning a DataFrame column
# for every row. The frame is constructed once at the end because
# DataFrame.append, used here previously, was removed in pandas 2.0.
records = {}
for row in all_rows:
    if len(row) < 3:
        # Rows without the three expected cells cannot be interpreted
        # (indexing them would raise IndexError), so skip them.
        continue

    existing = records.get(row[0])
    if existing is not None:
        # Postal code already recorded: merge this row's neighborhood into
        # the existing entry as a comma-separated list.
        existing[headings[2]] = existing[headings[2]] + ', ' + row[2]
        continue

    if row[1] == 'Not assigned':
        # Rows without an assigned borough are dropped.
        continue

    if row[2] == '':
        # An empty neighborhood falls back to the borough name.
        row[2] = row[1]

    records[row[0]] = dict(zip(headings, row))

# `columns=headings` fixes the column order to match the table headings.
df = pd.DataFrame(list(records.values()), columns=headings)

In [8]:
# Display the column names extracted from the table header
headings

['Postal Code', 'Borough', 'Neighborhood']

In [9]:
# Arrange the columns in the same order as the table headings
df = df.reindex(columns=headings)

In [10]:
# Preview up to the first 20 rows of the assembled DataFrame
df.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


#### Display the shape of the DataFrame

In [11]:
# Shape is reported as (number of rows, number of columns)
print(df.shape)

(103, 3)
