## Q1 - Scraping Data From Wikipedia ##

#### <font color='blue'>Step 1 - Importing Required Libraries </font> ####

In [2]:
# importing necessary libraries
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

#### <font color='blue'>Step 2 - Scraping Data From Wikipedia using Beautiful Soup Library</font> ####

In [21]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
wikilink='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging indefinitely if the server never responds.
response = requests.get(wikilink, timeout=30)
# Stop here on an HTTP error instead of parsing an error page in the cells below.
response.raise_for_status()
raw_wikipedia_page = response.text

# The response is an HTML document, so parse it with an HTML parser
# (the 'xml' parser is intended for XML documents).
soup = BeautifulSoup(raw_wikipedia_page, 'html.parser')

#### <font color='blue'>Step 3 - Reading Table Data from Wikipedia</font> ####

In [24]:
# Locate the postal-code table by its exact class attribute.
table = soup.find('table',{'class':'wikitable sortable'})
if table is None:
    # Without this check the next statement fails with an AttributeError on None.
    raise ValueError("No table with class 'wikitable sortable' was found in the page")

table_rows = table.find_all('tr')

# One list of stripped cell texts per <tr>; a row without any <td> cells
# yields an empty list.
data = [[cell.text.strip() for cell in row.find_all('td')] for row in table_rows]

df = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
# Rows that had no <td> cells end up with a null PostalCode; drop them.
# The remaining rows keep their original index labels.
df = df.dropna(subset=['PostalCode'])
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 180 entries, 1 to 180
Data columns (total 3 columns):
PostalCode       180 non-null object
Borough          180 non-null object
Neighbourhood    180 non-null object
dtypes: object(3)
memory usage: 5.6+ KB


In [25]:
# Display the (rows, columns) shape of the table parsed above.
df.shape

(180, 3)

#### <font color='blue'>Step 4 - Dropping Rows with a 'Not assigned' Borough and Filling 'Not assigned' Neighbourhoods with the Borough Name</font> ####

In [37]:

# Keep only the postal codes that have a borough assigned. A new frame is
# built here rather than mutating the existing one in place.
has_borough = df['Borough'] != 'Not assigned'

# Where the neighbourhood is 'Not assigned', use the borough name instead.
df = df[has_borough].assign(
    Neighbourhood=lambda kept: kept['Neighbourhood'].mask(
        kept['Neighbourhood'] == 'Not assigned', kept['Borough']
    )
)

#### <font color='blue'>Checking the Row Count Using the `shape` Attribute</font> ####

In [36]:
# Display the (rows, columns) shape of df to check the row count.
df.shape

(103, 3)

#### <font color='blue'>Step 5 - Combining Neighbourhoods That Share the Same Postal Code</font> ####

In [39]:
# Collect the neighbourhood names for each (postal code, borough) pair and
# merge them into a single comma-separated string per pair.
neighbourhoods_by_code = df.groupby(["PostalCode", "Borough"])["Neighbourhood"]
merged = neighbourhoods_by_code.apply(lambda names: ", ".join(names))

# Turn the group keys back into ordinary columns.
df = merged.reset_index()
df.head(20)

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"
