### Part 1

## Importing/Installing Libraries

In [3]:
#conda install -c anaconda beautifulsoup4
import pandas as pd
from bs4 import BeautifulSoup
import requests

## Obtaining the data from Wikipedia with requests.get and Beautiful Soup

In [4]:
# Download the Wikipedia page listing Canadian postal codes that start with "M"
# and parse its HTML so the table can be walked in the following cells.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
data = response.text
soup = BeautifulSoup(data, 'html.parser')

## Creating a list for each column to store the scraped data

In [5]:
# Walk the first <table> on the page and collect one entry per data row into
# three parallel lists (postal code, borough, neighborhood).
postalCodeList = []
boroughList = []
neighborhoodList = []
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')
    # Require all three columns before indexing: a row with no <td> cells
    # (presumably the header row) is skipped, and a row with only one or two
    # cells would otherwise raise IndexError on cells[1] or cells[2].
    if len(cells) >= 3:
        # Strip every cell, not just the last one, so stray whitespace or
        # newlines cannot defeat later exact-match comparisons such as
        # Borough != "Not assigned".
        postalCodeList.append(cells[0].text.strip())
        boroughList.append(cells[1].text.strip())
        neighborhoodList.append(cells[2].text.strip())

## Creating a dataframe using the information stored in the lists above

In [6]:
# Assemble the three parallel lists into one frame, one column per list,
# and preview the first rows.
columns_by_name = {
    "PostalCode": postalCodeList,
    "Borough": boroughList,
    "Neighborhood": neighborhoodList,
}
df = pd.DataFrame(columns_by_name)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Dropping the rows whose Borough is "Not assigned"

In [7]:
# Keep only the rows whose Borough is something other than "Not assigned",
# then renumber the index from 0 so it has no gaps.
has_borough = df["Borough"].ne("Not assigned")
df = df.loc[has_borough].reset_index(drop=True)

## Grouping the data by PostalCode and then Borough, joining the neighborhoods with commas

In [8]:
# Collapse rows that share a (PostalCode, Borough) pair into a single row whose
# remaining column values are joined with ", ".
group_keys = ["PostalCode", "Borough"]
df = df.groupby(group_keys, as_index=False).agg(", ".join)
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## Now to create the dataframe shown in the provided example

In [21]:
# Build a frame holding the rows for a hand-picked set of postal codes,
# in the same order as the codes are listed.
column_names = ["PostalCode", "Borough", "Neighborhood"]
postal_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing
# a frame inside a loop re-copies it on every iteration. Collect the per-code
# slices first and concatenate once; the list order preserves the postal_list
# ordering, and codes with no match simply contribute an empty slice.
matches = [df.loc[df["PostalCode"] == code, column_names] for code in postal_list]
fixed_df = pd.concat(matches, ignore_index=True)

fixed_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M5G,Downtown Toronto,Central Bay Street
1,M2H,North York,Hillcrest Village
2,M4B,East York,"Woodbine Gardens, Parkview Hill"
3,M1J,Scarborough,Scarborough Village
4,M4G,East York,Leaside
5,M4M,East Toronto,Studio District
6,M1R,Scarborough,"Maryvale, Wexford"
7,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."
8,M9L,North York,Humber Summit
9,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo..."


## Shape of the dataframe

In [22]:
# Report the (rows, columns) size of the grouped frame.
row_count, column_count = df.shape
(row_count, column_count)

(103, 3)