<h1>Scraping Neighborhoods in Toronto</h1>

<h2>1) Import needed libraries</h2>

In [1]:
#!pip install BeautifulSoup4 
#!conda install -c anaconda lxml --yes
from io import StringIO  # wraps HTML markup as a file-like object for pd.read_html

from bs4 import BeautifulSoup
import requests # library to handle requests
import pandas as pd

<h2>2) Acquire the webpage containing the information we need</h2>

In [2]:
# Download the Wikipedia page that lists the "M" postal codes.
# The timeout keeps this cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of silently
# parsing an error page in the cells below.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
response.raise_for_status()
source = response.text

# Parse the raw HTML with the lxml backend so individual tags can be looked up.
soup = BeautifulSoup(source, 'lxml')

<h2>3) Separate the table of interest into a dataframe</h2>

In [3]:
# `soup.table` is the first <table> tag in the parsed page.
table = soup.table
if table is None:
    # Fail with a clear message rather than letting read_html choke on the string "None".
    raise ValueError("No <table> element found in the downloaded page")

# read_html returns a list of DataFrames; the markup holds a single table, so take [0].
# The markup is wrapped in StringIO because passing a literal HTML string to
# read_html is deprecated in recent pandas releases; file-like input works everywhere.
Toronto_Neigh = pd.read_html(StringIO(str(table)), header=0)[0]

Toronto_Neigh.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


<h2>4) Remove postcodes with unassigned boroughs</h2>

In [4]:
# Keep only the rows whose borough is assigned.
# One boolean mask replaces the previous row-by-row drop(), which rebuilt the
# whole frame once for every removed row. The original index labels are kept,
# and .copy() makes the result an independent frame for the later cells.
has_borough = Toronto_Neigh['Borough'] != "Not assigned"
Toronto_Neigh = Toronto_Neigh[has_borough].copy()
Toronto_Neigh.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


<h2>5) Replace neighbourhood with borough name for postcodes with no assigned neighbourhood info</h2>

In [5]:
# Where the neighbourhood is "Not assigned", fall back to the borough name.
# The previous loop assigned into the `row` objects yielded by iterrows(); those
# are copies, so the DataFrame itself was never updated. Build the corrected
# column in one vectorized step and bind the result back to Toronto_Neigh.
neighbourhood_missing = Toronto_Neigh['Neighbourhood'] == "Not assigned"
Toronto_Neigh = Toronto_Neigh.assign(
    # Series.mask replaces values where the condition is True with the aligned borough.
    Neighbourhood=Toronto_Neigh['Neighbourhood'].mask(
        neighbourhood_missing, Toronto_Neigh['Borough']
    )
)
Toronto_Neigh.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


<h2>6) Group neighbourhoods based on postcodes</h2>

In [6]:
# Collapse the frame to one row per (Postcode, Borough) pair, joining the
# neighbourhood names of each pair into a single comma-separated string.
neighbourhoods_by_code = Toronto_Neigh.groupby(['Postcode', 'Borough'])['Neighbourhood']
Toronto_Neigh = (
    neighbourhoods_by_code
    .apply(','.join)
    .to_frame()
    .reset_index()  # turn the group keys back into ordinary columns
)

<h2>Here is the resulting data frame and its shape</h2>

In [7]:
# Preview the first rows of the grouped frame (one row per postcode/borough pair).
Toronto_Neigh.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
# (rows, columns) of the grouped frame.
Toronto_Neigh.shape

(103, 3)

In [9]:
# Persist the frame with IPython's %store magic so it can be restored in
# another kernel/notebook via `%store -r Toronto_Neigh`.
%store Toronto_Neigh

Stored 'Toronto_Neigh' (DataFrame)
