# Toronto neighborhood webscrape

In [1]:
# Third party imports
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests

In [65]:
# retrieve the appropriate web page
response = requests.get(
	url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
)
print(response.status_code)

200


In [66]:
# Use beautiful Soup to read the html
soup = BeautifulSoup(response.content, 'html.parser')
title = soup.find(id='firstHeading')
print(title.string)

List of postal codes of Canada: M


In [67]:
# Use beautiful soup to extract the table element and pass that into a list of values, ready to be turned into a dataframe
table_contents = []
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

In [68]:
# Pass values into a dataframe 
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace(
    {
        'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
        'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
        'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
        'MississaugaCanada Post Gateway Processing Centre':'Mississauga'
    }
)

In [69]:
df.shape

(103, 3)

In [70]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
    len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 15 boroughs and 103 neighborhoods.


# Section 2

In [59]:
import geocoder

Using the google geocode option was returning an error with <REQUEST DENIED>. I therefore switched to arcgis. 

In [82]:
for i, row in df.iterrows():
    code = row.PostalCode
    df['Latitude'].loc[df.PostalCode==code]= round(geocoder.arcgis(f'{code}, Toronto, Ontario').latlng[0], 5)
    df['Longitude'].loc[df.PostalCode==code]= round(geocoder.arcgis(f'{code}, Toronto, Ontario').latlng[0], 5)

['M3A', 43.75245, 43.75245]
['M4A', 43.73057, 43.73057]
['M5A', 43.65512, 43.65512]
['M6A', 43.72327, 43.72327]
['M7A', 43.66253, 43.66253]
['M9A', 43.66263, 43.66263]
['M1B', 43.81139, 43.81139]
['M3B', 43.74923, 43.74923]
['M4B', 43.70718, 43.70718]
['M5B', 43.65739, 43.65739]
['M6B', 43.70687, 43.70687]
['M9B', 43.65034, 43.65034]
['M1C', 43.78574, 43.78574]
['M3C', 43.72168, 43.72168]
['M4C', 43.6897, 43.6897]
['M5C', 43.65215, 43.65215]
['M6C', 43.69211, 43.69211]
['M9C', 43.64857, 43.64857]
['M1E', 43.76575, 43.76575]
['M4E', 43.67709, 43.67709]
['M5E', 43.64536, 43.64536]
['M6E', 43.68784, 43.68784]
['M1G', 43.76812, 43.76812]
['M4G', 43.70902, 43.70902]
['M5G', 43.65609, 43.65609]
['M6G', 43.66869, 43.66869]
['M1H', 43.76944, 43.76944]
['M2H', 43.80225, 43.80225]
['M3H', 43.75788, 43.75788]
['M4H', 43.70142, 43.70142]
['M5H', 43.6497, 43.6497]
['M6H', 43.66505, 43.66505]
['M1J', 43.74446, 43.74446]
['M2J', 43.78097, 43.78097]
['M3J', 43.76476, 43.76476]
['M4J', 43.68811, 43.688

In [83]:
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188
