Part 1: Web scrapping

In [1]:
import urllib.request
from bs4 import BeautifulSoup
import pandas as pd
import geocoder
import folium

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = urllib.request.urlopen(url)
soup = BeautifulSoup(page, "lxml")

In [3]:
all_tables=soup.find_all("table")
right_table=soup.find('table', class_='wikitable sortable')
#right_table

In [4]:
A=[]
B=[]
C=[]

#Scrapes wikipedia for the information between HTML tags

for row in right_table.find_all('tr'):
    cells=row.findAll('td')
    if len(cells)==3:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))
        

In [5]:

df=pd.DataFrame(A,columns=['Postcode'])
df['Borough']=B
df['Neighbourhood']=C
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [6]:
df = df[df.Borough != 'Not assigned']
df = df[df.Neighbourhood != 'Not assigned']


In [7]:
df2 = df.groupby(by=['Postcode','Borough']).agg(lambda x: ','.join(x))
df2.reset_index(level=['Postcode','Borough'], inplace=True)
df2.head()


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood\n,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae\n


In [8]:
df2.shape

(103, 3)

Part 2: Getting the lat and long values

In [10]:
#create lists converts postal codes to list

lat = []
long = []
codes = df2['Postcode'].tolist()

for postal_code in codes:

# initialize your variable to None
    lat_lng_coords = None

# loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
        lat.append(lat_lng_coords[0])
        long.append(lat_lng_coords[1])

df2['Latitude'] = lat
df2['Longitude'] = long

In [11]:
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood\n,Morningside,West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.21759
4,M1H,Scarborough,Cedarbrae\n,43.769688,-79.23944


Part 3: visualization

In [13]:
# create map of New York using latitude and longitude values
latitude = 43.70011
longitude = -79.4163
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, long, borough, neighborhood in zip(df2['Latitude'], df2['Longitude'], df2['Borough'], df2['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto