I imported the libraries needed to scrape the Wikipedia page.

In [146]:
import urllib.request
import pandas as pd
from bs4 import BeautifulSoup

# Wikipedia page that is scraped for the postal-code table.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Open the connection in a context manager so the HTTP response is closed as
# soon as the document has been parsed, instead of staying open until it is
# garbage-collected.
with urllib.request.urlopen(url) as page:
    soup = BeautifulSoup(page, "lxml")

# Dump the parsed document to inspect its structure before extracting the table.
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"6824922e-a2d6-4843-8ce2-eff849d86504","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":960187814,"wgRevisionId":960187814,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toron

The following four cells convert the HTML table into a pandas DataFrame.

In [147]:
# Collect every <table> element found in the parsed page.
all_tables = soup.find_all(name="table")

In [148]:
# Select the first table that carries both the "wikitable" and "sortable"
# classes. A CSS selector matches regardless of class order or extra classes,
# whereas class_='wikitable sortable' only matches that exact attribute string.
right_table = soup.select_one('table.wikitable.sortable')

# Fail here with a clear message instead of with an AttributeError on None
# when the table rows are iterated.
if right_table is None:
    raise ValueError("No table with classes 'wikitable sortable' was found on the page")

In [149]:
# Parallel column buffers, one entry per table row:
# A holds the first cell, B the second, C the third.
A = []
B = []
C = []

for row in right_table.find_all('tr'):
    cells = row.find_all('td')
    # Only rows with exactly three <td> cells are data rows; anything else
    # (presumably the <th> header row) is skipped.
    if len(cells) != 3:
        continue
    # get_text() gathers all the text inside the cell, including text nested
    # in child tags such as links, and strip() removes surrounding whitespace.
    # Slicing off the last character of the first text node would truncate
    # real text whenever that node does not end in a newline.
    first, second, third = (cell.get_text().strip() for cell in cells)
    A.append(first)
    B.append(second)
    C.append(third)

In [150]:
# Assemble the three parallel column lists into one frame in a single step.
df = pd.DataFrame({
    'Postal_Code': A,
    'Borough': B,
    'Neighborhoods': C,
})
df.head()

Unnamed: 0,Postal_Code,Borough,Neighborhoods
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


The next cell drops all rows whose borough is 'Not assigned'.

In [151]:
import numpy as np

# Turn the "Not assigned" placeholder into NaN. The replacement applies to
# every column, so placeholder values outside Borough become NaN as well.
df = df.replace("Not assigned", np.nan)

# Discard rows without a borough and renumber the remaining rows from 0.
df = df.dropna(subset=["Borough"], axis=0).reset_index(drop=True)
df

Unnamed: 0,Postal_Code,Borough,Neighborhoods
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [152]:
# Sanity check: (rows, columns) of the frame after the cleaning step.
df.shape

(103, 3)

In [153]:
# Pre-allocate one coordinate slot per row of df. Sizing from len(df) rather
# than a hard-coded row count keeps the arrays in step with the frame if the
# scraped table changes size.
longitude = np.zeros(len(df))
latitude = np.zeros(len(df))

# Coordinate table; the lookup that follows reads its 'Postal Code',
# 'Latitude' and 'Longitude' columns.
df_lonla = pd.read_csv('Geospatial_Coordinates.csv')

In [154]:
# Index the coordinate table by postal code so every row of df is resolved
# with one vectorised lookup, instead of re-filtering df_lonla once per row
# and squeezing a one-element Series into an array slot.
coords_by_code = df_lonla.set_index('Postal Code')

latitude = df['Postal_Code'].map(coords_by_code['Latitude']).to_numpy()
longitude = df['Postal_Code'].map(coords_by_code['Longitude']).to_numpy()

# A postal code that is absent from the coordinate table maps to NaN.
# Stop with an explicit message listing the offending codes rather than
# carrying missing coordinates forward.
unmatched = df.loc[pd.isna(latitude) | pd.isna(longitude), 'Postal_Code']
if not unmatched.empty:
    raise ValueError(
        'No coordinates found for postal codes: {}'.format(unmatched.tolist())
    )

df['Latitude'] = latitude
df['Longitude'] = longitude
df.head()

Unnamed: 0,Postal_Code,Borough,Neighborhoods,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [161]:
import folium

I decided to work with only boroughs that contain the word __Toronto__.

In [159]:
# Flag each row whose borough name contains the literal text "Toronto":
# 1.0 marks a match, 0.0 a non-match. The vectorised string test covers every
# row of df whatever its length, instead of looping over a hard-coded 103 rows.
# regex=False treats the pattern as plain text; na=False counts a missing
# borough as a non-match.
Contain_Toronto = (
    df['Borough']
    .str.contains('Toronto', regex=False, na=False)
    .astype(float)
    .to_numpy()
)
df['Contain_Toronto'] = Contain_Toronto

In [160]:
# Keep only the rows flagged as Toronto boroughs, then drop the helper flag.
# Filtering on the flag column directly avoids replacing 0 with NaN across
# every column of df, and leaves df itself unmodified so the cell can be
# re-run safely. drop() is applied to the filtered result without inplace,
# so no modification is attempted on a derived frame.
df_new = (
    df.loc[df['Contain_Toronto'] == 1]
    .drop(columns=['Contain_Toronto'])
)
df_new

Unnamed: 0,Postal_Code,Borough,Neighborhoods,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [166]:
# Base map centred on latitude 43.6532, longitude -79.3832.
map_toronto = folium.Map(zoom_start=10, location=[43.6532, -79.3832])

# Visual settings shared by every marker.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row of df_new, with a "<neighborhoods>, <borough>" popup.
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhoods']
for lat, lng, borough, neighborhood in zip(*(df_new[col] for col in marker_columns)):
    popup_text = '{0}, {1}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        popup=folium.Popup(popup_text, parse_html=True),
        **marker_style
    )
    marker.add_to(map_toronto)

map_toronto