### Part one: Sracpe data from wiki

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import urllib.request

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
ourUrl=urllib.request.urlopen(url)
soup=BeautifulSoup(ourUrl,'html.parser')

In [4]:
Postcode = []
Borough = []
Neighbourhood = []
tabel = soup.find('table',{'class':'wikitable sortable'})
for i in tabel.find('tbody').find_all('tr'):
    try:
        Postcode.append(i.find_all('th')[0].get_text())
        Borough.append(i.find_all('th')[1].get_text())
        Neighbourhood.append(i.find_all('th')[2].get_text().strip())
    except:
        Postcode.append(i.find_all('td')[0].get_text())
        Borough.append(i.find_all('td')[1].get_text())
        Neighbourhood.append(i.find_all('td')[2].get_text().strip())

In [5]:
df = pd.DataFrame(Postcode[1:],columns = ['Postcode'])
df['Borough'] = Borough[1:]
df['Neighbourhood'] = Neighbourhood[1:]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


### Part two: clean and merge the data

In [6]:
# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
df1 = df[df['Borough']!='Not assigned'].reset_index(drop=True)
df1

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Not assigned
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


In [7]:
# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough. 
for i in df1.index:
    if df1['Neighbourhood'][i] == 'Not assigned':
        df1.iloc[i,2] = df1['Borough'][i]
df1

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor
6,M7A,Queen's Park,Queen's Park
7,M9A,Etobicoke,Islington Avenue
8,M1B,Scarborough,Rouge
9,M1B,Scarborough,Malvern


In [8]:
# More than one neighborhood can exist in one postal code area.
Postcode = []
Borough = []
Neighbourhood = []
for i in df1['Postcode'].unique():
    Postcode.append(i)
    Borough.append(df1[df1['Postcode']==i]['Borough'].values[0])
    nstr = ''
    for nei in df1[df1['Postcode']==i]['Neighbourhood'].values:
        nstr += nei
        nstr += ','
    Neighbourhood.append(nstr[:-1])

In [9]:
df2 = pd.DataFrame(Postcode[1:],columns = ['Postcode'])
df2['Borough'] = Borough[1:]
df2['Neighbourhood'] = Neighbourhood[1:]
df2

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M4A,North York,Victoria Village
1,M5A,Downtown Toronto,"Harbourfront,Regent Park"
2,M6A,North York,"Lawrence Heights,Lawrence Manor"
3,M7A,Queen's Park,Queen's Park
4,M9A,Etobicoke,Islington Avenue
5,M1B,Scarborough,"Rouge,Malvern"
6,M3B,North York,Don Mills North
7,M4B,East York,"Woodbine Gardens,Parkview Hill"
8,M5B,Downtown Toronto,"Ryerson,Garden District"
9,M6B,North York,Glencairn


### Part three: Generate latitude and longtitude

In [14]:
geo = pd.read_csv('Geospatial_Coordinates.csv')
geo

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [15]:
Latitude = []
Longitude = []
for i in range(df2.shape[0]):
    Latitude.append(geo[geo['Postal Code']==df2['Postcode'][i]]['Latitude'].values[0])
    Longitude.append(geo[geo['Postal Code']==df2['Postcode'][i]]['Longitude'].values[0])
df2['Latitude'] = Latitude
df2['Longitude'] = Longitude

In [16]:
df2

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4A,North York,Victoria Village,43.725882,-79.315572
1,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.654260,-79.360636
2,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
3,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
4,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
5,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
6,M3B,North York,Don Mills North,43.745906,-79.352188
7,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
8,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937
9,M6B,North York,Glencairn,43.709577,-79.445073


In [18]:
import folium

In [22]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[43.657162,-79.378937], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df2['Latitude'], df2['Longitude'], df2['Borough'], df2['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork