In [1]:
#using beautifulsoup to extract table data from given webpage
import urllib.request as ur
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np

In [2]:
wiki="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M" #link to wikipedia page
page=ur.urlopen(wiki)
soup=bs(page)

In [3]:
table=soup.find_all('table', class_='wikitable sortable')#finding the right table

In [4]:
'''
first, we find all the rows of the table and store them in separate indices of a list. we then format it correctly by removing whitespaces, newline
characters etc and convert the dataframe to a list so that conducting operations are easy. rows where borough is not assigned are removed from the
dataframe
'''
rows=[]
for row in table[0].find_all('tr'):
    rows.append(row.text.strip())
for i in range(len(rows)):
    rows[i]=rows[i].split('\n')
df=pd.DataFrame(rows)
df.columns=df.iloc[0]
df=df.drop(df.index[0])
df=df[df.Borough != 'Not assigned']
df.columns=['PostalCode', 'Borough', 'Neighborhood']
lst=df.values.tolist()
len(lst)

211

In [9]:
'''
we check to see of the postal codes match. in that case we join the neighborhood names and delete the redundant row
'''
for i in range(1, len(lst)):
    if(lst[i][0]==lst[i-1][0]):
        lst[i][2]=lst[i][2]+','+lst[i-1][2]
        del lst[i-1]

IndexError: list index out of range

In [11]:
len(lst)

103

In [12]:
'''
we check to see if a neighborhood is not assigned. if not, then the neighborhood name becomes the same as the borough name
'''
for i in range(len(lst)):
    if lst[i][2]=='Not assigned':
        lst[i][2]=lst[i][1]

In [37]:
df=pd.DataFrame(lst)
df.columns=['PostalCode', 'Borough', 'Neighborhood']
df_new=df.sort_values(by='PostalCode')
df_new

Unnamed: 0,PostalCode,Borough,Neighborhood
6,M1B,Scarborough,"Malvern,Rouge"
12,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek"
18,M1E,Scarborough,"West Hill,Morningside,Guildwood"
22,M1G,Scarborough,Woburn
26,M1H,Scarborough,Cedarbrae
32,M1J,Scarborough,Scarborough Village
38,M1K,Scarborough,"Kennedy Park,Ionview,East Birchmount Park"
44,M1L,Scarborough,"Oakridge,Golden Mile,Clairlea"
51,M1M,Scarborough,"Scarborough Village West,Cliffside,Cliffcrest"
58,M1N,Scarborough,"Cliffside West,Birch Cliff"


In [38]:
df_new.shape

(103, 3)

In [32]:
!wget -q -O 'toronto_data.csv' https://cocl.us/Geospatial_data

In [33]:
with open('toronto_data.csv') as csv_data:
    toronto_data = pd.read_csv(csv_data)

In [40]:
toronto_data.columns=['PostalCode', 'Latitude', 'Longitude']
toronto_data

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [43]:
df_newll=pd.merge(df_new, toronto_data, on='PostalCode', how='left')
df_newll

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Morningside,Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park,Ionview,East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Oakridge,Golden Mile,Clairlea",43.711112,-79.284577
8,M1M,Scarborough,"Scarborough Village West,Cliffside,Cliffcrest",43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West,Birch Cliff",43.692657,-79.264848


In [60]:
#!conda install -c conda-forge folium=0.5.0

In [61]:
#!conda install -c conda-forge geopy

In [62]:
from geopy.geocoders import Nominatim
address = 'Toronto, TO'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [63]:
import folium
# create map of toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_newll['Latitude'], df_newll['Longitude'], df_newll['Borough'], df_newll['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [64]:
scar_data = df_newll[df_newll['Borough'] == 'Scarborough'].reset_index(drop=True)
scar_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern,Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Port Union,Rouge Hill,Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Morningside,Guildwood",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [65]:
address = 'Scarborough, TO'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Scarborough are 43.7169869, -79.2546806.


In [66]:
# create map of Scarborough using latitude and longitude values
map_scar = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(scar_data['Latitude'], scar_data['Longitude'], scar_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_scar)  
    
map_scar