In [0]:
# Import libraries
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [0]:
# Download the Wikipedia page listing Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
html_doc = requests.get(url, timeout=30)
# Fail here on an HTTP error status instead of parsing an error page later.
html_doc.raise_for_status()

In [11]:
# Pull the page body out of the HTTP response as a string
html_text = html_doc.text

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>List of postal codes of Canada: M - Wikipedia</title>\n<script>document.documentElement.className=document.documentElement.className.replace(/(^|\\s)client-nojs(\\s|$)/,"$1client-js$2");RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":900271985,"wgRevisionId":900271985,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","Au

In [0]:
# Parse the page source into a BeautifulSoup tree using the lxml parser
soup = BeautifulSoup(markup=html_text, features='lxml')

In [0]:
# Locate the table of interest: the first <table> whose class attribute is
# 'wikitable sortable'.
table = soup.find('table',{'class':'wikitable sortable'})
# find() returns None when nothing matches; stop here with a clear message
# rather than an AttributeError in a later cell if the page layout changes.
if table is None:
    raise ValueError("Could not find a table with class 'wikitable sortable' in the page")

In [0]:
# Collect every text node of each table row (<tr>), one result list per row.
# `string=True` is the current spelling of the deprecated `text=True` filter.
data = [row.find_all(string=True) for row in table.find_all('tr')]

In [0]:
# Convert each row's result set into a plain Python list so list methods
# can be used while cleaning
data_list = list(map(list, data))

In [0]:
# Drop the bare-newline text nodes and trim whitespace from everything that
# remains, building the cleaned table row by row
data_cleaned = []
for raw_row in data_list:
    kept = []
    for text_node in raw_row:
        if text_node == '\n':
            continue
        kept.append(text_node.strip())
    data_cleaned.append(kept)

In [78]:
# Build the DataFrame: the first cleaned row supplies the column names, and
# that same row is then sliced off the data (so the index starts at 1)
header_row = data_cleaned[0]
df_with_header = pd.DataFrame(data_cleaned, columns=header_row)
df = df_with_header.iloc[1:]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


In [0]:
# For every (Postcode, Borough) pair, gather the distinct neighbourhoods and
# join them into a single comma-separated string
neighbourhood_groups = df.groupby(['Postcode', 'Borough'])['Neighbourhood']
df_neighbourhood_join = neighbourhood_groups.unique().str.join(', ').to_frame()

In [0]:
# Move Postcode/Borough back from the index into regular columns, then set
# Borough to 'Not assigned' on rows whose Neighbourhood is 'Not assigned'
df_neighbourhood_join = df_neighbourhood_join.reset_index()
unassigned_rows = df_neighbourhood_join['Neighbourhood'].eq('Not assigned')
df_neighbourhood_join.loc[unassigned_rows, 'Borough'] = 'Not assigned'

In [113]:
# Load the postal-code latitude/longitude table from the hosted CSV
geo_data_url = 'http://cocl.us/Geospatial_data'
geo_df = pd.read_csv(geo_data_url)
geo_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [0]:
# Left-join the coordinates onto the neighbourhood table, matching its
# 'Postcode' column against the CSV's 'Postal Code' column
df_geo = pd.merge(
    df_neighbourhood_join,
    geo_df,
    how='left',
    left_on='Postcode',
    right_on='Postal Code',
)

In [135]:
# Preview the merged Toronto postcode/borough/neighbourhood table with its coordinates
df_geo.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1A,Not assigned,Not assigned,,,
1,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
2,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
4,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917


In [136]:
# Keep only the rows that have both a latitude and a longitude
has_coordinates = df_geo[['Latitude', 'Longitude']].notna().all(axis=1)
df_geo = df_geo[has_coordinates]
df_geo.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
1,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
2,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
4,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
5,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


In [0]:
!pip install geocoder

In [0]:
import folium
import geocoder
import geopy

In [138]:
# Look up the coordinates of Toronto; they are used to centre the map.
address = 'Toronto, Canada'

geolocator = geopy.Nominatim(user_agent='Test app')
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on `location.latitude`.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
latitude, longitude

(43.653963, -79.387207)

In [0]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row of df_geo, labelled "<neighbourhood(s)>, <borough>"
for lat, lng, borough, neighborhood in zip(df_geo['Latitude'], df_geo['Longitude'], df_geo['Borough'], df_geo['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # NOTE(review): parse_html=True presumably makes folium treat the label as
    # plain text rather than markup -- confirm against the folium Popup docs.
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, not a marker/path option, so it is not
    # passed to CircleMarker.
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.3,
    ).add_to(map_toronto)

In [132]:
# Display the map built in the previous cell as this cell's output
map_toronto