# Part 1

In [72]:
import pandas as pd
import requests
import re

# Scrape the Wikipedia page with the list of postal codes within the city of Toronto.
# The table on that page associates postcode, borough and neighbourhood.
page_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# A timeout keeps the cell from hanging indefinitely if the server never answers,
# and raise_for_status() stops here on an HTTP error instead of letting the
# following cells parse an error page.
response = requests.get(page_url, timeout=30)
response.raise_for_status()
content = response.content

# Save the result as a string.
# NOTE: str() applied to bytes yields the bytes *repr* ("b'...'"), in which every
# newline shows up as the two literal characters backslash + n. The parsing below
# uses '.' without re.DOTALL and a later cell strips those literal "\n" sequences,
# so this conversion is kept as is on purpose.
content = str(content)

In [73]:
# Pull the table values out of the page content and collect them in "locations".
# Every <tr>...</tr> span is treated as one table row.
lines = re.findall('<tr>(.*?)</tr>', content)


def _cell_text(cell):
    """Return the text of one <td> cell, unwrapping the first nested tag if there is one."""
    if '<' not in cell:
        return cell
    # The cell holds markup: keep the text between the first '>' and the next '<'.
    return re.findall('>(.*?)<', cell)[0]


# Index 0 and the last four <tr> matches are skipped.
# NOTE(review): presumably the header row and rows of other tables on the page -
# confirm against the current page layout.
records = []
for row_idx in range(1, len(lines) - 4):
    cells = re.findall('<td>(.*?)</td>', lines[row_idx])
    # Only the first three cells are used: postal code, borough, neighbourhood.
    records.append([_cell_text(cells[col_idx]) for col_idx in range(3)])

# Transform the collected rows into a DataFrame with named columns.
locations = pd.DataFrame(records)
locations.columns = ['PostalCode','Borough','Neighborhood']

In [74]:
# Preview the first rows of the freshly parsed table (raw values, before any clean-up).
locations.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [75]:
# Clean the scraped values in two passes:
#   1. regex pass  - remove every backslash, together with an 'n' that directly
#                    follows it (this deletes the literal "\n" sequences);
#   2. exact pass  - blank out cells whose whole value is "Not assigned".
locations = (
    locations
    .replace(to_replace=r'\\n?', value='', regex=True)
    .replace(to_replace=r'Not assigned', value='')
)

locations.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [76]:
# Delete the rows where there is no borough (empty string after the clean-up).
# .copy() makes the filtered result an independent DataFrame, so assigning into
# it later with .loc does not raise pandas' SettingWithCopyWarning.
# The original row labels are kept (no index reset).
locations = locations[locations["Borough"] != ""].copy()
locations.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [77]:
# Rows that have a borough but an empty neighborhood get the borough name
# as their neighborhood. The mask is computed once and used on both sides;
# the assignment aligns on the row index.
empty_neighborhood = locations["Neighborhood"] == ""
locations.loc[empty_neighborhood, "Neighborhood"] = locations.loc[empty_neighborhood, "Borough"]

# Part 2

In [78]:
# Read the CSV file that maps each postal code to its latitude and longitude.
# The path is relative, so the file has to be in the notebook's working directory.
geospatial_coord = pd.read_csv("Geospatial_Coordinates.csv")
geospatial_coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [79]:
# Show locations again for comparison with the coordinates table:
# the next cell joins the two on "PostalCode" / "Postal Code".
locations.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [80]:
# Join the coordinates onto the neighbourhood table by postal code
# ("PostalCode" on the left, "Postal Code" on the right).
# This is an inner join: rows whose postal code is missing from either
# table are dropped, and both key columns are present in the result.
locations = pd.merge(
    locations,
    geospatial_coord,
    how="inner",
    left_on="PostalCode",
    right_on="Postal Code",
)

In [81]:
# Inspect the merged table; it now carries both key columns ("PostalCode" and "Postal Code").
locations.head()


Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,M5A,43.65426,-79.360636
3,M6A,North York,Lawrence Heights,M6A,43.718518,-79.464763
4,M6A,North York,Lawrence Manor,M6A,43.718518,-79.464763


In [82]:
# The merge left the postal code in the table twice ("PostalCode" and
# "Postal Code"); keep "PostalCode" and drop the duplicate column.
locations = locations.drop(columns="Postal Code")
locations.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Heights,43.718518,-79.464763
4,M6A,North York,Lawrence Manor,43.718518,-79.464763


In [84]:
# Size of the final table as (rows, columns).
locations.shape

(210, 5)

# Part 3


In [71]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Look up the latitude and longitude of Toronto with the Nominatim geocoder.
# The lookup needs network access and sometimes times out, so hard-coded
# coordinates are used whenever it does not produce a result.
address = 'Toronto'
FALLBACK_LATITUDE = 43.653963
FALLBACK_LONGITUDE = -79.387207

try:
    geolocator = Nominatim(user_agent="tr_explorer")
    loc = geolocator.geocode(address)
except Exception:
    # Best-effort lookup: any geocoder/network error falls back to the
    # hard-coded values. Catching Exception (instead of a bare except) lets
    # KeyboardInterrupt and SystemExit still stop the cell.
    loc = None

if loc is None:
    # Either the lookup failed above or geocode() found no match (it returns None then).
    latitude = FALLBACK_LATITUDE
    longitude = FALLBACK_LONGITUDE
else:
    latitude = loc.latitude
    longitude = loc.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [89]:
import folium # map rendering library

# Create a map of Toronto centred on the latitude/longitude determined earlier.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of locations; the popup shows "neighborhood, borough".
marker_columns = zip(
    locations['Latitude'],
    locations['Longitude'],
    locations['Borough'],
    locations['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_columns:
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html is a Popup argument; confirm that
        # CircleMarker actually uses it before relying on it.
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: renders the map in the notebook.
map_toronto