In [5]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import folium
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

print("Modules imported.")

Modules imported.


In [6]:
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_url,'lxml')
My_table = soup.find('table',{'class':'wikitable sortable'})

find_headers = My_table.find_all("th")
headings = [th.text.strip() for th in find_headers]
headings

['Postcode', 'Borough', 'Neighbourhood']

In [7]:
table_classes = {"class": ["sortable"]}
wikitables = soup.findAll("table", table_classes)

df_temp = []
for tr in My_table.find_all('tr'):
        tds = tr.find_all('td')
        if not tds:
            continue
        postcode, borough, neighbourhood = [td.text.strip() for td in tds]
        df_temp.append(', '.join([postcode, borough, neighbourhood]))
        #print(', '.join([postcode, borough, neighbourhood]))
df_temp[0:5]

['M1A, Not assigned, Not assigned',
 'M2A, Not assigned, Not assigned',
 'M3A, North York, Parkwoods',
 'M4A, North York, Victoria Village',
 'M5A, Downtown Toronto, Harbourfront']

In [8]:
mylist_temp = pd.Series(df_temp)
mylist_split = mylist_temp.str.split(",")
mylist = mylist_split.tolist()

df_wiki = pd.DataFrame(mylist, columns=headings)
df_wiki = df_wiki.applymap(lambda x: x.strip() if isinstance(x, str) else x)
df_wiki.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [9]:
df_wiki["Flag"] = np.where(df_wiki["Borough"] == "Not assigned", 1,0)
df_wiki["Neighbourhood"] = np.where(df_wiki["Neighbourhood"] == 'Not assigned', df_wiki.Borough, df_wiki.Neighbourhood)
df_na = df_wiki[df_wiki.Flag == 0].reset_index()
df_na.head()

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Flag
0,2,M3A,North York,Parkwoods,0
1,3,M4A,North York,Victoria Village,0
2,4,M5A,Downtown Toronto,Harbourfront,0
3,5,M5A,Downtown Toronto,Regent Park,0
4,6,M6A,North York,Lawrence Heights,0


In [10]:
df_na = df_na.drop("Flag", axis=1).groupby(["Postcode", "Borough"]).agg(', '.join).reset_index()


In [11]:
df_geo = pd.read_csv(r'C:\Users\micheal.zamayeri\OneDrive - Travelport\Private\Data Files\Geospatial_Coordinates.csv')
df_geo.rename(columns= {"Postal Code": "Postcode"}, inplace=True)
df_geo.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
df = pd.merge(df_na, df_geo, on='Postcode', how='left')
df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [13]:
address = 'Toronto'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of {} are {}, {}.'.format(address,latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [14]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto