In [1]:
import pandas as pd
import numpy as np
import urllib.request
from bs4 import BeautifulSoup

In [2]:
# Assign URL.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
# Get web page
page = urllib.request.urlopen(url)

In [4]:
# Convert into xml
soup = BeautifulSoup(page, 'lxml')

In [5]:
# Select the appropriate content
tables = soup.find('table', class_='wikitable sortable')

In [6]:
# create a table using HTML tags. Use loop to fill the table.
A=[]
B=[]
C=[]
for row in tables.find_all('tr'):
    cells=row.find_all('td')
    if len(cells)==3:
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

In [7]:
# Convert table into a Pandas dataframe and rename columns.
df = pd.DataFrame(A,columns=['Postal Code'])
df['Borough'] = B
df['Neighborhood'] = C

In [8]:
# Clean the data, remove unnassigned postal codes.
df=df[df.Borough != 'Not assigned\n']
df.head()


Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [9]:
# As requested by the assignment
df.shape

(103, 3)

This completes Part 1 of the Week 3 assignment. 
Part 2 of Week 3 assignment continues below: 

In [10]:
# Read the latitude and longitude of provided postal codes
LL = pd.read_csv("http://cocl.us/Geospatial_data")

In [11]:
LL.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Clean the data
df['Postal Code'] = df['Postal Code'].str.strip()
LL['Postal Code'] = LL['Postal Code'].str.strip()
df['Borough'] = df['Borough'].str.strip()

In [13]:
# Merge the two dataframes so we have a complete set to work with.
data = df.merge(LL, on="Postal Code")
data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


This completes Part 2 of the Week 3 assignment of the Data Science Capstone Project.
Continued below is Part 3 of the Week 3 assignment:

In [14]:
# Select only Toronto neighborhoods.
data_T = data[data['Borough'].str.contains('Toronto')]
data_T

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [15]:
# Make things a bit clearer by sorting the data
dataT_sorted = data_T.sort_values('Borough')
dataT_sorted

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
62,M5N,Central Toronto,Roselawn,43.711695,-79.416936
67,M4P,Central Toronto,Davisville North,43.712751,-79.390197
68,M5P,Central Toronto,"Forest Hill North & West, Forest Hill Road Park",43.696948,-79.411307
73,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
74,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
79,M4S,Central Toronto,Davisville,43.704324,-79.38879
83,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
86,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049
61,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
84,M5T,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049


We can see that Downtown Toronto makes up the majority of Toronto neighborhoods.

In [16]:
!pip install folium
import folium
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 7.8MB/s ta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    certifi-2020.4.5.2         |   py36h9f0ad1d_0         152 KB  conda-forge
    openssl-1.1.1g   

In [17]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [18]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(dataT_sorted['Latitude'], dataT_sorted['Longitude'], dataT_sorted['Borough'], dataT_sorted['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

This completes Part 3 and concludes the Week 3 assignment.