# PART 1

## Imports libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

## Reference the data on wikipedia and create soup object

In [2]:
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(source.text, 'lxml')

## Begin data analysis

In [3]:
data = []
columns = []
table = soup.find(class_='wikitable')
for index, tr in enumerate(table.find_all('tr')):
    section = []
    for td in tr.find_all(['th','td']):
        section.append(td.text.rstrip())
    
    if (index == 0):
        columns = section
    else:
        data.append(section)

toronto_df = pd.DataFrame(data = data,columns = columns)
toronto_df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Remove 'Not assigned' Neighborhoods


In [4]:
toronto_df1 = toronto_df[toronto_df.Borough != 'Not assigned'].reset_index(drop=True)
toronto_df1.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [5]:
#group same neighborhood together

toronto_df2 = toronto_df1.groupby(['Postcode','Borough'], as_index=False).agg(lambda x: ','.join(x))
toronto_df2.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [6]:
#remove duplicates from the records

toronto_df2 = toronto_df2.drop_duplicates()
if(toronto_df2.index.name != 'Postcode'):
    toronto_df2 = toronto_df2.set_index('Postcode')
    
toronto_df2.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Guildwood,Morningside,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [7]:
toronto_df2.info

<bound method DataFrame.info of     Postcode      Borough                                       Neighborhood
0        M1B  Scarborough                                      Rouge,Malvern
1        M1C  Scarborough               Highland Creek,Rouge Hill,Port Union
2        M1E  Scarborough                    Guildwood,Morningside,West Hill
3        M1G  Scarborough                                             Woburn
4        M1H  Scarborough                                          Cedarbrae
..       ...          ...                                                ...
98       M9N         York                                             Weston
99       M9P    Etobicoke                                          Westmount
100      M9R    Etobicoke  Kingsview Village,Martin Grove Gardens,Richvie...
101      M9V    Etobicoke  Albion Gardens,Beaumond Heights,Humbergate,Jam...
102      M9W    Etobicoke                                          Northwest

[103 rows x 3 columns]>

# Assign the non assigned neighborhood

In [8]:
na_neighbor_rows = toronto_df2.Neighborhood == 'Not assigned'
toronto_df2.loc[na_neighbor_rows, 'Neighborhood'] = toronto_df2.loc[na_neighbor_rows, 'Borough']
toronto_df2[na_neighbor_rows]

Unnamed: 0,Postcode,Borough,Neighborhood
85,M7A,Queen's Park,Queen's Park


##  Final number of rows of the dataframe


In [9]:
toronto_df2.shape

(103, 3)

# PART 2

In [10]:
import pandas as pd
import numpy as np

from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
import csv
import urllib
import requests

In [11]:
import urllib.request
 
myProxy = urllib.request.ProxyHandler({'http': '127.0.0.2'})
 
openProxy = urllib.request.build_opener(myProxy)
 
urllib.request.urlretrieve("http://cocl.us/Geospatial_data", filename="toronto_coordinates.csv")

('toronto_coordinates.csv', <http.client.HTTPMessage at 0x256d60d36d8>)

In [12]:
coordinates = pd.read_csv('toronto_coordinates.csv')

In [13]:
coordinates.shape

(103, 3)

In [14]:
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merge the dataframes of PART 1 and PART 2

In [15]:
toronto_df2.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [16]:
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [23]:
toronto_df2_merge = toronto_df2.set_index('Postcode')
coordinates_merge = coordinates.set_index('Postal Code')
toronto_df_merger = pd.concat([toronto_df2_merge, coordinates_merge], axis=1, join='inner')

toronto_df_merger.index.name = 'Postal Code'
toronto_df_merger.reset_index(inplace=True)

print(toronto_df_merger.shape)

toronto_df_merger.head()

(103, 5)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# PART 3

In [26]:
pip install Nominatim

Collecting Nominatim
  Downloading https://files.pythonhosted.org/packages/59/f2/d47726f804208804f7f295e20a9d2ee4ea925fb6462481223464026bcf66/nominatim-0.1.tar.gz
Building wheels for collected packages: Nominatim
  Building wheel for Nominatim (setup.py): started
  Building wheel for Nominatim (setup.py): finished with status 'done'
  Created wheel for Nominatim: filename=nominatim-0.1-cp37-none-any.whl size=2368 sha256=1643038a688dff4326dd5dbdf22fdc54a47d5118c0d63ca56522d0b403484d52
  Stored in directory: C:\Users\aa1316\AppData\Local\pip\Cache\wheels\d5\66\ed\e7476981dc30210b6b5ce7c25b054e8db35d44fdd2198003d4
Successfully built Nominatim
Installing collected packages: Nominatim
Successfully installed Nominatim-0.1
Note: you may need to restart the kernel to use updated packages.


In [29]:
pip install geopy

Collecting geopy
  Downloading https://files.pythonhosted.org/packages/80/93/d384479da0ead712bdaf697a8399c13a9a89bd856ada5a27d462fb45e47b/geopy-1.20.0-py2.py3-none-any.whl (100kB)
Collecting geographiclib<2,>=1.49
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.20.0
Note: you may need to restart the kernel to use updated packages.


In [31]:
# print the coordinates of Toronto city

from geopy import Nominatim

address = 'Toronto, Ontario'

geo_locator = Nominatim(user_agent="tl-toronto-neigh")
location = geo_locator.geocode(address)
lat = location.latitude
long = location.longitude

print('The coordinates of Toronto city are {}, {}.'.format(lat, long))

The coordinates of Toronto city are 43.653963, -79.387207.


# Use Folium to create a Map of Toronto

In [48]:
import folium

toronto_map = folium.Map(location=[lat, long], zoom_start=10)

for lat, long, post, borough, neigh in zip(toronto_df_merger['Latitude'], toronto_df_merger['Longitude'], toronto_df_merger['Postal Code'], toronto_df_merger['Borough'], toronto_df_merger['Neighborhood']):
    label = "{} ({}): {}".format(borough, post, neigh)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=4,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='gray',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
print(toronto_map)

toronto_map

<folium.folium.Map object at 0x00000256D8D16198>


In [50]:
# display the information of all Toronto borough

toronto_borough = ['Central Toronto', 'Downtown Toronto', 'East Toronto', 'West Toronto']
toronto_central_df = toronto_df_merger[toronto_df_merger['Borough'].isin(toronto_borough)].reset_index(drop=True)

print(toronto_central_df.shape)

toronto_central_df.head()

(39, 5)


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
