# Toronto postal codes: scraping, geocoding, and mapping

## Part 1: Scrape and clean the data from Wikipedia into a pandas DataFrame

In [1]:
import time

import lxml
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of parsing an error page.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
# NOTE: despite its name, website_url holds the page's HTML text, not a URL.
website_url = response.text

In [52]:
# Parse the downloaded HTML text using the lxml parser backend.
soup = BeautifulSoup(website_url, features='lxml')

In [4]:
# The postal-code listing is taken to be the first <table> element on the page.
wiki_table = soup.find('table')
if wiki_table is None:
    # Fail with a clear message rather than an AttributeError on None below.
    raise ValueError('No <table> element found in the downloaded page')

# find_all is the current Beautiful Soup 4 name; findAll is a deprecated alias.
wiki_headers = wiki_table.find_all('th')
wiki_data = wiki_table.find_all('td')

In [5]:
def _stripped_text(cells):
    """Return the whitespace-trimmed text of every tag in ``cells``."""
    return [cell.text.strip() for cell in cells]


# Plain-text versions of the table body cells and of the header cells.
clean_data = _stripped_text(wiki_data)
clean_headers = _stripped_text(wiki_headers)

In [6]:
# Peek at the first three cells, i.e. the three values of the first table row.
clean_data[0],clean_data[1],clean_data[2]

('M1A', 'Not assigned', '')

In [7]:
# clean_data is a flat list of cells read row by row, three cells per row, so a
# stride-3 slice starting at offset 0, 1 or 2 picks out one table column.
# " /" separators inside a cell are rewritten as ",".
Postal_code_list = [cell.replace(" /", ",") for cell in clean_data[0::3]]
Borough_list = [cell.replace(" /", ",") for cell in clean_data[1::3]]
Neighborhood_list = [cell.replace(" /", ",") for cell in clean_data[2::3]]

In [8]:
# Sanity check: the three column lists should all have the same length.
len(Postal_code_list),len(Borough_list),len(Neighborhood_list)

(180, 180, 180)

In [42]:
# Assemble the table in one constructor call; the column names come from the
# first three scraped header cells, in their original order.
df = pd.DataFrame(
    {
        clean_headers[0]: Postal_code_list,
        clean_headers[1]: Borough_list,
        clean_headers[2]: Neighborhood_list,
    }
)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [43]:
# Row and column count of the scraped table before any filtering.
df.shape

(180, 3)

In [44]:
# Drop the rows whose borough is "Not assigned".
# .copy() makes the result an independent frame, so the later in-place
# modifications (index reset, new coordinate columns) do not act on a slice
# of the original and do not trigger SettingWithCopyWarning.
df = df[~df["Borough"].str.contains("Not assigned")].copy()
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [45]:
# Row and column count after removing the "Not assigned" boroughs.
df.shape

(103, 3)

In [55]:
# Renumber the rows from 0 after filtering; the old index labels are discarded.
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


## Part 2: Add Latitude and Longitude columns for each postal code

In [57]:
!pip install geocoder
import geocoder # import geocoder



In [70]:
MAX_GEOCODE_ATTEMPTS = 5      # give up on a postal code after this many lookups
GEOCODE_RETRY_DELAY_S = 1.0   # pause between failed lookups, in seconds


def geocode_postal_code(code, max_attempts=MAX_GEOCODE_ATTEMPTS,
                        retry_delay=GEOCODE_RETRY_DELAY_S):
    """Look up the coordinates of a Toronto postal code with the ArcGIS geocoder.

    Parameters
    ----------
    code : str
        Postal code prefix; it is formatted into '<code>, Toronto, Ontario'.
    max_attempts : int
        Maximum number of lookups before giving up.
    retry_delay : float
        Seconds to wait between two failed lookups.

    Returns
    -------
    The ``latlng`` value of the first lookup that is not None; it is indexed
    as [latitude, longitude] by the caller.

    Raises
    ------
    RuntimeError
        If every attempt returns None. The previous ``while ... is None`` loop
        would spin forever in that situation.
    """
    query = '{}, Toronto, Ontario'.format(code)
    for attempt in range(1, max_attempts + 1):
        lat_lng_coords = geocoder.arcgis(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords
        if attempt < max_attempts:
            time.sleep(retry_delay)
    raise RuntimeError(
        'Could not geocode {!r} after {} attempts'.format(query, max_attempts)
    )


latitude_list = []
longitude_list = []

# One lookup per row, in row order, so the lists line up with df.
for code in df['Postal code']:
    lat_lng_coords = geocode_postal_code(code)
    latitude_list.append(lat_lng_coords[0])
    longitude_list.append(lat_lng_coords[1])

In [71]:
# Sanity check: one latitude and one longitude were collected per postal code.
len(latitude_list),len(longitude_list)

(103, 103)

In [104]:
# Attach the geocoded coordinates as two new columns; the lists are in row order.
df = df.assign(Latitude=latitude_list, Longitude=longitude_list)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752935,-79.335641
1,M4A,North York,Victoria Village,43.728102,-79.31189
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723265,-79.451211
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939


In [103]:
# Row and column count after adding the Latitude and Longitude columns.
df.shape

(103, 5)

In [83]:
#Test to verify using the postal code= 'M3A'
lat_lng_coords = None
while(lat_lng_coords is None):
    g = geocoder.arcgis('{}, Toronto, Ontario'.format('M3A'))
    lat_lng_coords = g.latlng
(lat_lng_coords[0],lat_lng_coords[1])

(43.75293455500008, -79.33564142299997)

## Part 3: Map the postal codes with folium

In [99]:
!pip install folium
import folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 8.8MB/s eta 0:00:011
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1


In [112]:
# Centre the map on the mean coordinate of all geocoded postal codes.
tor_latitude = df['Latitude'].mean()
tor_longitude = df['Longitude'].mean()
map_toronto = folium.Map(location=[tor_latitude, tor_longitude], zoom_start=10)

# Add one circle marker per postal code, with the neighborhood names as popup text.
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html=True is kept from the original popup construction.
        popup=folium.Popup(neighborhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # The former parse_html=False argument was dropped here: it is a Popup
        # option, not a CircleMarker/path option.
    ).add_to(map_toronto)

map_toronto

In [113]:
# Keep only the rows whose borough name contains the word "Toronto".
is_toronto_borough = df['Borough'].str.contains("Toronto")
df_toronto_only = df[is_toronto_borough]
df_toronto_only.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.650964,-79.353041
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.66179,-79.38939
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657491,-79.377529
15,M5C,Downtown Toronto,St. James Town,43.651734,-79.375554
19,M4E,East Toronto,The Beaches,43.678148,-79.295349


In [114]:
# Row and column count of the subset whose borough name contains "Toronto".
df_toronto_only.shape

(39, 5)

In [116]:
# Re-centre on the mean coordinate of the Toronto-only subset and zoom in closer.
tor_latitude = df_toronto_only['Latitude'].mean()
tor_longitude = df_toronto_only['Longitude'].mean()
map_toronto = folium.Map(location=[tor_latitude, tor_longitude], zoom_start=12)

# Add one circle marker per postal code, with the neighborhood names as popup text.
toronto_points = zip(
    df_toronto_only['Latitude'],
    df_toronto_only['Longitude'],
    df_toronto_only['Neighborhood'],
)
for lat, lng, neighborhood in toronto_points:
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html=True is kept from the original popup construction.
        popup=folium.Popup(neighborhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # The former parse_html=False argument was dropped here: it is a Popup
        # option, not a CircleMarker/path option.
    ).add_to(map_toronto)

map_toronto

In [118]:
# The code was removed by Watson Studio for sharing.