<H1>Neighborhoods in Toronto Week 3 Part 2 - Stephane Faure</H1>

<p>Imports...</p>


In [1]:
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
import requests

<p>Loading data</p>

In [2]:
# Download the Wikipedia page listing the Canadian postal codes starting with
# "M" and parse it into a BeautifulSoup tree.
doc_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get(doc_url, timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

<p>Looking for the table and diving into its body</p>

In [3]:
# Locate the postal-code table through its class attribute ("wikitable sortable")
# and take the body of *that* table, not the first <tbody> found in the document.
table = soup.find("table", class_="wikitable sortable")
if table is None:
    raise ValueError("No table with class 'wikitable sortable' found in the page")

table_body = table.find("tbody")
if table_body is None:
    # No explicit <tbody>: fall back to the table itself so that the row
    # search in the next cell still works.
    table_body = table


In [4]:
# Extract all rows of the table
table_rows = table_body.find_all("tr")
# Dictionary keyed by postal code; each value is [borough, "neigh1,neigh2,..."]
cp_dict = dict()

# Loop over the postcode rows, excluding the first line (column titles)
for wiki_postcode in table_rows[1:]:
    postcode_data = wiki_postcode.find_all("td")
    # Skip rows that do not carry the three expected cells
    # (postcode, borough, neighbourhood) instead of failing on an IndexError
    if len(postcode_data) < 3:
        continue

    # Strip every cell the same way so that keys and values never carry
    # stray whitespace or a trailing newline
    postcode = postcode_data[0].text.strip()
    borough_text = postcode_data[1].text.strip()
    neighborough_data = postcode_data[2].text.strip()

    # Remove rows whose borough or neighbourhood is not assigned
    if borough_text == "Not assigned" or neighborough_data == "Not assigned":
        continue

    # The borough may or may not be wrapped in a link; use the link text when present
    borough_link = postcode_data[1].find("a")
    if borough_link is not None:
        borough = borough_link.text.strip()
    else:
        borough = borough_text

    # Append to the existing entry for this postal code, or create a new one
    if postcode in cp_dict:
        cp_dict[postcode][1] = cp_dict[postcode][1] + "," + neighborough_data
    else:
        cp_dict[postcode] = [borough, neighborough_data]

<p>Create DataFrame</p>

In [5]:
# Flatten the dictionary into (postcode, borough, neighbourhoods) records
cplist = [(code, entry[0], entry[1]) for code, entry in cp_dict.items()]
# Build the DataFrame from those records
cols = ["Postcode", "Borough", "Neiborough"]
cp = pd.DataFrame.from_records(cplist, columns=cols)


<p>Display the shape of the DataFrame and save it</p>

In [6]:
# Report the frame's dimensions, write it to disk, then preview the first rows
frame_shape = cp.shape
print(frame_shape)
cp.to_csv("Toronto.csv")
cp.head()

(102, 3)


Unnamed: 0,Postcode,Borough,Neiborough
0,M3N,North York,Downsview Northwest
1,M1E,Scarborough,"Guildwood]],Morningside,West Hill"
2,M5G,Downtown Toronto,Central Bay Street
3,M6H,West Toronto,"Dovercourt Village,Dufferin"
4,M1W,Scarborough,"L'Amoreaux West,Steeles West"


<h1>Part 2</h1>

In [7]:
# install Geocoder
!conda install -c conda-forge geocoder
import geocoder

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geocoder                  1.38.1                     py_0    conda-forge


<p>Wrap the coordinate lookup in a function</p>

In [8]:
def getCoordsfromPostalCode(input_pc, max_attempts=100):
    """Look up the latitude and longitude of a Toronto postal code.

    The geocoder is queried with '<input_pc>, Toronto, Ontario' and the call
    is repeated until it yields coordinates, up to ``max_attempts`` times.

    Parameters
    ----------
    input_pc : str
        Postal code to look up.
    max_attempts : int, optional
        Upper bound on the number of geocoder calls. Bounding the retries
        keeps the function from spinning forever when the service never
        answers.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` as returned by the geocoder.

    Raises
    ------
    RuntimeError
        If no coordinates were obtained after ``max_attempts`` calls.
    """
    query = '{}, Toronto, Ontario'.format(input_pc)

    for _ in range(max_attempts):
        g = geocoder.google(query)
        lat_lng_coords = g.latlng
        # A missing (None) or empty result counts as a miss and triggers a retry
        if lat_lng_coords:
            latitude = lat_lng_coords[0]
            longitude = lat_lng_coords[1]
            return latitude, longitude

    raise RuntimeError(
        "No coordinates returned for {!r} after {} attempts".format(input_pc, max_attempts)
    )

<p>Add columns to original dataframe and update them</p>

In [9]:
# Look up each postal code once, in the frame's own row order, then attach the
# results as two float columns. Iterating over cp itself (rather than over the
# dictionary it was built from) keeps the cell tied to the frame it updates
# and avoids re-scanning the whole Postcode column for every lookup.
postcode_coords = [getCoordsfromPostalCode(pc) for pc in cp["Postcode"]]

cp["Latitude"] = pd.Series([lat for lat, _ in postcode_coords], index=cp.index, dtype=float)
cp["Longitude"] = pd.Series([lon for _, lon in postcode_coords], index=cp.index, dtype=float)


<p>Verify that the coordinates have been added to the DataFrame</p>

In [10]:
# Preview the first rows of the frame, now including Latitude and Longitude
cp.head()

Unnamed: 0,Postcode,Borough,Neiborough,Latitude,Longitude
0,M3N,North York,Downsview Northwest,43.761631,-79.520999
1,M1E,Scarborough,"Guildwood]],Morningside,West Hill",43.763573,-79.188711
2,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
3,M6H,West Toronto,"Dovercourt Village,Dufferin",43.669005,-79.442259
4,M1W,Scarborough,"L'Amoreaux West,Steeles West",43.799525,-79.318389


In [11]:
# Install folium 0.5.0 from conda-forge (--yes skips the confirmation prompt),
# then import it and print a confirmation message.
!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Folium installed and imported!')

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge
Folium installed and imported!


In [12]:
# Build a map centred on Toronto, Canada
# (Toronto boundary GeoJSON, for reference:
#  https://github.com/jasonicarter/toronto-geojson/blob/master/toronto_crs84.geojson)
tr_map = folium.Map(location=[43.653908,-79.384293], zoom_start=11)
tr_feature_group = folium.map.FeatureGroup()

# Add one marker per postal code, reading the coordinates straight from the
# frame's columns; the postal code itself is used as the popup label.
for postcode, lat, lon in zip(cp["Postcode"], cp["Latitude"], cp["Longitude"]):
    tr_feature_group.add_child(folium.Marker(location=[lat, lon], popup=postcode))

tr_feature_group.add_to(tr_map)

# Display the map as the cell output
tr_map

In [13]:
# Write the map to an HTML file in the working directory
tr_map.save("toronto_map.html")

In [16]:
# Render the contents of toronto_map.html as this cell's output
from IPython.display import HTML
HTML(filename="toronto_map.html")
