In [21]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import lxml.html as lh
import numpy as np

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
geopy                     1.18.1                     py_0    conda-forge
Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge


In [22]:
## Connect to the URL and check the status
# A timeout is passed explicitly: without one, requests waits indefinitely
# on a stalled connection and the kernel appears to hang.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
# Last expression of the cell, so the notebook displays the HTTP status code.
response.status_code

200

In [23]:
## Using Pandas and BeautifulSoup to parse the table from the site

soup = BeautifulSoup(response.content, 'html.parser')

# The postal-code table is taken to be the first <table> element on the page.
table = soup.find_all('table')[0]

## Capture the column headings (<th> cells) from the table
columns_headers = [
    cell.text.strip()
    for tr in table.find_all('tr')
    for cell in tr.find_all('th')
]

## Capture the table rows data
# Only non-empty <td> texts are kept; rows without any (e.g. the header row,
# which has only <th> cells) produce an empty list and are skipped.
rows = []
for tr in table.find_all('tr'):
    values = [td.text.strip() for td in tr.find_all('td') if td.text.strip()]
    if values:
        rows.append(values)

## Convert the list of rows to a DataFrame
df = pd.DataFrame(rows, columns=columns_headers)

## Print the size of the DataFrame
print("Size of Data Frame before :",df.shape,'\n')

## Drop all the rows where Borough is Not assigned (and renumber the index)
df = df[df.Borough != 'Not assigned'].reset_index(drop=True)

## Assign Borough to Neighbourhood where Neighbourhood is Not assigned
df['Neighbourhood'] = np.where(df['Neighbourhood'] == 'Not assigned', df['Borough'], df['Neighbourhood'])

## Group by Postcode: one row per (Postcode, Borough), neighbourhoods comma-joined
grouped = df.groupby(['Postcode', 'Borough']).agg(','.join).reset_index()

print(grouped.head(),'\n')

## Load longitude and latitude data for Toronto from csv file
lat_lng = pd.read_csv("http://cocl.us/Geospatial_data")

## Join the two dataframes on the postal code and order the columns
# The join is keyed on the postal code instead of concatenating side by side:
# pd.concat(axis=1) pairs rows by position, so any difference in ordering or
# row count between the two sources would silently attach coordinates to the
# wrong postcode. A left join keeps every scraped postcode; one without a
# match in the csv gets NaN coordinates rather than somebody else's.
result = grouped.merge(lat_lng, how='left', left_on='Postcode', right_on='Postal Code')
result = result.drop(columns='Postal Code')
cols = ['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']
result = result[cols]

## Print the result dataframe shape
print("Size of Data Frame After :",result.shape)
result.head(10)

Size of Data Frame before : (289, 3) 

  Postcode      Borough                         Neighbourhood
0      M1B  Scarborough                         Rouge,Malvern
1      M1C  Scarborough  Highland Creek,Rouge Hill,Port Union
2      M1E  Scarborough       Guildwood,Morningside,West Hill
3      M1G  Scarborough                                Woburn
4      M1H  Scarborough                             Cedarbrae 

Size of Data Frame After : (103, 5)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [24]:
##  Using Pandas to parse the table from the site

dfs = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",header=0)

# The postal-code table is taken to be the first table pandas finds on the page.
df = dfs[0]
print("Size of Data Frame before :",df.shape,'\n')

## Drop all the rows where Borough is Not assigned (and renumber the index)
# Filtering into a new frame leaves the parsed table in dfs[0] untouched, so
# re-running this cell always starts from the same data.
df = df[df.Borough != 'Not assigned'].reset_index(drop=True)

## Assign Borough to Neighbourhood where Neighbourhood is Not assigned
df['Neighbourhood'] = np.where(df['Neighbourhood'] == 'Not assigned', df['Borough'], df['Neighbourhood'])

## Group by Postcode: one row per (Postcode, Borough), neighbourhoods comma-joined
grouped = df.groupby(['Postcode', 'Borough']).agg(','.join).reset_index()
print(grouped.head(),'\n')

## Load longitude and latitude data for Toronto from csv file
lat_lng = pd.read_csv("http://cocl.us/Geospatial_data")

## Join the two dataframes on the postal code and order the columns
# The join is keyed on the postal code instead of concatenating side by side:
# pd.concat(axis=1) pairs rows by position, so any difference in ordering or
# row count between the two sources would silently attach coordinates to the
# wrong postcode. A left join keeps every scraped postcode; one without a
# match in the csv gets NaN coordinates rather than somebody else's.
result = grouped.merge(lat_lng, how='left', left_on='Postcode', right_on='Postal Code')
result = result.drop(columns='Postal Code')
cols = ['Postcode', 'Borough', 'Neighbourhood', 'Latitude', 'Longitude']
result = result[cols]

## Print the result dataframe shape
print("Size of Data Frame After :",result.shape)
result.head(10)

Size of Data Frame before : (289, 3) 

  Postcode      Borough                         Neighbourhood
0      M1B  Scarborough                         Rouge,Malvern
1      M1C  Scarborough  Highland Creek,Rouge Hill,Port Union
2      M1E  Scarborough       Guildwood,Morningside,West Hill
3      M1G  Scarborough                                Woburn
4      M1H  Scarborough                             Cedarbrae 

Size of Data Frame After : (103, 5)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


In [25]:
address = 'Toronto'

# Nominatim expects callers to identify themselves: constructing it without a
# user_agent emits a DeprecationWarning on geopy 1.x and is rejected outright
# by geopy 2.x, so an application-specific agent string is passed explicitly.
geolocator = Nominatim(user_agent="toronto_postcode_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the address cannot be resolved; fail with a
# clear message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  app.launch_new_instance()


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [26]:
# Build a folium map centred on the geocoded Toronto coordinates.
Toronto_map = folium.Map(
    location=[latitude, longitude],
    zoom_start=12,
)

# A bare expression on the last line makes the notebook render the map.
Toronto_map

In [88]:
from folium import plugins

# Rebuild the base map so this cell always starts from a marker-free map.
Toronto_map = folium.Map(
    location=[latitude, longitude],
    zoom_start=12,
)

# Cluster layer attached to the map; individual markers are added to it below.
Postcode = plugins.MarkerCluster().add_to(Toronto_map)

# Pull the coordinate and label columns out of the joined dataframe.
latitudes = list(result.Latitude)
longitudes = list(result.Longitude)
labels = list(result.Postcode)

# One marker per postcode, using the postcode itself as the popup text.
for lat, lng, label in zip(latitudes, longitudes, labels):
    marker = folium.Marker(location=[lat, lng], popup=label, icon=None)
    marker.add_to(Postcode)

# Last expression of the cell: renders the map with the clustered markers.
Toronto_map
