In [366]:
import pandas as pd
import lxml

Loading the dataset

In [367]:
# Wikipedia page listing the Canadian postal codes that start with "M".
POSTAL_CODES_URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# read_html returns a list of DataFrames, one per HTML table found on the page;
# header=0 uses the first row of each table as its column labels.
calls_df = pd.read_html(POSTAL_CODES_URL, header=0)

Transforming into a DataFrame

In [368]:
# The first table parsed from the page is the one used from here on.
first_parsed_table = calls_df[0]
table = pd.DataFrame(first_parsed_table)

# Preview the first five rows.
table.iloc[0:5]

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Excluding 'Not Assigned' Boroughs

In [369]:
# Keep only the rows whose Borough holds a real value.
has_borough = table['Borough'].ne('Not assigned')
table = table.loc[has_borough]

# Preview the first five remaining rows.
table.iloc[0:5]

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


Joining Neighbourhoods that share the same Postcode and Borough

In [370]:
# Collapse the rows of each (Borough, Postcode) pair into one row whose
# Neighbourhood is the comma-separated list of the group's neighbourhoods.
neighbourhoods_joined = (
    table
    .groupby(['Borough', 'Postcode'])['Neighbourhood']
    .apply(', '.join)
    .reset_index()
)

# Put the columns back in their original order.
column_order = ['Postcode', 'Borough', 'Neighbourhood']
table = neighbourhoods_joined[column_order]

# Preview the first five rows.
table.iloc[0:5]

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M4N,Central Toronto,Lawrence Park
1,M4P,Central Toronto,Davisville North
2,M4R,Central Toronto,North Toronto West
3,M4S,Central Toronto,Davisville
4,M4T,Central Toronto,"Moore Park, Summerhill East"


Replacing 'Not assigned' Neighbourhoods with the Borough name

In [371]:
# Wherever the Neighbourhood is the placeholder 'Not assigned', use the row's
# Borough instead. This is done in one vectorised step by column name, which
# replaces a manual row loop that relied on positional column indices and only
# advanced its counter in the `else` branch.
is_unassigned = table['Neighbourhood'].eq('Not assigned')
table = table.assign(
    # mask() swaps in the Borough value only where is_unassigned is True.
    Neighbourhood=table['Neighbourhood'].mask(is_unassigned, table['Borough'])
)

# Row count, kept because later cells read `r`.
r = table.shape[0]

Checking the results

In [372]:
# Sanity check: no Neighbourhood may still hold the 'Not assigned' placeholder.
# The check is a single vectorised count, so it always terminates, and the
# success message is printed only when the check actually passes.
r = table.shape[0]  # row count, kept because later cells read `r`
n_unassigned = int(table['Neighbourhood'].eq('Not assigned').sum())

assert n_unassigned == 0, (
    "Wrong logic!! {} of {} rows still have a 'Not assigned' Neighbourhood"
    .format(n_unassigned, r)
)
print('Well done!!')

Well done!!


In [373]:
# Show the size of the cleaned table as (rows, columns).
n_rows, n_cols = table.shape
(n_rows, n_cols)

(103, 3)

Loading the geographic locations CSV.

In [374]:
# Location of the CSV with the geographic coordinates.
GEOSPATIAL_CSV_URL = "http://cocl.us/Geospatial_data"

# header=0 takes the first line of the file as the column labels.
calls_geo = pd.read_csv(GEOSPATIAL_CSV_URL, header=0)

Changing the Column name for the future merge.

In [375]:
# Rename 'Postal Code' to 'Postcode' so the key column has the same name as in
# `table`. rename() without inplace returns a new frame, so the raw
# `calls_geo` frame stays exactly as it was read and this cell gives the same
# result however many times it is re-run.
geo = calls_geo.rename(columns={'Postal Code': 'Postcode'})

# Preview the first five rows.
geo.head(5)

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Merging the DataFrames.

In [376]:
# Attach Latitude/Longitude to each postcode. A right join keeps every row of
# `geo`; a postcode present only in `geo` would get NaN Borough/Neighbourhood.
table = pd.merge(table, geo, how='right', on='Postcode')

# Preview the first five merged rows.
table.iloc[0:5]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
3,M4S,Central Toronto,Davisville,43.704324,-79.38879
4,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


Building a Map

In [None]:
import folium
from folium.plugins import MarkerCluster

# Map centre: the coordinates listed for postcode M4N (Central Toronto) in `table`.
CTO_COORDINATES = (43.728020, -79.388790)
Postcode = table[['Latitude', 'Longitude']]

# Create an empty map centred on CTO_COORDINATES.
# NOTE(review): `map` shadows the Python builtin of the same name; the name is
# kept here so any other cell that refers to it keeps working.
map = folium.Map(location=CTO_COORDINATES, zoom_start=12)

# Cluster layer attached to the map; markers are added to this layer.
mc = MarkerCluster().add_to(map)

# Add one marker per row. Each marker must be attached with add_to(), otherwise
# it is built and thrown away and the map stays empty. All rows of the merged
# table are used, rather than a row count computed in an earlier cell.
for latitude, longitude in Postcode.itertuples(index=False, name=None):
    folium.Marker(location=[latitude, longitude]).add_to(mc)


Saving the map to an HTML file

In [None]:
# Output file for the rendered HTML.
output_html = 'mc.html'
mc.save(output_html)