In [1]:
from bs4 import BeautifulSoup
import os,io
import parser,urllib.request,requests
from lxml import html,etree
import re
import pandas as pd

import numpy as np
# Never truncate displayed DataFrames: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

In [2]:
# Wikipedia page listing the Toronto ("M") postal codes with their borough and
# neighbourhood. pandas fetches the page itself, so no urllib opener is needed
# (the previously created FancyURLopener was unused and is a deprecated API).
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# read_html returns a list with one DataFrame per HTML table found on the page;
# header=0 takes the first table row as the column names.
f = pd.read_html(url,header=0)
# The first table is the postal-code table. Copy it so later edits to df do not
# alter the parsed list held in f.
df = f[0].copy()
df.head()

  """Entry point for launching an IPython kernel.


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


The dataframe will consist of three columns: PostalCode, Borough, and Neighborhood

Rename the columns to match this format.

In [4]:
# Map the scraped column names onto the target names PostalCode / Neighborhood.
column_renames = {'Postcode':'PostalCode', 'Neighbourhood':'Neighborhood'}
df = df.rename(columns=column_renames)

Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [5]:
# Keep only the rows whose borough is assigned, then renumber the index from 0.
has_borough = df['Borough'].ne('Not assigned')
df = df.loc[has_borough].reset_index(drop=True)

In [6]:
# (rows, columns) remaining after the 'Not assigned' boroughs were removed.
df.shape

(211, 3)

Rows that share a postal code will be combined into one row, with the neighborhoods separated by a comma, as shown in row 11 in the above table.

The neighborhoods will be combined by grouping the rows on postal code and borough.

In [8]:
# Collapse the rows that share a postal code and borough into a single row
# whose Neighborhood is the ', '-joined list of the grouped neighborhoods.
neighborhoods_by_code = df.groupby(['PostalCode','Borough'])['Neighborhood']
df = neighborhoods_by_code.agg(', '.join).reset_index()

If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough. 

This finds where the neighborhood is 'Not assigned' and copies the corresponding borough value into it.

In [10]:
# A row with a borough but a 'Not assigned' neighborhood takes the borough name
# as its neighborhood.
# The assignment is a single .loc[mask, column] call so that it writes into df
# itself; chained indexing (df.loc[i]['Neighborhood'] = ...) can end up
# updating a temporary row instead. The boolean mask also covers every matching
# row, not just the first, and is a no-op when no row matches.
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']

In [11]:
# (rows, columns) after grouping the rows by postal code and borough.
df.shape

(103, 3)

Now import the CSV file containing the coordinates of each postal code.

In [12]:
# Download the CSV of coordinates per postal code.
# The timeout keeps the kernel from hanging indefinitely on a stalled server.
response = requests.get("http://cocl.us/Geospatial_data", timeout=30)
# Stop here on an HTTP error instead of trying to parse an error page as CSV.
response.raise_for_status()
coordinates = response.content
coor = pd.read_csv(io.StringIO(coordinates.decode('utf-8')))
# Give the key column the same name as in df so the frames can be merged on it.
coor = coor.rename(columns={'Postal Code':'PostalCode'})

Now merge the two dataframes on the postal code.

In [13]:
# Join each neighborhood row to its coordinates via the shared PostalCode
# column (default inner join); the key is dropped once the join is done.
merged = df.merge(coor, on='PostalCode')
df_2 = merged.drop(columns=['PostalCode'])
df_2.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476


In [14]:
address = 'Toronto, Ontario'

# Look up the coordinates of the address with the Nominatim geocoder.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() yields None when the address cannot be resolved; report that
# explicitly rather than failing with an AttributeError on the lines below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
Latitude = location.latitude
Longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(Latitude, Longitude))

# Base map centred on the Toronto coordinates held in Latitude / Longitude.
map_Toronto = folium.Map(location=[Latitude, Longitude], zoom_start=10)

# Draw one circle marker per row of df_2; its popup reads "<neighborhood>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in df_2[marker_columns].itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_Toronto)

# Last expression of the cell: renders the map in the notebook.
map_Toronto

The geograpical coordinate of Toronto are 43.653963, -79.387207.
