In [1]:
import pandas as pd
import numpy as np

In [6]:
#Scraping the data into a pandas dataframe
# read_html returns a list with one DataFrame per table found on the page;
# only the first table is kept.
postal_codes_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
scraped_tables = pd.read_html(postal_codes_url)
df = scraped_tables[0]
df.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [7]:
#Excluding rows where Borough is not assigned
# Boolean mask: True for every row whose Borough holds a real value.
condition = df["Borough"].ne("Not assigned")
df = df.loc[condition]
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [17]:
#Grouping cells by Postal Code
# sort=False keeps the postal codes in their order of first appearance.
# Within each group, the values of every remaining column are joined
# into a single comma-separated string.
join_with_comma = ', '.join
rows_by_postal_code = df.groupby('Postal Code', sort=False)
df_grouped = rows_by_postal_code.agg(join_with_comma)
df_grouped.head(10)

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
M1B,Scarborough,"Malvern, Rouge"
M3B,North York,Don Mills
M4B,East York,"Parkview Hill, Woodbine Gardens"
M5B,Downtown Toronto,"Garden District, Ryerson"


In [20]:
#If a cell has a borough but a "Not assigned" neighbourhood, then the neighbourhood will be the same as the borough
condition = df_grouped['Neighbourhood'] == 'Not assigned'
df_grouped.loc[condition, 'Neighbourhood'] = df_grouped.loc[condition, 'Borough']

# Move 'Postal Code' out of the index and back into a regular column.
# The guard keeps this cell idempotent: resetting an index that has already
# been reset would insert a spurious 'index' column on every re-run.
if 'Postal Code' not in df_grouped.columns:
    df_grouped = df_grouped.reset_index()
df_grouped.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [None]:
# Display the dimensions of the grouped frame as a (rows, columns) tuple.
df_grouped.shape

<h1>Part 2: Geocoding</h1>

In [22]:
!pip install geocoder
import geocoder

Collecting geocoder
  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [23]:
#Adding new columns to store the values of Latitude and Longitude
# Both columns start out filled with None; they are populated later.
for coordinate_column in ('Longitude', 'Latitude'):
    df_grouped[coordinate_column] = None

In [25]:
#Now, assigning lat & long values

# Upper bound on geocoding attempts per postal code, so that a lookup which
# keeps failing cannot hang the notebook in an endless retry loop.
MAX_GEOCODE_ATTEMPTS = 10

# Postal codes for which no coordinates could be obtained.
failed_postal_codes = []

# Iterate over the frame's own row labels (not enumerate() positions) so the
# .loc writes below always address the row the postal code came from.
for row_label, pc in df_grouped['Postal Code'].items():
    lat_lng_coords = None

    for _ in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(pc))
        lat_lng_coords = g.latlng
        # Any falsy result (None or an empty sequence) counts as a failed
        # lookup and triggers another attempt.
        if lat_lng_coords:
            break

    if lat_lng_coords:
        latitude = lat_lng_coords[0]
        longitude = lat_lng_coords[1]
    else:
        # Never fall through with the previous row's coordinates: record the
        # failure and leave this row's coordinates empty.
        latitude = None
        longitude = None
        failed_postal_codes.append(pc)

    df_grouped.loc[row_label, 'Latitude'] = latitude
    df_grouped.loc[row_label, 'Longitude'] = longitude

# Surface failures instead of leaving silent gaps in the data.
if failed_postal_codes:
    print('Could not geocode after {} attempts: {}'.format(
        MAX_GEOCODE_ATTEMPTS, ', '.join(failed_postal_codes)))


In [27]:
# Preview the first 12 rows of the frame, including the Longitude and Latitude columns.
df_grouped.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Longitude,Latitude
0,M3A,North York,Parkwoods,-79.3299,43.7525
1,M4A,North York,Victoria Village,-79.3131,43.7306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",-79.3626,43.6551
3,M6A,North York,"Lawrence Manor, Lawrence Heights",-79.4504,43.7233
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",-79.3919,43.6625
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",-79.5283,43.6626
6,M1B,Scarborough,"Malvern, Rouge",-79.1966,43.8114
7,M3B,North York,Don Mills,-79.3619,43.7492
8,M4B,East York,"Parkview Hill, Woodbine Gardens",-79.3119,43.7072
9,M5B,Downtown Toronto,"Garden District, Ryerson",-79.378,43.6574
