# IBM Data Science | Capstone Project - Adding Latitude and Longitude
Created by: Sangwook Cheon  
Date: June 5, 2019

Let's import libraries


In [1]:
import geocoder
import pandas as pd
from geopy.geocoders import Nominatim


Use the geocoder library to get the latitude and longitude of each postal code in Canada


In [3]:
# Read the cleaned postal-code table produced by another notebook.
# The first CSV column is used as the row index.
cleaned_csv = 'postal_codes_canada-cleaned.csv'
data = pd.read_csv(cleaned_csv, index_col=0)
data.head()


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [4]:
# Placeholder for a geocoder result; stays None until a lookup returns coordinates
lat_lng_coords = None

# Plain Python list of the postal codes, in the DataFrame's row order
postal_codes = data.loc[:, 'PostalCode'].tolist()
postal_codes[:5]


['M3A', 'M4A', 'M5A', 'M6A', 'M7A']

In [5]:
# NOTE: this whole cell is disabled (every line is commented out) and is kept
# only as a record of the attempted approach.
# For each postal code it queries geocoder.google(...) and writes the returned
# latitude/longitude into `data`. The inner `while` has no retry limit, so if
# the service keeps returning None the loop never terminates.
# loop until get the coordinates
# for index, code in enumerate(postal_codes):
#     while(lat_lng_coords is None):
#         g = geocoder.google('{}, Toronto, Ontario'.format(code))
#         lat_lng_coords = g.latlng
#         print(lat_lng_coords)
# 
#     latitude = lat_lng_coords[0]
#     longitude = lat_lng_coords[1]
#     data.loc[index, 'Latitude'] = latitude
#     data.loc[index, 'Longitude'] = longitude
#     print(data.loc[index, 'Longitude'])
#     lat_lng_coords = None
# 
# data.head(10)


The geocoder library proved unreliable: in this case it could not return valid coordinates and only printed "None". So, there was no choice but to download a CSV file provided by the course that has all the coordinates.


In [6]:
# Load the lookup table that maps each postal code to its Latitude / Longitude
coordinates_csv = 'Geospatial_Coordinates.csv'
coordinates = pd.read_csv(coordinates_csv)
coordinates.head(n=10)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [None]:
# Display the (rows, columns) shape of the coordinates table
coordinates.shape


(103, 3)

It seems that the CSV file above is sorted alphabetically according to PostalCode. So, let's sort the other dataframe so that correct Latitude and Longitude values go into each row.


In [None]:
# Order the rows by postal code. reset_index() moves the previous row labels
# into a new 'index' column, which records the original row order.
data = data.sort_values(by='PostalCode').reset_index()
data.head(n=10)


Unnamed: 0,index,PostalCode,Borough,Neighborhood
0,6,M1B,Scarborough,"Rouge, Malvern"
1,12,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,18,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,22,M1G,Scarborough,Woburn
4,26,M1H,Scarborough,Cedarbrae
5,32,M1J,Scarborough,Scarborough Village
6,38,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,44,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,51,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,58,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [None]:
# Sanity check: both tables must list the same postal codes in the same order
codes_in_data = data['PostalCode'].tolist()
codes_in_coordinates = coordinates['Postal Code'].tolist()
print(codes_in_data == codes_in_coordinates)


True


In [None]:
# Add Latitude and Longitude columns to the original dataset.
#
# Join on the postal code itself instead of assigning whole columns: column
# assignment aligns rows by index label, not by postal code, so it is only
# correct while both frames happen to be sorted identically with matching
# index labels. A keyed merge gives the right coordinates regardless of order.
coords_by_code = (
    coordinates
    .loc[:, ['Postal Code', 'Latitude', 'Longitude']]
    .rename(columns={'Postal Code': 'PostalCode'})
)
data = (
    data
    # Drop any coordinates left by a previous run so the cell can be re-executed
    .drop(columns=['Latitude', 'Longitude'], errors='ignore')
    # how='left' keeps every row of `data` in its current order;
    # validate raises if a postal code appears more than once in `coordinates`
    .merge(coords_by_code, on='PostalCode', how='left', validate='many_to_one')
)


In [None]:
# Preview the first 10 rows of `data`
data.head(10)


Unnamed: 0,index,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,12,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,22,M1G,Scarborough,Woburn,43.770992,-79.216917
4,26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,38,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,44,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,51,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,58,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [None]:
# Put the rows back into their original order using the saved 'index' column.
# Because a column named 'index' already exists, reset_index() stores the
# current row labels in a column called 'level_0'; the helper 'index' column
# is then removed.
data = (
    data
    .sort_values(by='index')
    .reset_index()
    .drop(columns=['index'])
)
data.head(n=10)


Unnamed: 0,level_0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,25,M3A,North York,Parkwoods,43.753259,-79.329656
1,34,M4A,North York,Victoria Village,43.725882,-79.315572
2,53,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,71,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,85,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,93,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,26,M3B,North York,Don Mills North,43.745906,-79.352188
8,35,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,54,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [14]:
# Remove the leftover 'level_0' helper column.
# errors='ignore' keeps this cell re-runnable: without it, executing the cell a
# second time raises KeyError because the column has already been dropped.
data = data.drop(columns='level_0', errors='ignore')
data.head()


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [15]:
# Write the table with coordinates to disk (the row index is included as the first column)
output_csv = 'canada_cpdes_latlng.csv'
data.to_csv(output_csv)
