## Appending Postal Code with Lat/Long Information

#### Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
!conda install -c conda-forge folium=0.5.0 --yes 
import folium 
!conda install -c conda-forge geocoder --yes
import geocoder

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



#### Reading the Data

In [3]:
# Load the postal code / borough / neighborhood table from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle files from a trusted source.
df = pd.read_pickle("NeigborhoodTable.pkl")

In [4]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# Count the distinct values of each descriptive column, then report them in one sentence.
n_postal_codes = len(df['Postal Code'].unique())
n_boroughs = len(df['Borough'].unique())
n_neighborhoods = len(df['Neighborhood'].unique())

summary_template = 'The data has {} Postal Codes, {} boroughs and {} neighborhoods.'
print(summary_template.format(n_postal_codes, n_boroughs, n_neighborhoods))

The data has 103 Postal Codes, 10 boroughs and 99 neighborhoods.


#### Getting the Lat/Long data using Geolocator and Geocoder 

In [19]:
# Geocode the city itself; latitude_city / longitude_city are used further down to center the map.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode('Toronto, Ontario')

# geocode() returns None when the service finds no match. Fail with a clear message here
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError("Nominatim returned no result for 'Toronto, Ontario'")

latitude_city = location.latitude
longitude_city = location.longitude
print('The co-ordinates of Toronto, ON are {} latitude and {} longitude.'.format(latitude_city, longitude_city))

The co-ordinates of Toronto, ON are 43.6534817 latitude and -79.3839347 longitude.


In [6]:
# Try to geocode the first ten postal codes with Nominatim and count how many get no result.
# The client is created once, outside the loop, because it is identical for every request.
geolocator = Nominatim(user_agent="toronto_explorer")

count = 0
# head(10) takes the first ten rows by position and also copes with tables shorter than ten rows.
for postal_code in df['Postal Code'].head(10):
    location = geolocator.geocode('{}, Toronto, Ontario.'.format(postal_code))
    if location is None:
        # no match returned for this postal code
        count += 1
    else:
        print(location, postal_code, location.latitude, location.longitude)

print("Number of zip codes with no available location info =" + str(count))

Toronto, Golden Horseshoe, Ontario, M5H 2N2, Canada M3A 43.6534817 -79.3839347
Toronto, Golden Horseshoe, Ontario, M5H 2N2, Canada M7A 43.6534817 -79.3839347
Toronto, Golden Horseshoe, Ontario, M5H 2N2, Canada M1B 43.6534817 -79.3839347
Number of zip codes with no available location info =7


In [7]:
# Same experiment with geocoder's Google provider: each of the first ten postal codes
# is tried up to ten times before it is counted as a miss.
counting = 0
for i in range(0, 10):
    query = '{}, Toronto, Ontario'.format(df['Postal Code'][i])
    lat_lng_coords = None

    # stop at the first attempt that yields coordinates, or give up after ten tries
    for attempt in range(10):
        g = geocoder.google(query)
        lat_lng_coords = g.latlng
        if lat_lng_coords is not None:
            break

    if lat_lng_coords is None:
        counting += 1
    else:
        print(df['Postal Code'][i], g)

print("Number of zip codes with no available location info =" +  str(counting))


Number of zip codes with no available location info =10


### The above methods aren't able to provide the lat/long data correctly

#### Using the geospatial coordinates data

In [8]:
# Read the postal code -> latitude/longitude lookup table from a local CSV file.
geo_spatial=pd.read_csv("Geospatial_Coordinates.csv")

In [9]:
# Preview the first rows of the coordinate lookup table.
geo_spatial.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
# Attach Latitude/Longitude to every row of df by matching on 'Postal Code'.
# - Dropping any existing coordinate columns first makes the cell safe to re-run; a second
#   merge would otherwise create Latitude_x/Latitude_y style duplicates.
# - how='left' keeps every row of df even when no coordinates are found for its postal code.
# - validate='many_to_one' raises if a postal code appears more than once in geo_spatial,
#   which would otherwise silently multiply rows.
df = (
    df.drop(columns=['Latitude', 'Longitude'], errors='ignore')
      .merge(geo_spatial, on='Postal Code', how='left', validate='many_to_one')
)

#### Checking for missing data

In [13]:
# Tally missing (True) versus present (False) latitude values after the merge.
df['Latitude'].isna().value_counts()

False    103
Name: Latitude, dtype: int64

In [14]:
# Tally missing (True) versus present (False) longitude values after the merge.
df['Longitude'].isna().value_counts()

False    103
Name: Longitude, dtype: int64

In [15]:
# Preview the merged table, which now carries the Latitude and Longitude columns.
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


#### Visualize on a map

In [25]:
# Build a folium map centered on the city coordinates obtained earlier in the notebook.
map_toronto = folium.Map(location=[latitude_city, longitude_city], zoom_start=11)

# Appearance shared by every neighborhood marker.
# NOTE(review): parse_html is forwarded exactly as in the original call; confirm that
# CircleMarker actually uses it.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per row, with a "<neighborhood>, <borough>" popup.
coordinates = zip(df['Latitude'], df['Longitude'])
names = zip(df['Neighborhood'], df['Borough'])
for (lat, lng), (neighborhood, borough) in zip(coordinates, names):
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=popup, **marker_style).add_to(map_toronto)

map_toronto

#### Saving data to pickle file

In [16]:
# Persist the merged table (postal codes with their coordinates) to a pickle file.
df.to_pickle("NeigborhoodCoordinates.pkl")

In [17]:
# Show the (rows, columns) dimensions of the table that was just saved.
df.shape

(103, 5)