## In Part 1 we have built a dataframe of the postal code of each neighborhood along with the borough name and neighborhood name.
## In this section we will add the latitude and the longitude coordinates of each neighborhood.
### For this we will use the Geocoder Python package instead: https://geocoder.readthedocs.io/index.html.

# Part 0: Extracting the table again

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

wikipedia_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wikipedia_page= requests.get(wikipedia_link).text

soup = BeautifulSoup(raw_wikipedia_page,'lxml')
#print(soup.prettify())


In [5]:
table = soup.find('table')

Postcode      = []
Borough       = []
Neighborhood  = []

# print(table)

for tr_cell in table.find_all('tr'):
    
    counter = 1
    Postcode_var      = -1
    Borough_var       = -1
    Neighborhood_var  = -1
    
    for td_cell in tr_cell.find_all('td'):
        if counter == 1: 
            Postcode_var = td_cell.text
        if counter == 2: 
            Borough_var = td_cell.text
            tag_a_Borough = td_cell.find('a')
            
        if counter == 3: 
            Neighborhood_var = str(td_cell.text).strip()
            tag_a_Neighborhood = td_cell.find('a')
            
        counter +=1
        
    if (Postcode_var == 'Not assigned' or Borough_var == 'Not assigned' or Neighborhood_var == 'Not assigned'): 
        continue
    try:
        if ((tag_a_Borough is None) or (tag_a_Neighborhood is None)):
            continue
    except:
        pass
    if(Postcode_var == -1 or Borough_var == -1 or Neighborhood_var == -1):
        continue
        
    Postcode.append(Postcode_var)
    Borough.append(Borough_var)
    Neighborhood.append(Neighborhood_var)
    

In [6]:
unique_p = set(Postcode)
# print('num of unique Postal codes:', len(unique_p))
Postcode_u      = []
Borough_u       = []
Neighborhood_u = []


for postcode_unique_element in unique_p:
    p_var = ''; b_var = ''; n_var = ''; 
    for postcode_idx, postcode_element in enumerate(Postcode):
        if postcode_unique_element == postcode_element:
            p_var = postcode_element;
            b_var = Borough[postcode_idx]
            if n_var == '': 
                n_var = Neighborhood[postcode_idx]
            else:
                n_var = n_var + ', ' + Neighborhood[postcode_idx]
    Postcode_u.append(p_var)
    Borough_u.append(b_var)
    Neighborhood_u.append(n_var)


In [7]:
toronto_dict = {'Postcode':Postcode_u, 'Borough':Borough_u, 'Neighborhood':Neighborhood_u}
df_toronto = pd.DataFrame.from_dict(toronto_dict)
#df_toronto.to_csv('toronto_part1.csv')
df_toronto.head(14)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M9A,Etobicoke,Islington Avenue
1,M4H,East York,Thorncliffe Park
2,M1B,Scarborough,"Rouge, Malvern"
3,M9W,Etobicoke,Northwest
4,M9L,North York,Humber Summit
5,M4Y,Downtown Toronto,Church and Wellesley
6,M9N,York,Weston
7,M3J,North York,"Northwood Park, York University"
8,M2H,North York,Hillcrest Village
9,M2J,North York,Henry Farm


# Part 1: Pre-processing - Installing geocoder

In [17]:
!pip install geocoder
"""
Collecting geocoder
  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
Requirement already satisfied: click in c:\programdata\anaconda3\lib\site-packages (from geocoder) (6.7)
Requirement already satisfied: six in c:\programdata\anaconda3\lib\site-packages (from geocoder) (1.11.0)
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Requirement already satisfied: requests in c:\programdata\anaconda3\lib\site-packages (from geocoder) (2.19.1)
Requirement already satisfied: future in c:\programdata\anaconda3\lib\site-packages (from geocoder) (0.16.0)
Requirement already satisfied: decorator in c:\programdata\anaconda3\lib\site-packages (from ratelim->geocoder) (4.3.0)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\programdata\anaconda3\lib\site-packages (from requests->geocoder) (3.0.4)
Requirement already satisfied: idna<2.8,>=2.5 in c:\programdata\anaconda3\lib\site-packages (from requests->geocoder) (2.7)
Requirement already satisfied: urllib3<1.24,>=1.21.1 in c:\programdata\anaconda3\lib\site-packages (from requests->geocoder) (1.23)
Requirement already satisfied: certifi>=2017.4.17 in c:\programdata\anaconda3\lib\site-packages (from requests->geocoder) (2018.8.24)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
"""

print('geocoder has not been installed before.')
import geocoder
print('geocoder has been successfully imported.')

[31mdistributed 1.21.8 requires msgpack, which is not installed.[0m
geocoder has not been installed before.
geocoder has been successfully imported.


In [12]:
latitude=[] #List to collect the latitudes
longitude=[] #List to collect the longitudes

for i in df_toronto['Postcode']: #Iterating through Postalcodes to collect the locations data
    j='toronto,'+i
    try:
        #url ="https://maps.googleapis.com/maps/api/geocode/json?key={}&address={}".format(API_key,j)
        url ="https://maps.googleapis.com/maps/api/geocode/json?key={}&address={}".format(API_key,j)
        params = {'sensor': 'false', 'address': 'Mountain View, CA'}
        response = requests.get(url).json() # get response
        geographical_data = response['results'][0]['geometry']['location'] # get geographical coordinates
        
        latitude.append(geographical_data['lat'])
        longitude.append(geographical_data['lng'])
    except:
        pass

In [13]:
len(latitude)

0

# Part 2: Processing - Extracting Latitude and Longtitude for each Postal code in Toronto

In [10]:
latitude = []
longitude = []
for elem in Postcode_u:
# initialize your variable to None
    lat_lng_coords = None

# loop until you get the coordinates
    while (lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(elem))
        lat_lng_coords = g.latlng
        # print(lat_lng_coords)

    latitude.append(lat_lng_coords[0])
    longitude.append(lat_lng_coords[1])
    print(elem, 'is RECEIVED.')
    print(lat_lng_coords[0])
    print(lat_lng_coords[1])
    

KeyboardInterrupt: 

Since Google Geocode was not returning any coodinates information even after a log time, I have decided to use data from Geospatial_Coordinates.csv file

In [23]:
df_coords = pd.read_csv('Geospatial_Coordinates.csv')

In [24]:
df_coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [26]:
df_toronto = pd.merge(df_toronto, df_coords.rename(columns={'Postal Code':'Postcode'}), on='Postcode',  how='left')
df_toronto.head(11)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,M4H,East York,Thorncliffe Park,43.705369,-79.349372
2,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
3,M9W,Etobicoke,Northwest,43.706748,-79.594054
4,M9L,North York,Humber Summit,43.756303,-79.565963


In [27]:
df_toronto.head(11)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,M4H,East York,Thorncliffe Park,43.705369,-79.349372
2,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
3,M9W,Etobicoke,Northwest,43.706748,-79.594054
4,M9L,North York,Humber Summit,43.756303,-79.565963
5,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
6,M9N,York,Weston,43.706876,-79.518188
7,M3J,North York,"Northwood Park, York University",43.76798,-79.487262
8,M2H,North York,Hillcrest Village,43.803762,-79.363452
9,M2J,North York,Henry Farm,43.778517,-79.346556


In [29]:
df_toronto.to_csv('toronto_part2.csv')

End of part 2