Explore and cluster the neighborhoods in Toronto.

In [11]:
!pip install BeautifulSoup4
!pip install requests
!pip install geocoder



In [12]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

# Fetch the Wikipedia page and build the soup object.
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# us from silently parsing an HTTP error page.
source = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
source.raise_for_status()
soup = BeautifulSoup(source.text, 'lxml')

# Locate the first element carrying the 'wikitable' class; fail with a clear
# message if it is missing instead of an AttributeError on None further down.
table = soup.find(class_='wikitable')
if table is None:
    raise ValueError("No element with class 'wikitable' was found on the page")

# One list of right-stripped cell texts per <tr>; header (<th>) and data (<td>)
# cells are collected alike.
rows = [
    [cell.text.rstrip() for cell in tr.find_all(['th', 'td'])]
    for tr in table.find_all('tr')
]

# The first row is the header, everything after it is data.
columns = rows[0] if rows else []
data = rows[1:]

# Convert the collected rows into a pandas DataFrame
canada_df = pd.DataFrame(data=data, columns=columns)
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [13]:
# Keep only rows whose borough has been assigned.
# .copy() detaches the result from the unfiltered frame so that later column
# assignments modify a real DataFrame rather than a slice (which would raise
# pandas' SettingWithCopyWarning).
has_borough = canada_df['Borough'] != 'Not assigned'
canada_df = canada_df.loc[has_borough].copy()
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [14]:
# Make the cell safe to re-run: if 'Postal Code' is already the index, turn it
# back into a column first. Otherwise drop_duplicates() below would compare
# only Borough/Neighbourhood and could discard distinct postal codes that
# happen to share both values.
if canada_df.index.name == 'Postal Code':
    canada_df = canada_df.reset_index()

# More than one neighbourhood can exist in one postal code area: combine them
# into a single comma-separated string per postal code.
# assign() returns a new frame instead of writing a column into a filtered one.
canada_df = canada_df.assign(
    Neighbourhood=canada_df.groupby("Postal Code")["Neighbourhood"].transform(
        lambda names: ', '.join(names)
    )
)

# Remove rows that are now exact duplicates of each other
canada_df = canada_df.drop_duplicates()

# Use the postal code as the index
canada_df = canada_df.set_index('Postal Code')

canada_df.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [15]:
# Where the neighbourhood is "Not assigned", fall back to the borough of the
# same row. Series.mask substitutes row by row, and assigning the result back
# (rather than calling replace(..., inplace=True) on a column selection)
# guarantees that canada_df itself is updated.
unassigned = canada_df['Neighbourhood'] == "Not assigned"
canada_df = canada_df.assign(
    Neighbourhood=canada_df['Neighbourhood'].mask(unassigned, canada_df['Borough'])
)
canada_df.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,"Regent Park, Harbourfront"
M6A,North York,"Lawrence Manor, Lawrence Heights"
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [16]:
# Show the (rows, columns) dimensions of canada_df at this point
canada_df.shape

(103, 2)

In [17]:
import geopy
from  geopy.geocoders import Nominatim
# Single place for the user agent sent with every Nominatim request
# (placeholder value - presumably meant to be replaced by a real contact; confirm).
user_agent = "X@yy.com"

nominatim_service = Nominatim(user_agent=user_agent)
# Also register it as geopy's library-wide default user agent
geopy.geocoders.options.default_user_agent = user_agent
# Geocoder used by the following cells; the user agent is passed explicitly
geolocator = Nominatim(user_agent=user_agent)

In [18]:
# Look up the coordinates of the city itself
city = "Toronto"
country = "Canada"
loc = geolocator.geocode(f"{city},{country}")
print(f"latitude is :- {loc.latitude} \nlongtitude is:- {loc.longitude}")

latitude is :- 43.6534817 
longtitude is:- -79.3839347


In [22]:
# Geocode one neighbourhood and show its address, its (latitude, longitude)
# pair and the raw response, each separated by a blank line.
location = geolocator.geocode("Toronto, North York, Parkwoods")
coordinates = (location.latitude, location.longitude)
print(location.address, coordinates, location.raw, sep='\n\n')

Parkwoods Village Drive, Parkway East, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M3A 2X2, Canada

(43.7587999, -79.3201966)

{'place_id': 128673886, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright', 'osm_type': 'way', 'osm_id': 160406961, 'boundingbox': ['43.7576231', '43.761106', '-79.3239088', '-79.316215'], 'lat': '43.7587999', 'lon': '-79.3201966', 'display_name': 'Parkwoods Village Drive, Parkway East, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M3A 2X2, Canada', 'class': 'highway', 'type': 'secondary', 'importance': 0.51}


In [29]:
import geopy
from  geopy.geocoders import Nominatim
import pandas as pd
from geopy.extra.rate_limiter import RateLimiter

locator = Nominatim(user_agent="KapilsGeocoder")
location = locator.geocode("Toronto, Canada")
# PostalCode  Borough  Neighborhood
df_temp = canada_df.copy()
# 1 - convenient wrapper that waits at least one second between geocoding calls
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
# 2 - build the address string for each row and geocode it into a Location column
# NOTE(review): rows holding several comma-joined neighbourhoods are sent as one
# query; the displayed output shows several of them coming back empty.
df_temp['Address'] = df_temp['Neighbourhood'].astype(str) + ',' + ' Toronto'
df_temp['Location'] = df_temp['Address'].apply(geocode)
# 3 - extract the (latitude, longitude, altitude) tuple from each location;
#     rows without a geocoding result keep None
df_temp['Point'] = df_temp['Location'].apply(lambda loc: tuple(loc.point) if loc else None)
# 4 - split the Point column into latitude, longitude and altitude columns.
#     Missing points are expanded to an explicit NaN triple so the split always
#     yields three columns, even when the first row (or every row) has no result.
coordinate_columns = ['latitude', 'longitude', 'altitude']
missing_point = (float('nan'),) * len(coordinate_columns)
df_temp[coordinate_columns] = pd.DataFrame(
    [point if point is not None else missing_point for point in df_temp['Point']],
    index=df_temp.index,
    columns=coordinate_columns,
)
df_temp # the latitude and longitude coordinates obtained for each neighbourhood

Unnamed: 0_level_0,Borough,Neighbourhood,Address,Location,Point,latitude,longitude,altitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M3A,North York,Parkwoods,"Parkwoods, Toronto","(Parkwoods Village Drive, Parkway East, Don Va...","(43.7587999, -79.3201966, 0.0)",43.758800,-79.320197,0.0
M4A,North York,Victoria Village,"Victoria Village, Toronto","(Victoria Village, Don Valley East, North York...","(43.732658, -79.3111892, 0.0)",43.732658,-79.311189,0.0
M5A,Downtown Toronto,"Regent Park, Harbourfront","Regent Park, Harbourfront, Toronto",,,,,
M6A,North York,"Lawrence Manor, Lawrence Heights","Lawrence Manor, Lawrence Heights, Toronto","(Lawrence Avenue West, Lawrence Manor, Eglinto...","(43.7163909, -79.4425663, 0.0)",43.716391,-79.442566,0.0
M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government","Queen's Park, Ontario Provincial Government, T...",,,,,
...,...,...,...,...,...,...,...,...
M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North","The Kingsway, Montgomery Road, Old Mill North,...",,,,,
M4Y,Downtown Toronto,Church and Wellesley,"Church and Wellesley, Toronto","(The Toronto, 77, Howard Street, St. James Tow...","(43.6708625, -79.37279241253721, 0.0)",43.670862,-79.372792,0.0
M7Y,East Toronto,"Business reply mail Processing Centre, South C...","Business reply mail Processing Centre, South C...",,,,,
M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...","Old Mill South, King's Mill Park, Sunnylea, Hu...",,,,,
