<H1>Toronto Neighbourhoods</H1>

Uncomment the lines of code below to install the packages required for this exercise.

In [None]:
# Parsers and libraries needed to read the HTML table:
#pip install pandas
#pip install lxml
#pip install html5lib
#pip install BeautifulSoup4
# Also imported further down in this notebook (geocoding and map rendering):
#pip install geopy
#pip install folium

Import the postal code table from Wikipedia

In [389]:
import pandas as pd

# extract tables from wikipedia
from pandas.io.html import read_html
# Wikipedia article that lists the Canadian postal codes starting with 'M'.
page = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Restrict the extraction to tables whose class attribute is "wikitable".
table_attrs = {"class": "wikitable"}
wikitables = read_html(page, attrs=table_attrs)

table_count = len(wikitables)
print("Extracted {num} wikitables".format(num=table_count))

# Temporary DataFrame (t) holding the first extracted table.
t = wikitables[0]
t.head(3)

Extracted 1 wikitables


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods


In [405]:
# (rows, columns) of the raw table as extracted, before any cleaning
t.shape

(288, 3)

Data Cleaning and Assumptions

In [413]:

# Drop postal codes whose borough is 'Not assigned'.
Toronto_neighbours = t[t['Borough'] != 'Not assigned']

# A postal code can appear on several rows, one per neighbourhood; collapse them
# into a single row per (Postcode, Borough) with the names joined by ', '.
Toronto_neighbours = (
    Toronto_neighbours
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index()
)

# Where the neighbourhood is still 'Not assigned', fall back to the borough name.
# A boolean mask covers every such row (or none at all) instead of relying on a
# hard-coded row label that silently goes stale when the source table changes.
unassigned = Toronto_neighbours['Neighbourhood'] == 'Not assigned'
Toronto_neighbours.loc[unassigned, 'Neighbourhood'] = Toronto_neighbours.loc[unassigned, 'Borough']

# Placeholder columns for the coordinates; the empty string marks a row that
# has not been geocoded yet.
Toronto_neighbours['Latitude'] = ""
Toronto_neighbours['Longitude'] = ""

Toronto_neighbours.shape

(103, 5)

Function to get the latitude and longitude of a location

In [414]:

from geopy.geocoders import Nominatim 

# function to get the latitude and longitude for all the neighbourhoods
def get_lat_long(Address):
    """Geocode a free-text address with Nominatim and return its coordinates.

    Parameters
    ----------
    Address : str
        Query string handed to the geocoder, e.g. 'Parkwoods, Toronto, Ontario'.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the location returned by the geocoder.

    Raises
    ------
    ValueError
        If the geocoder returns no result for ``Address``.
    """
    geolocator = Nominatim(user_agent='a')
    location = geolocator.geocode(Address)
    # geocode() returns None when nothing matches; fail with a message that
    # names the address rather than an AttributeError on None.
    if location is None:
        raise ValueError('No geocoding result for address: {!r}'.format(Address))
    return (location.latitude, location.longitude)

Call the above function to populate the latitude and longitude of all the locations

In [415]:
# Geocode every row of Toronto_neighbours that still holds the "" placeholder
# and store the resulting latitude and longitude in the DataFrame.
neighbourhood_names = Toronto_neighbours['Neighbourhood'].astype(str)  # converted once, not once per row
for i in Toronto_neighbours.index:
    if Toronto_neighbours.at[i, 'Latitude'] == "" and Toronto_neighbours.at[i, 'Longitude'] == "":
        try:
            lat, long = get_lat_long(neighbourhood_names[i] + ', Toronto, Ontario')
        except Exception as err:
            # Keep the placeholder so the row is still reported as missing, and
            # never fall through to reuse coordinates from an earlier row.
            print('Could not geocode {!r}: {}'.format(neighbourhood_names[i], err))
            continue
        # .at writes into the frame itself; chained indexing (df[col][i] = ...)
        # may assign to a temporary copy instead.
        Toronto_neighbours.at[i, 'Latitude'] = lat
        Toronto_neighbours.at[i, 'Longitude'] = long

In [416]:
# Take a backup of Toronto_neighbours with the updated latitude and longitude.
# .copy() creates an independent DataFrame; plain assignment would only bind a
# second name to the same object, so later edits would change the "backup" too.
Toronto_neighbours_BAK = Toronto_neighbours.copy()

Cross-check the data

In [420]:
# List rows that still hold the "" placeholder in either coordinate column.
# An empty result means every row has a latitude and a longitude.
missing_location = (Toronto_neighbours['Latitude'] == '') | (Toronto_neighbours['Longitude'] == '')
Toronto_neighbours[missing_location]

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude


In [419]:
# Report the number of distinct boroughs and the number of rows in the frame.
borough_count = len(Toronto_neighbours['Borough'].unique())
row_count = Toronto_neighbours.shape[0]
summary = 'The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count)
print(summary)

The dataframe has 11 boroughs and 103 neighborhoods.


Plot the data on a map

In [400]:
# create map of Toronto using latitude and longitude values
import folium
# Centre the map on the coordinates the geocoder returns for 'Toronto'.
latitude, longitude = get_lat_long('Toronto')
# The variable name is kept unchanged; it holds the Toronto map.
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row, labelled "<neighbourhood(s)>, <borough>".
marker_rows = zip(
    Toronto_neighbours['Latitude'],
    Toronto_neighbours['Longitude'],
    Toronto_neighbours['Borough'],
    Toronto_neighbours['Neighbourhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    # Rows still holding the "" placeholder have no coordinates to plot.
    if lat == "" or lng == "":
        continue
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # parse_html belongs to Popup only; it is not a CircleMarker parameter,
    # so it is no longer passed to the marker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_newyork)

map_newyork
