# Segmenting and Clustering Neighborhoods in Toronto

Install BeautifulSoup 4 

In [1]:
!pip install beautifulsoup4
!pip install geocoder



In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

req = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

2. Use soup to get table with postcodes and also remove rows where Borough is not assigned *

In [3]:
soup = BeautifulSoup(req.content,'lxml')

table = soup.find('table', attrs={'class':'wikitable sortable'})

table_rows = table.find_all('tr')

res = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row:
        res.append(row)


df = pd.DataFrame(res, columns=["Postcode", "Borough", "Neighbourhood"])
df = df.astype(str)

df.shape



(288, 3)

In [4]:
boroughs = df[df.Borough != 'Not assigned']
boroughs.shape

(211, 3)

* Replace all values where Neighbourhood == 'Not assigned' with Borough Name *

In [5]:
boroughs.Neighbourhood[boroughs.Neighbourhood == 'Not assigned'] = boroughs.Borough

boroughs.shape

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


(211, 3)

* Merge Neighbourhood from same Borough*

In [6]:

finalBdf = boroughs.groupby(['Postcode','Borough']).agg(', '.join).reset_index()
finalBdf.shape

(103, 3)

*** As google geoCoder APIs are not reliable using csv file available. Please have the csv file 'Geospatial_Coordinates.csv' in same foler as jyp notebook ***

In [17]:

geoCoord=pd.read_csv('Geospatial_Coordinates.csv')

geoCoord.rename(index=str, columns={"Postal Code": "Postcode"}, inplace=True)


toronto_data = boroughs.merge(geoCoord, on="Postcode", how = 'inner')

toronto_data.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763
5,M6A,North York,Lawrence Manor,43.718518,-79.464763
6,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
7,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
8,M1B,Scarborough,Rouge,43.806686,-79.194353
9,M1B,Scarborough,Malvern,43.806686,-79.194353


*** Import required libraries ***

In [14]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values


# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library



Now get the coordinates of Toronto, Ontario required for map

In [16]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="tn_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


Add circle marks for all Boroughs to explore the area 

In [22]:
# create map of Manhattan using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto