# Explore and cluster the neighborhoods in Toronto

In [1]:
# import Libraries that we need
import json
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from sklearn.cluster import KMeans

## Scrape the postal code table and read the Geospatial_data csv file


In [2]:
# Download the Wikipedia page that lists the "M" postal codes.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging indefinitely if the server never answers.
result = requests.get(url, timeout=30)
print(url)
print(result.status_code)
print(result.headers)
# Fail here on an HTTP error (4xx/5xx) rather than parsing an error page later.
result.raise_for_status()

https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
200
{'Date': 'Mon, 20 Jan 2020 03:47:31 GMT', 'Vary': 'Accept-Encoding,Cookie,Authorization', 'Server': 'ATS/8.0.5', 'X-ATS-Timestamp': '1579492051', 'Content-Type': 'text/html; charset=UTF-8', 'X-Powered-By': 'PHP/7.2.26-1+0~20191218.33+debian9~1.gbpb5a340+wmf1', 'X-Content-Type-Options': 'nosniff', 'P3P': 'CP="See https://en.wikipedia.org/wiki/Special:CentralAutoLogin/P3P for more info."', 'Content-language': 'en', 'Content-Encoding': 'gzip', 'Last-Modified': 'Sat, 18 Jan 2020 14:35:20 GMT', 'Backend-Timing': 'D=100860 t=1579405897072840', 'X-Varnish': '555012440 221546537', 'Age': '23730', 'X-Cache': 'cp5008 miss, cp5012 hit/8', 'X-Cache-Status': 'hit-front', 'Server-Timing': 'cache;desc="hit-front"', 'Strict-Transport-Security': 'max-age=106384710; includeSubDomains; preload', 'Set-Cookie': 'WMF-Last-Access=20-Jan-2020;Path=/;HttpOnly;secure;Expires=Fri, 21 Feb 2020 00:00:00 GMT, WMF-Last-Access-Global=20-Jan-2020;Path

In [3]:
# Parse the downloaded page and locate the first table in it.
soup = BeautifulSoup(result.content, 'html.parser')
table = soup.find('table')
if table is None:
    # find() returns None when nothing matches; raise a clear error instead of
    # an AttributeError on the find_all() call below.
    raise ValueError('No <table> element found in the downloaded page')
trs = table.find_all('tr')

# Collect the <td> cells of every table row; rows without any <td> are skipped.
rows = []
for tr in trs:
    cells = tr.find_all('td')
    if cells:
        rows.append(cells)

# Build [postalcode, borough, neighborhood] records from the first three cells.
lst = []
for row in rows:
    if len(row) < 3:
        # Incomplete row: skip it rather than raising IndexError.
        continue
    # strip() removes whitespace on both sides so the 'Not assigned'
    # comparisons below are not defeated by leading spaces or newlines.
    postalcode, borough, neighborhood = (cell.text.strip() for cell in row[:3])
    if borough == 'Not assigned':
        # Entries without a borough are dropped.
        continue
    if neighborhood == 'Not assigned':
        # An unassigned neighborhood falls back to the borough name.
        neighborhood = borough
    lst.append([postalcode, borough, neighborhood])

In [4]:
# Turn the scraped records into a DataFrame with named columns.
cols = ['PostalCode', 'Borough', 'Neighborhood']
df = pd.DataFrame(data=lst, columns=cols)
# Report (rows, columns) of the freshly built table.
print(df.shape)
# Duplicate postal codes could be listed with:
#   df[df.duplicated(['PostalCode'], keep=False)]

(210, 3)


In [5]:
# Collapse rows that share a postal code: keep the first borough seen and
# join all neighborhood names into one comma-separated string.
aggregations = {'Borough': 'first', 'Neighborhood': ', '.join}
df = df.groupby('PostalCode').agg(aggregations).reset_index()

In [6]:
# Spot-check the row for postal code M5A.
df[df['PostalCode'].eq('M5A')]


Unnamed: 0,PostalCode,Borough,Neighborhood
53,M5A,Downtown Toronto,Harbourfront


In [7]:
# Display (rows, columns) of df.
df.shape


(103, 3)

In [9]:
# Path of the geospatial coordinates CSV. Set the GEOSPATIAL_CSV environment
# variable to read the file from another location; the default is the
# original local path.
geo_csv_path = os.environ.get("GEOSPATIAL_CSV", "D://downloads//Geospatial_Coordinates.csv")
# Rename the key column to 'PostalCode' so it matches the column name used in df.
# rename() returns a new frame, so re-running this cell gives the same result.
dfgeo = pd.read_csv(geo_csv_path).rename(columns={'Postal Code': 'PostalCode'})

In [10]:
# Attach coordinates to each postal code; a left join keeps every row of df.
df2 = df.merge(dfgeo, how='left', on='PostalCode')


In [11]:
# Spot-check the merged row for postal code M5G.
df2[df2['PostalCode'].eq('M5G')]


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [12]:
# Spot-check the merged row for postal code M9V.
df2[df2['PostalCode'].eq('M9V')]


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437


## Generate map to visualize neighborhoods and how they cluster together


In [13]:
# Toronto map: geocode the city to get the coordinates the map is centered on.
geolocator = Nominatim(user_agent="coursera")
address = 'Toronto'
location = geolocator.geocode(address)
if location is None:
    # No geocoding match: the map cannot be centered, so stop with a clear
    # error instead of failing later on undefined latitude/longitude.
    raise ValueError('Cannot find: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of {} are {}, {}.'.format(address, latitude, longitude))

my_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# Add one circle marker per postal code, labelled with the code itself.
# Rows with a missing latitude or longitude are skipped because a marker
# cannot be placed without coordinates.
located = df2.dropna(subset=['Latitude', 'Longitude'])
for lat, lng, label in zip(located['Latitude'], located['Longitude'], located['PostalCode']):
    label = folium.Popup(label)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

The geograpical coordinates of Toronto are 43.653963, -79.387207.
