# Clustering and Segmenting Toronto Neighborhoods

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_url,'lxml')
# soup.prettify()

## Scrape wikipedia page for table

In [130]:
my_table = soup.find('table',{'class':'wikitable sortable'})

In [131]:
table_rows = my_table.findAll('tr')

In [132]:
header = ['PostalCode', 'Borough', 'Neighborhood']
header

['PostalCode', 'Borough', 'Neighborhood']

In [133]:
l = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text for tr in td]
    l.append(row)
neighborhoods = pd.DataFrame(l, columns=header)

In [134]:
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,,,
1,M1A,Not assigned,Not assigned\n
2,M2A,Not assigned,Not assigned\n
3,M3A,North York,Parkwoods\n
4,M4A,North York,Victoria Village\n


In [135]:
# Remove newlines
neighborhoods['Neighborhood'] = neighborhoods['Neighborhood'].str.replace('\n', '')
neighborhoods.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,,,
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [136]:
# Change all the unknowns to a common value
neighborhoods = neighborhoods.replace('Not assigned', None)
neighborhoods.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,,,
1,M1A,,
2,M2A,,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [137]:
# Drop unknowns
neighborhoods.dropna(how='any', axis='index', inplace=True)
neighborhoods.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights


In [143]:
# Group by Postal Code
neighborhoods = neighborhoods.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(lambda x: ','.join(x.astype(str))).reset_index()
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
