## Segmenting Neighbourhoods in Toronto

#### Installing and Importing Libraries
#### BeautifulSoup4 and lxml for parsing the HTML page into a readable table
#### pandas for manipulating the DataFrame

In [3]:
!pip install lxml
!pip install bs4
import lxml as lx
import pandas as pd
import requests
from bs4 import BeautifulSoup

Collecting lxml
[?25l  Downloading https://files.pythonhosted.org/packages/55/6f/c87dffdd88a54dd26a3a9fef1d14b6384a9933c455c54ce3ca7d64a84c88/lxml-4.5.1-cp36-cp36m-manylinux1_x86_64.whl (5.5MB)
[K     |████████████████████████████████| 5.5MB 6.5MB/s eta 0:00:01     |█████████████████████████████   | 5.0MB 6.5MB/s eta 0:00:01
[?25hInstalling collected packages: lxml
Successfully installed lxml-4.5.1
Collecting bs4
  Downloading https://files.pythonhosted.org/packages/10/ed/7e8b97591f6f456174139ec089c769f89a94a1a4025fe967691de971f314/bs4-0.0.1.tar.gz
Collecting beautifulsoup4 (from bs4)
[?25l  Downloading https://files.pythonhosted.org/packages/66/25/ff030e2437265616a1e9b25ccc864e0371a0bc3adb7c5a404fd661c6f4f6/beautifulsoup4-4.9.1-py3-none-any.whl (115kB)
[K     |████████████████████████████████| 122kB 6.6MB/s eta 0:00:01
[?25hCollecting soupsieve>1.2 (from beautifulsoup4->bs4)
  Downloading https://files.pythonhosted.org/packages/6f/8f/457f4a5390eeae1cc3aeab89deb7724c965be841ffca6

In [6]:
# Getting the data: download the Wikipedia list of Canadian postal codes
# that start with "M".
WIKI_URL = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(WIKI_URL, timeout=30)  # never hang forever on a stalled connection
response.raise_for_status()  # stop on an HTTP error instead of parsing an error page
website_text = response.text

# The page is HTML, so use lxml's HTML parser ('lxml'); 'xml' selects the XML
# parser, which is not meant for HTML documents.
soup = BeautifulSoup(website_text, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'})
if table is None:
    raise ValueError("No 'wikitable sortable' table found at " + WIKI_URL)

# Creating the dataframe: keep only rows with exactly three <td> cells
# (postal code, borough, neighborhood); any other row is skipped.
records = []
for row in table.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) == 3:
        # get_text() gathers all text inside the cell, including text nested in
        # links; find(text=True) only returned the first text node and returned
        # None (crashing on .strip()) for an empty cell.
        records.append([cell.get_text().strip() for cell in cells])

df = pd.DataFrame(records, columns=['Postal Code', 'Borough', 'Neighborhood'])

# Removing the Not Assigned Boroughs. The original row index is kept, so the
# displayed index has gaps where rows were dropped.
df = df[df['Borough'] != 'Not assigned']
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [12]:
# Row and column counts of the filtered postal-code frame.
df.shape

(103, 3)

In [10]:
# Spot-check a hand-picked set of postal codes in the scraped table.
sample_codes = ['M5G','M2H','M4B','M1J','M4M','M1R','M9V','M9L','M5V','M1B','M5A','M4G']
in_sample = df['Postal Code'].isin(sample_codes)
df.loc[in_sample]

Unnamed: 0,Postal Code,Borough,Neighborhood
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
9,M1B,Scarborough,"Malvern, Rouge"
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
39,M4G,East York,Leaside
40,M5G,Downtown Toronto,Central Bay Street
46,M2H,North York,Hillcrest Village
54,M1J,Scarborough,Scarborough Village
80,M9L,North York,Humber Summit
84,M4M,East Toronto,Studio District
108,M1R,Scarborough,"Wexford, Maryvale"


In [13]:
# Load the coordinates table from a CSV file. The path is relative, so it is
# resolved against the kernel's current working directory.
df_geo = pd.read_csv('Geospatial_Coordinates.csv')
# Preview the first rows of the loaded frame.
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
# Row and column counts of the coordinates frame.
df_geo.shape

(103, 3)

In [15]:
# Attach the coordinate columns of df_geo to each postal code in df.
# Both frames share the 'Postal Code' column, so a single `on=` key replaces the
# redundant left_on/right_on pair.
# how='inner' (the pandas default, now explicit): postal codes missing from
# either frame are dropped from the result.
# validate='one_to_one': pandas raises MergeError if a postal code is duplicated
# in either frame, instead of silently multiplying rows.
df1 = df.merge(df_geo, on='Postal Code', how='inner', validate='one_to_one')
df1.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [17]:
# Spot-check the same hand-picked postal codes in the merged table.
sample_codes = ['M5G','M2H','M4B','M1J','M4M','M1R','M9V','M9L','M5V','M1B','M5A','M4G']
in_sample = df1['Postal Code'].isin(sample_codes)
df1.loc[in_sample]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
23,M4G,East York,Leaside,43.70906,-79.363452
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
27,M2H,North York,Hillcrest Village,43.803762,-79.363452
32,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
50,M9L,North York,Humber Summit,43.756303,-79.565963
54,M4M,East Toronto,Studio District,43.659526,-79.340923
71,M1R,Scarborough,"Wexford, Maryvale",43.750072,-79.295849


In [16]:
# Summarise the merged frame: distinct boroughs, distinct neighborhoods, and shape.
# dropna=False makes nunique() count a missing value as a distinct entry, which
# matches len(series.unique()).
borough_count = df1['Borough'].nunique(dropna=False)
neighborhood_count = df1['Neighborhood'].nunique(dropna=False)
merged_shape = df1.shape
print('Number of Unique Boroughs', borough_count)
print('Number of Unique Neighborhoods', neighborhood_count)
print('Shape of Merged DataFrame', merged_shape)

Number of Unique Boroughs 10
Number of Unique Neighborhoods 99
Shape of Merged DataFrame (103, 5)
