## Segmenting and Clustering Neighbourhoods of Toronto

#### importing necessary libraries

In [1]:
import pandas as pd
import numpy as np
import folium
from bs4 import BeautifulSoup  # for web-scraping
import requests

#### accessing html content and reading the text data

In [2]:
# Download the Wikipedia page listing Toronto ("M") postal codes.
# A timeout keeps the cell from hanging forever, and raise_for_status()
# makes an HTTP error fail here instead of surfacing later as a parse error.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
data = response.content

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results can differ between machines.
soup = BeautifulSoup(data, 'html.parser')
table = soup.find('table')  # first <table> on the page
if table is None:
    raise ValueError('no <table> element found in the downloaded page')
tr = table.find_all('tr')

# One list of cell texts per table row. The header row is built from <th>
# cells, so it yields an empty list here.
row = []
for tr_tag in tr:  # distinct loop name so the list `tr` is not clobbered
    td = tr_tag.find_all('td')
    row.append([cell.text for cell in td])

#### converting the data into the required dataframe and cleaning the data as required

In [3]:
COLUMN_NAMES = ['Postal Code', 'Borough', 'Neighbourhood']

# row[0] is the table's header row; skip it and name the columns explicitly
# instead of overwriting row[0] in place, so re-running this cell is harmless.
df = pd.DataFrame(row[1:], columns=COLUMN_NAMES)

# Cell texts carry surrounding whitespace (typically a trailing newline).
# Strip it rather than chopping the last character, which would eat a real
# character whenever a cell does not end with a newline.
for col in COLUMN_NAMES:
    df[col] = df[col].str.strip()

# Drop every entry whose borough is 'Not assigned' and renumber the rows.
df = df[df['Borough'] != 'Not assigned'].reset_index(drop=True)

# Where the neighbourhood is 'Not assigned', fall back to the borough name.
unassigned = df['Neighbourhood'] == 'Not assigned'
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']

In [4]:
# Report how many postal-code rows remain after cleaning.
print('there are {} no. of rows in the dataframe'.format(len(df)))

there are 103 no. of rows in the dataframe


#### using the geocoder library to look up the latitude and longitude of each neighbourhood and adding them to the dataframe

In [18]:
#importing geocoder library
import geocoder

In [38]:
lat, lng = [], []  # one latitude / longitude per dataframe row, in row order

for code, bor, nei in zip(df['Postal Code'], df['Borough'], df['Neighbourhood']):

    # Query ArcGIS with "<postal code>,<neighbourhood>,<borough>,Toronto".
    try:
        ll = geocoder.arcgis('{},{},{},Toronto'.format(code, nei, bor)).latlng
    except Exception:
        # Best effort: a failed lookup must not abort the whole loop.
        ll = None

    # A failed or empty lookup leaves `ll` as None; indexing it would raise
    # TypeError, so record NaN for that row and keep the lists aligned with df.
    if not ll:
        ll = (float('nan'), float('nan'))

    lat.append(ll[0])
    lng.append(ll[1])

df['Latitude'] = lat  # adding the data to the dataframe
df['Longitude'] = lng

In [44]:
# Display the dataframe as the cell's output.
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.752935,-79.335641
1,M4A,North York,Victoria Village,43.728102,-79.311890
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.659208,-79.361443
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.723570,-79.437110
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.660192,-79.390411
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.650271,-79.500021
99,M4Y,Downtown Toronto,Church and Wellesley,43.665696,-79.380932
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.648690,-79.385440
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.644360,-79.567130
