<h1>Segmenting and Clustering Neighborhoods in Toronto</h1>
<h3>import libreries</h3>

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 
import folium

<h3>scrap data and convert them to text </h3>

In [3]:
r  = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
data = r.text
soup = BeautifulSoup(data)

<h3>get the table from the page scraped</h3>

In [4]:
table=soup.table
row_length = len(table.find_all('tr'))

<h3>get data from text  table to list </h3>

In [5]:
data = [ [ 0 for i in range(3) ] for j in range(row_length) ] 
for i in range (1,row_length):
    data[i][0]=table.find_all('tr')[i].find_all('td')[0].getText()
    data[i][1]=table.find_all('tr')[i].find_all('td')[1].getText().rstrip('\n')
    data[i][2]=table.find_all('tr')[i].find_all('td')[2].getText().rstrip('\n')


<h3>convert the list to dataFrame</h3>

In [6]:
df = pd.DataFrame(data, columns = ['PostCode', 'Borough','Neighbourhood']) 
df.drop(0)

Unnamed: 0,PostCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Not assigned
10,M8A,Not assigned,Not assigned


<h3>Delete Not assigned rows </h3>

In [7]:
df=df[df.Borough != 'Not assigned']
df

Unnamed: 0,PostCode,Borough,Neighbourhood
0,0,0,0
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Not assigned
11,M9A,Etobicoke,Islington Avenue
12,M1B,Scarborough,Rouge


<h3>replace not assigned neighborhood by Borough name </h3>
    
 

In [8]:
for i in range (len(df.Neighbourhood)):
    if df.iloc[i,2]=="Not assigned":
        df.iloc[i,2]=df.iloc[i,1]


<h3>Group post code and combine neighborhood</h3>

In [9]:
df = df.groupby(['PostCode','Borough'])['Neighbourhood'].apply(lambda x: ', '.join(x.astype(str))).reset_index()


In [10]:
df.drop(0)

Unnamed: 0,PostCode,Borough,Neighbourhood
1,M1B,Scarborough,"Rouge, Malvern"
2,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
3,M1E,Scarborough,"Guildwood, Morningside, West Hill"
4,M1G,Scarborough,Woburn
5,M1H,Scarborough,Cedarbrae
6,M1J,Scarborough,Scarborough Village
7,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
8,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
9,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
10,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [11]:
post_code= pd.read_csv('http://cocl.us/Geospatial_data')
post_code.columns=['PostCode','Latitude','Longitude']
post_code.head()

Unnamed: 0,PostCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
neighborhoods = pd.merge(df, post_code, on='PostCode')
neighborhoods.head()

Unnamed: 0,PostCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [18]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[ 43.653226, -79.3831843], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Borough'], neighborhoods['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto