# Segmenting and Clustering Neighborhoods in Toronto Assignment

## Part 1 - Getting Toronto Data

In [1]:
from io import BytesIO

import pandas as pd
import requests

In [9]:
# Scrape the Toronto ("M") postal-code table from Wikipedia and reduce it to
# one row per (Postal Code, Borough) with the neighborhood names comma-joined.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Bound the request and fail loudly on HTTP errors, so an error page is never
# parsed as if it were the postal-code table.
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.content

# Passing literal HTML to read_html is deprecated (pandas >= 2.1); hand it a
# file-like object instead.
df_list = pd.read_html(BytesIO(html))
postal_raw = df_list[0]

# Keep only postal codes that have a borough assigned.
postal_assigned = postal_raw[postal_raw["Borough"] != "Not assigned"].reset_index(drop=True)

# Where the neighborhood is missing, fall back to the borough name.
# Done via assign() rather than `df.Neighborhood.fillna(..., inplace=True)`:
# the in-place call on an attribute-accessed column of a filtered frame is a
# chained assignment that does not update the frame under Copy-on-Write.
postal_filled = postal_assigned.assign(
    Neighborhood=postal_assigned["Neighborhood"].fillna(postal_assigned["Borough"])
)

# Collapse multiple rows for the same postal code into one comma-separated list.
df = (
    postal_filled
    .groupby(["Postal Code", "Borough"])["Neighborhood"]
    .agg(", ".join)
    .reset_index()
)

df.shape

(103, 3)

## Part 2 - Merging Geo Coordinates Data

In [10]:
# Attach latitude/longitude to each postal code from the geospatial CSV.
cord_df = pd.read_csv('https://cocl.us/Geospatial_data')

# Merge from the three postal-code columns only. The result is rebound to `df`,
# so merging the full frame again on a re-run of this cell would produce
# Latitude_x/Latitude_y (and Longitude_x/_y) columns and break later cells.
df = pd.merge(
    df[["Postal Code", "Borough", "Neighborhood"]],
    cord_df,
    on="Postal Code",
)

# Note: the second number is the row count, i.e. one entry per postal code
# (a row may list several comma-separated neighborhood names).
print('Toronto has {} boroughs and {} neighborhoods.'.format(
        df['Borough'].nunique(),
        len(df)
    )
)
df.head()

Toronto has 10 boroughs and 103 neighborhoods.


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Part 3 - Clustering the Neighbourhoods of Toronto, Canada

In [12]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

In [18]:
# Centre point used for the initial map view
# (presumably downtown Toronto -- confirm the source of these coordinates).
latitude = 43.6534817
longitude = -79.3839347
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# Add one circle marker per row of `df`, with a "<neighborhood>, <borough>" popup.
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    # parse_html=True makes folium escape the label text instead of treating it as raw HTML.
    popup = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    # `parse_html` is a Popup option, not a CircleMarker/Leaflet path option,
    # so it is intentionally not passed to the marker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto