<h1>Segmenting and Clustering Neighborhoods in Toronto</h1>

<h2>Get Neighbourhood by Postcode</h2>

In [1]:
import requests
import pandas as pd

In [2]:
# Get the webpage that lists the Canadian postal codes starting with "M".
# A timeout is passed because requests waits indefinitely by default; without it
# a stalled connection would hang the kernel instead of raising an error.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
response = requests.get(url, timeout=30)

In [3]:
# Parse the page only when the request came back with HTTP 200 or 201
status = response.status_code
if status not in (200, 201):
    # Print an error message if you were unable to connect
    print("Error: " + str(status))
    print("Could not connect.")
else:
    # read_html returns a list of the tables it finds; the first one is used
    frames = pd.read_html(response.content, flavor='html5lib')
    df = frames[0]
    print("Data loaded sucessfully")

Data loaded sucessfully


In [4]:
# Keep only the rows whose Borough is something other than "Not assigned"
has_borough = df["Borough"] != "Not assigned"
df = df[has_borough]

In [5]:
# Concatenate neighbourhoods into one comma-separated row per postcode/borough
neighbourhoods_by_code = df.groupby(['Postcode', 'Borough'])['Neighbourhood']
df = neighbourhoods_by_code.apply(','.join).reset_index()

In [6]:
# Copy Borough to Neighbourhood where the neighbourhood is "Not assigned".
# A single masked .loc assignment replaces the original row loop: the
# df['Neighbourhood'][i] = ... form is chained indexing, which pandas warns may
# write to a temporary copy instead of df, and it looked rows up by the integer
# labels 0..len(df)-1 rather than by position.
unassigned = df['Neighbourhood'] == 'Not assigned'
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']

In [7]:
# Show the (rows, columns) dimensions of the cleaned dataframe
df.shape

(103, 3)

<h2>Add Latitude and Longitude Information</h2>

In [8]:
# Get the latitude and longitude data.
# The CSV is read straight from the URL, so this cell needs network access.
# NOTE: this rebinds `url`, which previously held the Wikipedia address.
url = "https://cocl.us/Geospatial_data"
geo = pd.read_csv(url)

In [9]:
# Join the coordinates onto the postcode table, matching 'Postcode' to 'Postal Code'
df = pd.merge(df, geo, left_on='Postcode', right_on='Postal Code')

In [10]:
# Tidy the merged dataframe: drop the duplicate key column from the geo data
# and rename the remaining key column to 'PostalCode'
df = (
    df.drop(columns=['Postal Code'])
      .rename(columns={'Postcode': 'PostalCode'})
)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [11]:
# Show the (rows, columns) dimensions after the merge and column clean-up
df.shape

(103, 5)

<h2>Analysis</h2>

In [14]:
import folium

In [17]:
# Keep only the rows whose Borough name contains "Toronto"
in_toronto = df['Borough'].str.contains("Toronto")
# reset_index() keeps the previous row labels in a new 'index' column
toronto = df[in_toronto].reset_index()
toronto.head()

Unnamed: 0,index,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,37,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,41,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,42,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,43,M4M,East Toronto,Studio District,43.659526,-79.340923
4,44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [22]:
# Create a map of Toronto centred on the first row of `toronto`.
# .iloc[0] selects by position, so the centre does not depend on the
# dataframe's index labels starting at 0.
map_toronto = folium.Map(
    location=[toronto['Latitude'].iloc[0], toronto['Longitude'].iloc[0]],
    zoom_start=10,
)

# Add one circle marker per row, with a "<neighbourhoods>, <borough>" popup
for lat, lng, borough, neighborhood in zip(toronto['Latitude'], toronto['Longitude'], toronto['Borough'], toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html is a Popup option; it was also being passed to CircleMarker,
    # where it is not a marker or path-styling option, so it is set only here.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

# Last expression of the cell: renders the map inline
map_toronto