# Project: Battle of the Neighborhoods

This notebook is for the capstone project in the IBM Applied Data Science specialisation.

### Week 1: Setting up the Jupyter notebook

In [1]:
import pandas as pd
import numpy as np

In [2]:
# 1st Assignment - Set up the Jupyter notebook on a hosted service.
# Printing a greeting confirms that the kernel executes code.
print("Hello Capstone Project Course!")

Hello Capstone Project Course!


### Week 3: Segmenting and Clustering Neighborhoods in Toronto

#### Part 1: Storing Toronto Neighborhood Information into pandas DataFrame

In [8]:
# `req` performs the HTTP download and `Soup` parses the returned HTML
import requests as req
from bs4 import BeautifulSoup as Soup

# Visible confirmation that both imports succeeded
print("Imports done")

Imports done


In [22]:
# Fetch data from the resource link and store it as a '#'-separated csv file
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() fails loudly instead of parsing an HTTP error page.
response = req.get(url, timeout=30)
response.raise_for_status()
res = response.text
soup = Soup(res, 'html.parser')

# soup.table is the first <table> element of the page, or None when there is none
if soup.table is None:
    raise ValueError("No <table> element found at {}".format(url))

# Use # as the separator since , can be used inside a cell to group addresses.
# The [1:] slice drops the first character, i.e. the leading separator.
csv_data = soup.table.get_text().replace('\n\n\n', '\n').replace('\n\n', '#')[1:]

# Write data into the .csv file. The encoding is explicit so the file is UTF-8
# (the default pandas.read_csv assumes) rather than the platform's locale encoding.
with open("toronto.csv", "w", encoding="utf-8") as f:
    f.write(csv_data)

In [33]:
# Read the '#'-separated file into df; the first line holds the column names
df = pd.read_csv("toronto.csv", header=0, sep='#')

# Remove any rows whose borough is "Not assigned"
df = df[df["Borough"] != "Not assigned"].reset_index(drop=True)

# Give a neighborhood the name of its borough when it is itself unassigned.
# The marker is compared after trimming and lower-casing so that it matches the
# "Not assigned" spelling used for boroughs above as well as "Not Assigned";
# missing values never match and are left unchanged.
df["Neighborhood"] = df["Neighborhood"].mask(
    df["Neighborhood"].str.strip().str.lower() == "not assigned",
    df["Borough"],
)

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [34]:
# (rows, columns) of df after the clean-up
df.shape

(103, 3)

#### Part 2: Fetching & Storing Neighborhood Coordinates into pandas DataFrame

In [35]:
# Read the csv file with the coordinates and store it in a separate df
lat_df = pd.read_csv("https://cocl.us/Geospatial_data")

# Preview the first rows
lat_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [40]:
# Attach the coordinates to every row of df by matching on "Postal Code".
# A left merge keeps every row of df, in df's order, whether or not a
# matching postal code exists in lat_df.
merged_df = df.merge(lat_df, on="Postal Code", how="left")

merged_df.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


#### Part 3: Clustering Neighborhoods in Toronto

Create `new_df` containing only the boroughs with "Toronto" in their name

In [43]:
# Select boroughs with "Toronto" in their name (case-insensitive).
# The vectorised .str accessor replaces a per-row Python lambda; regex=False
# makes this a plain substring test, and na=False treats a missing borough as
# "no match" instead of raising on a non-string value.
new_df = merged_df[
    merged_df["Borough"].str.lower().str.contains("toronto", regex=False, na=False)
].reset_index(drop=True)

new_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [44]:
# (rows, columns) of the Toronto-only selection
new_df.shape

(39, 5)

Checking to see how the neighborhoods are distributed amongst the boroughs

In [53]:
# Number of non-null values in each remaining column, per borough
new_df.groupby("Borough").count()

Unnamed: 0_level_0,Postal Code,Neighborhood,Latitude,Longitude
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,9,9,9,9
Downtown Toronto,19,19,19,19
East Toronto,5,5,5,5
West Toronto,6,6,6,6


In [57]:
!pip install folium
import folium

print("Imported folium!")

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 9.7MB/s ta 0:00:011
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Imported folium!


Show a map of Toronto in which the neighborhoods are colour-coded by borough, so that those belonging to the same borough are easy to tell apart

In [67]:
# Centre of the map: the mean latitude and longitude of the selected postal codes
toronto_coords = new_df['Latitude'].mean(), new_df['Longitude'].mean()

# Outline colour of the markers, one unique colour per borough
borough_color = dict(zip(
    ["Central Toronto", "Downtown Toronto", "East Toronto", "West Toronto"],
    ["blue", "green", "orange", "purple"],
))

# Base map of Toronto centred on the computed coordinates
map_toronto = folium.Map(location=list(toronto_coords), zoom_start=12)

# Add one circle marker per row of new_df
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhoods in new_df[marker_columns].itertuples(index=False, name=None):
    # Only the first neighborhood of the row is shown, otherwise the popup gets hard to read
    first_neighborhood = neighborhoods.split(',')[0]
    label = folium.Popup('{}: [{}]'.format(first_neighborhood, borough), parse_html=True)

    # NOTE(review): parse_html is a Popup option; confirm CircleMarker does anything with it
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=5,
        popup=label,
        color=borough_color[borough],
        fill=True,
        fill_color="grey",
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

In [69]:
# The code was removed by Watson Studio for sharing.