# Week 3: Segmenting and Clustering Neighborhoods in Toronto

In [156]:
# Import packages
import requests
from bs4 import BeautifulSoup as BS
import pandas as pd
import geocoder
import numpy as np
import folium

### Question 2 - 4

Use BeautifulSoup to scrape the Wikipedia page "List of postal codes of Canada: M" and extract its postal-code table (postal code, borough, neighbourhood).

In [17]:
# Download the Wikipedia page listing Canadian postal codes that start with "M"
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# The context manager closes the session (and its pooled connections) once the
# request is done; the name `s` stays bound afterwards, as before.
with requests.Session() as s:
    page = s.get(url, timeout = 10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page
    page.raise_for_status()
    response = page.text

# Parse the HTML text with the lxml parser
soup = BS(response, "lxml")

In [101]:
# Locate the postal-code table. `attrs` must be a dict mapping attribute name to
# value; the previous set literal {"class", "wikitable sortable"} only worked
# because BeautifulSoup treats any non-dict `attrs` as a CSS-class search.
table = soup.find("table", {"class": "wikitable sortable"})
if table is None:
    raise ValueError("No table with class 'wikitable sortable' found on the page")

# Walk the rows, keeping only those with exactly three data cells and an
# assigned borough.
pc = []  # postal codes
b = []   # boroughs
nh = []  # neighbourhoods
for row in table.find_all("tr"):
    # Clean each cell once: drop newlines and trim surrounding whitespace
    cells = [cell.text.replace("\n", "").strip() for cell in row.find_all("td")]
    # Rows without exactly three <td> cells (e.g. the header row, which
    # presumably uses <th>) carry no data and are skipped.
    if len(cells) != 3:
        continue
    postal_code, borough, neighbourhood = cells
    if borough == "Not assigned":
        continue
    pc.append(postal_code)
    b.append(borough)
    nh.append(neighbourhood)

# Assemble the columns into a DataFrame
table_dict = {"PostalCode": pc, "Borough": b, "Neighbourhood": nh}
df = pd.DataFrame(table_dict)

In [143]:
# Preview the first rows of the scraped postal-code table
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [131]:
# Dimensions of the scraped table as (rows, columns)
df.shape

(103, 3)

## Question 5

In [153]:
# Load the latitude/longitude lookup and align its key column name with `df`
coords = (
    pd.read_csv("Geospatial_Coordinates.csv")
    .rename(columns={"Postal Code": "PostalCode"})
)

# Attach coordinates to each postal code (inner join on the shared key)
df_merged = pd.merge(df, coords, how="inner", on="PostalCode")
df_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [162]:
# Create a map of Toronto centred on the mean of all postal-code coordinates.
# (The first row is just one arbitrary postal code, and label-based `[0]`
# indexing would break if the frame's index were ever not 0-based.)
map_toronto = folium.Map(
    location=[df_merged['Latitude'].mean(), df_merged['Longitude'].mean()],
    zoom_start = 10,
)

# Add one circle marker per postal code, labelled "<neighbourhood>, <borough>"
for lat, lng, borough, neighborhood in zip(df_merged['Latitude'],
                                           df_merged['Longitude'],
                                           df_merged['Borough'],
                                           df_merged['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True lets the Popup handle the label as text, so names with
    # apostrophes (e.g. "Queen's Park") do not break the generated page.
    popup = folium.Popup(label, parse_html=True)
    # `parse_html` is a Popup option, not a CircleMarker option, so it is no
    # longer passed to the marker itself.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

map_toronto