#### Neighborhoods in Toronto

Scrape the Wikipedia page

In [18]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium

In [2]:
page = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(page.content, 'html.parser')

In [3]:
table = soup.find('tbody')
rows = table.select('tr')
row = [r.get_text() for r in rows]

In [4]:
print(row)

['\nPostcode\nBorough\nNeighbourhood\n', '\nM1A\nNot assigned\nNot assigned\n', '\nM2A\nNot assigned\nNot assigned\n', '\nM3A\nNorth York\nParkwoods\n', '\nM4A\nNorth York\nVictoria Village\n', '\nM5A\nDowntown Toronto\nHarbourfront\n', '\nM5A\nDowntown Toronto\nRegent Park\n', '\nM6A\nNorth York\nLawrence Heights\n', '\nM6A\nNorth York\nLawrence Manor\n', "\nM7A\nQueen's Park\nNot assigned\n", '\nM8A\nNot assigned\nNot assigned\n', '\nM9A\nEtobicoke\nIslington Avenue\n', '\nM1B\nScarborough\nRouge\n', '\nM1B\nScarborough\nMalvern\n', '\nM2B\nNot assigned\nNot assigned\n', '\nM3B\nNorth York\nDon Mills North\n', '\nM4B\nEast York\nWoodbine Gardens\n', '\nM4B\nEast York\nParkview Hill\n', '\nM5B\nDowntown Toronto\nRyerson\n', '\nM5B\nDowntown Toronto\nGarden District\n', '\nM6B\nNorth York\nGlencairn\n', '\nM7B\nNot assigned\nNot assigned\n', '\nM8B\nNot assigned\nNot assigned\n', '\nM9B\nEtobicoke\nCloverdale\n', '\nM9B\nEtobicoke\nIslington\n', '\nM9B\nEtobicoke\nMartin Grove\n', '\nM

Clean the Data

In [5]:
df = pd.DataFrame(row)
df1 = df[0].str.split('\n', expand=True)
df2 = df1.rename(columns=df1.iloc[0])
df3 = df2.drop(df2.index[0])
df3.head()

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
1,,M1A,Not assigned,Not assigned,
2,,M2A,Not assigned,Not assigned,
3,,M3A,North York,Parkwoods,
4,,M4A,North York,Victoria Village,
5,,M5A,Downtown Toronto,Harbourfront,


## Ignore cells with a borough that is Not assigned

In [6]:

df4 = df3[df3.Borough != 'Not assigned']
df4.head()

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
3,,M3A,North York,Parkwoods,
4,,M4A,North York,Victoria Village,
5,,M5A,Downtown Toronto,Harbourfront,
6,,M5A,Downtown Toronto,Regent Park,
7,,M6A,North York,Lawrence Heights,


## Combine neighborhoods which have the same postcode

In [8]:
df5 = df4.groupby(['Postcode', 'Borough'], sort = False).agg(','.join)
df5.reset_index(inplace = True)
df5.head()

# df5 = df4.groupby(['Postcode', 'Borough'], as_index=False).agg(lambda neighborhoods: ', '.join(neighborhoods))

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Not assigned


## Change the value of the Neighborhood to be like the Borough (Queen's Park)

In [9]:
df6 = df5.replace("Not assigned", "Queen's Park")
df6.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


Number of Rows

In [14]:
df.shape

(289, 1)

Read csv file of Geospatial_data

In [15]:
url = "http://cocl.us/Geospatial_data"
df7 = pd.read_csv(url)
df7.rename(columns={'Postal Code': 'Postcode'}, inplace=True)
df7.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [16]:
df8 = pd.merge(df6, df7, on='Postcode')
df8.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [22]:

geolocator = Nominatim(user_agent="Coursera")
location = geolocator.geocode(address,timeout=10)
latitude = location.latitude
longitude = location.longitude

Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, borough, neighborhood in zip(df8['Latitude'], df8['Longitude'], 
                                           df8['Borough'], df8['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_map)  
    
Toronto_map