# Scraping Toronto neighborhood data

In [None]:
pip install lxml
pip install html5lib
pip install BeautifulSoup4
import pandas as pd
import numpy as np

In [22]:
# Load the Toronto ("M") postal-code table from Wikipedia. `match` restricts
# the result to tables whose text contains "Borough"; the first one is used.
# NOTE(review): the column assignment below assumes that table has exactly
# three columns in this order -- confirm against the live page.
raw = pd.read_html('http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', match="Borough")[0]
raw.columns = ["PostalCode", "Borough", "Neighborhood"]

# Keep only postal codes that have a borough. Filtering into a new frame
# replaces `df['Borough'].replace(..., inplace=True)`, which mutates a column
# selection in place and has no effect on the frame under pandas
# copy-on-write.
assigned = raw[raw['Borough'].notna() & (raw['Borough'] != "Not assigned")]

# A neighborhood recorded as "Not assigned" takes its borough's name. This is
# done row by row *before* the join so that a postal code mixing a real name
# with a "Not assigned" row is handled too. `mask` aligns the replacement
# values on the index.
neighborhood = assigned['Neighborhood'].mask(
    assigned['Neighborhood'] == "Not assigned", assigned['Borough']
)

# Collapse to one row per (PostalCode, Borough) with the neighborhood names
# comma-joined. sort=False keeps the order in which postal codes first appear
# in the source table; reset_index() restores the keys as columns on a fresh
# 0..n-1 index.
df = (
    assigned
    .assign(Neighborhood=neighborhood)
    .groupby(['PostalCode', 'Borough'], sort=False)['Neighborhood']
    .agg(','.join)
    .reset_index()
)
df.shape

(103, 3)

In [23]:
# Display the first 15 rows of the cleaned postal-code table.
df.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


# Combine the neighborhood data with the coordinate data

In [24]:
# Read the coordinate CSV and rename its "Postal Code" column so it shares
# the key name used by `df`.
geo = pd.read_csv('http://cocl.us/Geospatial_data').rename(
    columns={"Postal Code": "PostalCode"}
)

# Inner join on the postal code: only codes present in both tables are kept.
df_geo = df.merge(geo, how="inner", on="PostalCode")

In [26]:
# Display the first 15 rows of the merged table (postal data plus coordinates).
df_geo.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937


# Neighborhood map

In [None]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

In [None]:
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

In [14]:
# Geocode the city name to get a centre point for the map.
address = 'Toronto City'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message rather than an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of df_geo; its popup reads
# "<Neighborhood>, <Borough>".
for lat, lng, borough, neighborhood in zip(df_geo['Latitude'], df_geo['Longitude'], df_geo['Borough'], df_geo['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html is an option of Popup, so it is set here only and no
        # longer passed to CircleMarker as well.
        popup=folium.Popup(label, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto