# Notebook for the Toronto Neighborhood and Segmentation Exercise

In [191]:
# Import libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
#import geocoder # import geocoder
import folium # map rendering library
import os

## Question 1: Data Scraping

In [190]:
# Download the Wikipedia page that lists the postal codes starting with "M".
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,  # do not let a stalled connection hang the kernel
)
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page
source = response.text
soup = BeautifulSoup(source, 'lxml')

# The data lives in <table class="wikitable sortable">.
tab = soup.find('table', class_='wikitable sortable')
if tab is None:
    raise ValueError('Postal code table not found; the page layout may have changed')

# tr = table record, td = table data.
# Collect every data row first and build the frame once: growing a DataFrame
# row by row is quadratic, and DataFrame.append no longer exists in pandas >= 2.0.
records = []
for row in tab.find_all('tr')[1:]:  # [1:] skips the header row
    cells = row.find_all('td')
    if len(cells) < 3:
        continue  # not a (postal code, borough, neighborhood) data row

    # get_text(strip=True) removes the trailing newline and stray whitespace
    post_code, borough, neighborhood = (cell.get_text(strip=True) for cell in cells[:3])
    records.append({'PostalCode': post_code,
                    'Borough': borough,
                    'Neighborhood': neighborhood})

df = pd.DataFrame(records, columns=['PostalCode', 'Borough', 'Neighborhood'])

# Remove postal codes whose borough is not assigned
df = df[df['Borough'] != 'Not assigned'].copy()

# For neighborhoods that are not assigned, fall back to the borough name.
# .loc avoids chained-indexing assignment, which may silently write to a copy.
no_neighborhood = df['Neighborhood'] == 'Not assigned'
df.loc[no_neighborhood, 'Neighborhood'] = df.loc[no_neighborhood, 'Borough']

# Reshape the dataframe: one row per postal code / borough, with all of its
# neighborhoods joined by ', '. Joining with an explicit separator (instead of
# appending ', ' to every value and trimming two characters afterwards) keeps
# borough-name fallbacks intact.
df = (
    df.groupby(['PostalCode', 'Borough'])
      .agg({'Neighborhood': ', '.join})
      .reset_index()
)

df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [168]:
# Show the (rows, columns) dimensions of the grouped postal-code table
df.shape

(103, 3)

## Question 2: Merge Neighborhood Information with Location Data

In [184]:
# Load the latitude/longitude lookup table (one row per postal code)
df_coord = pd.read_csv('Geospatial_Coordinates.csv')

# Attach the coordinates to each postal code (inner join on the code), then
# discard the duplicate key column contributed by the coordinates file
df_result = (
    df.merge(df_coord, how='inner', left_on='PostalCode', right_on='Postal Code')
      .drop(columns='Postal Code')
)

In [186]:
# Preview the first three rows of the merged table, including Latitude/Longitude
df_result.head(3)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711


## Question 3: Visualise Neighborhoods

In [187]:
# List the distinct borough names present in the merged table
df_result['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       'Mississauga', 'Etobicoke', "Queen's Park"], dtype=object)

In [193]:
# Latitude/longitude the Toronto map is initially centred on
tor_lat = 43.6532
tor_lon = -79.3832
map_toronto = folium.Map(location=[tor_lat, tor_lon], zoom_start=10)

# Add one circle marker per row of df_result, with a popup that shows the
# row's neighborhoods followed by its borough
for lat, lng, borough, neighborhoods in zip(df_result['Latitude'],
                                            df_result['Longitude'],
                                            df_result['Borough'],
                                            df_result['Neighborhood']):
    # parse_html=True makes the popup treat the label as plain text, so names
    # containing characters such as apostrophes are escaped properly
    popup = folium.Popup('{}, {}'.format(neighborhoods, borough), parse_html=True)

    # parse_html is a Popup option only; CircleMarker has no such parameter,
    # so it is intentionally not passed here
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#029386',
        fill_opacity=0.5,
    ).add_to(map_toronto)

map_toronto


In [None]:
# End of notebook