# Question 1

## Import needed libraries

In [26]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

## Parse Wokipedia page

In [5]:
html = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(html,'lxml')

print('HTML representation of Wikipedia page.')
print(soup.prettify())

HTML representation of Wikipedia page.
<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"aaae9827-6475-4db3-b696-5c7b4e175e84","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":969510799,"wgRevisionId":969510799,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Short description i

## Extract table with data

In [12]:
table = soup.find('table', {'class':'wikitable sortable'})
headers = list(filter(None, table.tr.text.splitlines()))

# Extract headers from table too.
print(headers)

['Postal Code', 'Borough', 'Neighbourhood']


## Transform table data

In [43]:
def parse_table(t):
    table = ""
    for tr in t.find_all('tr'):
        row = ""
        for td in tr.find_all('td'):
            row = row + "," + td.text.replace('\n', '')
        table = table + row[1:] + '\n'
    return table

parsed = parse_table(table)
print(parsed)


M1A,Not assigned,Not assigned
M2A,Not assigned,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,Regent Park, Harbourfront
M6A,North York,Lawrence Manor, Lawrence Heights
M7A,Downtown Toronto,Queen's Park, Ontario Provincial Government
M8A,Not assigned,Not assigned
M9A,Etobicoke,Islington Avenue, Humber Valley Village
M1B,Scarborough,Malvern, Rouge
M2B,Not assigned,Not assigned
M3B,North York,Don Mills
M4B,East York,Parkview Hill, Woodbine Gardens
M5B,Downtown Toronto,Garden District, Ryerson
M6B,North York,Glencairn
M7B,Not assigned,Not assigned
M8B,Not assigned,Not assigned
M9B,Etobicoke,West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
M1C,Scarborough,Rouge Hill, Port Union, Highland Creek
M2C,Not assigned,Not assigned
M3C,North York,Don Mills
M4C,East York,Woodbine Heights
M5C,Downtown Toronto,St. James Town
M6C,York,Humewood-Cedarvale
M7C,Not assigned,Not assigned
M8C,Not assigned,Not assigned
M9C,Etobicoke,Eringate, B

## Convert table into a DataFrame

In [47]:
file = open("toronto.csv", "wb")
file.write(bytes(parsed, encoding="ascii", errors="ignore"))

df = pd.read_csv('toronto.csv', header=None, usecols=[0,1,2])
df.columns = headers

df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park
5,M6A,North York,Lawrence Manor
6,M7A,Downtown Toronto,Queen's Park
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern


## Remove useless data

In [49]:
indexes = df[ df['Borough'] =='Not assigned'].index
df.drop(indexes , inplace=True)

# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
df.loc[df['Neighbourhood'] =='Not assigned' , 'Neighbourhood'] = df['Borough']
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park
5,M6A,North York,Lawrence Manor
6,M7A,Downtown Toronto,Queen's Park
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern
11,M3B,North York,Don Mills
12,M4B,East York,Parkview Hill
13,M5B,Downtown Toronto,Garden District


## Shape of data

In [50]:
df.shape

(103, 3)

# Question 2

## Load data and transform it into a DataFrame

In [62]:
df_lon_lat = pd.read_csv('Geospatial_Coordinates.csv')
df_lon_lat.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


## Attach coordinates to Neighbourhood DataFrame

In [63]:
toronto_df = pd.merge(df, df_lon_lat, on='Postal Code')
toronto_df.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
3,M6A,North York,Lawrence Manor,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,Malvern,43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,Parkview Hill,43.706397,-79.309937
9,M5B,Downtown Toronto,Garden District,43.657162,-79.378937


# Question 3

## Replicate analysis made for NYC

In [67]:
# Import libraries
#import numpy as np
#import json # library to handle JSON files

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

#import requests # library to handle requests
#from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
#import matplotlib.cm as cm
#import matplotlib.colors as colors

# import k-means from clustering stage
#from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


In [66]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


## Create a map of Toronto with neighborhoods superimposed on top

In [69]:
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(toronto_map)  
    
toronto_map