# 1. web scraping for postcode, borough, neighborhood dataframe 

### 1.1 import packages 

In [1]:
import numpy as np
import pandas as pd
import requests # for getting html content
from bs4 import BeautifulSoup # for obtaining clean html content

### 1.2 download content from wiki, obtain & clean up content in the table

In [2]:
############ 1. obtain data from wiki (using BeautifulSoup) ####################
# Source page: Wikipedia's "List of postal codes of Canada: M" article.
wiki_link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from silently parsing an HTTP error page as if
# it were the article.
wiki_postal_codes = requests.get(wiki_link, timeout=30)
wiki_postal_codes.raise_for_status()

soup = BeautifulSoup(wiki_postal_codes.content, 'lxml')

# `soup.table` is the first <table> element in the document, or None when the
# page contains no table at all; fail here with a clear message rather than
# later with an AttributeError on None.
postal_table = soup.table
if postal_table is None:
    raise ValueError("No <table> element found at " + wiki_link)

############ 2. get content from the entire table, parse into array of strings #############
# Split the table's text on newlines and drop the empty strings in a single
# pass (the previous while/remove loop rescanned the whole list once per
# empty entry, which is quadratic in the table size).
body = [line for line in postal_table.text.split("\n") if line != ""]

############ 3. reshape the array into N x 3 matrix, first row is col name, remaining are content ######
# The parsing below relies on every table row contributing exactly three
# non-empty text lines. Check that up front so a layout change on the page
# produces a readable error instead of an opaque reshape/index failure.
if not body or len(body) % 3 != 0:
    raise ValueError(
        "Expected the table text to split into rows of 3 cells, got "
        + str(len(body)) + " non-empty lines"
    )

# NOTE: `postal_table` is rebound here from the parsed HTML table to the
# N x 3 array of strings.
postal_table = np.reshape(body, (-1, 3))
col = postal_table[0]    # first row  -> column names
data = postal_table[1:]  # other rows -> records

df = pd.DataFrame(data, columns = col)

############# 4. data clean up -- remove rows with "Not assigned" Borough ############
# .copy() makes the filtered result an independent frame, so the assignment
# below writes into `df` itself and not into a temporary slice of the
# unfiltered frame.
df = df[df['Borough'] != "Not assigned"].copy()

############# 4. data clean up -- assign the "Not assigned" Neighbourhood with the same Borough name ############
# Boolean mask of the rows whose neighbourhood is missing. `ind` keeps those
# rows as they were before the fix-up.
unassigned = df['Neighbourhood'] == 'Not assigned'
ind = df.loc[unassigned]

# Row-aligned assignment through .loc: each affected row receives its own
# Borough value. (The earlier `df.Neighbourhood.replace(..., inplace=True)`
# mutated a Series pulled out of the frame, which is not guaranteed to
# propagate back to `df` and is a no-op under pandas Copy-on-Write.)
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']

############# 4. data clean up -- group the Neighbourhood with the same Postcode ############
# One row per Postcode: keep the first Borough seen and join every
# Neighbourhood of that postcode into a single comma-separated string.
df = (
    df.groupby('Postcode')
      .agg({'Borough': 'first', 'Neighbourhood': ", ".join})
      .reset_index()
)

df.head(15)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


### 1.3 print shape of the resulting dataframe

In [3]:
df.shape

(103, 3)

# 2. Add latitude and longitude data to the dataframe (matched by postal code)

In [4]:
# NOTE: everything between the triple quotes below is a plain string literal,
# not executed code -- the geocoder calls inside it never run. Because the
# string is the last expression in the cell, the notebook echoes it back as
# the cell's output. Per the note inside the string, a single geocoder lookup
# was too slow, so the coordinates are read from a CSV file instead.
'''
import geocoder # install geocoder

# initialize your variable to None
lat_lng_coords = None

g = geocoder.google('Mountain View, CA') # a sample request to test performance

# it takes forever to load a single lat_lng data, using .csv instead..

while lat_lng_coords == None:
    g = geocoder.google('Mountain View, CA')
    lat_lng_coords = g.latlng
print(g.latlng)
'''

"\nimport geocoder # install geocoder\n\n# initialize your variable to None\nlat_lng_coords = None\n\ng = geocoder.google('Mountain View, CA') # a sample request to test performance\n\n# it takes forever to load a single lat_lng data, using .csv instead..\n\nwhile lat_lng_coords == None:\n    g = geocoder.google('Mountain View, CA')\n    lat_lng_coords = g.latlng\nprint(g.latlng)\n"

In [5]:
# Read the postal-code coordinates table straight from the hosted CSV, then
# preview its first five rows.
geospatial_csv_url = 'http://cocl.us/Geospatial_data'
postcode_latlng = pd.read_csv(geospatial_csv_url)
postcode_latlng.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [6]:
# Join on the postal code itself rather than on row position. The previous
# pd.concat(axis=1) glued the two frames together by index, which is only
# correct while both happen to list the same postcodes in the same order;
# any difference would silently attach coordinates to the wrong postcode.
#
# - how='left' keeps every row of `df`, in its existing order.
# - Both key columns ('Postcode' and 'Postal Code') are retained, so the
#   resulting columns match the earlier side-by-side layout.
# - validate='one_to_one' raises if a postal code is duplicated on either side.
postal_df = df.merge(
    postcode_latlng,
    how = 'left',
    left_on = 'Postcode',
    right_on = 'Postal Code',
    validate = 'one_to_one',
)
postal_df.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",M1K,43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",M1L,43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",M1M,43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",M1N,43.692657,-79.264848
