## Importing required libraries

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import csv

In [2]:
import requests

## Setting maxcolwidth to 800 for readability

In [3]:
pd.set_option('max_colwidth', 800)

## instantiating BeautifulSoup object and reading the table from wikipedia page

In [4]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text 
soup = BeautifulSoup(source, 'lxml')

## Creating a csv_writer to append the scraped content in an xlx file by initally defining column names 

In [5]:
csv_file = open('toronto_postal_codes.csv', 'w')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['Postcode', 'Borough', 'Neighbourhood'])

32

## Main Scraping of data begins here

In [64]:
table = soup.find('table', class_ = 'wikitable') # Gets the table from the webpage
rows = table.find_all('tr') # Gets the table rows

postcodes = [] # Initializes the raw postcodes list
boroughs = [] # Initializes the raw boroughs list
neighbourhoods = [] # Initializes the raw neighbourhoods list

for row in rows:    
    columns = row.find_all('td')
    try :
        if columns[1].text != 'Not assigned':  # To skip if the borough name is 'Not Assigned'
            
            postcode = columns[0].text
            postcodes.append(postcode)
            
            borough = columns[1].text
            boroughs.append(borough)
            
            neighbourhood = columns[2].text.split('\n')[0] # Removing the newline character at the end     
            
            if neighbourhood == 'Not assigned': # Assigning the same name to neighbourhood if it is 'Not Assigned'
                neighbourhood = borough            
                
            neighbourhoods.append(neighbourhood)
             
    except Exception as e : # To skip the first row which contains column names
        pass 
    
postcode_explored = [] # Initializing the list of explored postcodes
for index_i, postcode_i in enumerate(postcodes) :   
    if postcode_i not in postcode_explored :
        nbds = neighbourhoods[index_i]
        for index_f, postcode_f in enumerate(postcodes) :
            if postcode_i == postcode_f and index_i != index_f:
                nbds = nbds + ', ' + neighbourhoods[index_f] # Concatenating the neighbourhood names
        csv_writer.writerow([postcode_i, boroughs[index_i], nbds]) # Writing the rows in the csv file
        postcode_explored.append(postcode_i)


ValueError: I/O operation on closed file.

## Closing the CSV file

In [10]:
csv_file.close()

## Creating a pandas dataframe

In [167]:
toronto_df=pd.read_csv('toronto_postal_codes.csv')

## Gauging the shape of the created pandas dataframe

In [168]:
toronto_df.shape

(180, 3)

## Final Reformatting and resultant dataframe

In [169]:
toronto_df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A\r\n,Not assigned\r\n,
1,M2A\r\n,Not assigned\r\n,
2,M3A\r\n,North York\r\n,Parkwoods
3,M4A\r\n,North York\r\n,Victoria Village
4,M5A\r\n,Downtown Toronto\r\n,Regent Park / Harbourfront


In [170]:
toronto_df=toronto_df.replace('\r\n', '',regex=True)

In [171]:
toronto_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [172]:
toronto_df_copy=toronto_df #making a copy for redundancy 

In [179]:
drop_index=toronto_df.loc[toronto_df['Borough']=='Not assigned'].index

In [180]:
drop_index

Int64Index([  0,   1,   7,  10,  15,  16,  19,  24,  25,  28,  29,  33,  34,
             35,  37,  38,  42,  43,  44,  51,  52,  53,  60,  61,  62,  69,
             70,  71,  78,  79,  87,  88,  96,  97, 101, 105, 106, 110, 115,
            118, 119, 123, 124, 125, 127, 128, 131, 132, 133, 134, 136, 137,
            140, 141, 145, 146, 149, 150, 154, 155, 158, 159, 161, 162, 163,
            164, 166, 167, 170, 171, 172, 173, 174, 175, 176, 177, 179],
           dtype='int64')

In [187]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [194]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [195]:
toronto_df_copy.reset_index(inplace=True)

In [200]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [199]:
toronto_df_copy=toronto_df_copy.drop('index',1)

In [224]:
toronto_df_copy['Neighbourhood'].replace('/',',',regex=True,inplace=True)

In [216]:
t=toronto_df_copy[['Neighbourhood']]
t.replace('/',',',regex=True,inplace=True)

In [226]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [221]:
toronto_df_copy.shape

(103, 3)

## Testing Geopy library

In [227]:
from geopy.geocoders import Nominatim
nom=Nominatim()

  


In [229]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


In [254]:
toronto_df_copy.tail()

Unnamed: 0,Postcode,Borough,Neighbourhood
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing CentrE
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea , Humber Bay , Mimico NE , The Queensway East , Royal York South East , Kingsway Park South East"
102,M8Z,Etobicoke,"Mimico NW , The Queensway West , South of Bloor , Kingsway Park South West , Royal York South West"


In [260]:
x=nom.geocode("Downtown Toronto Queen's Park, Toronto")

In [265]:
toronto_df_copy['Address']=toronto_df_copy['Borough']+" "+toronto_df_copy['Neighbourhood']

In [266]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Address
0,M3A,North York,Parkwoods,North York Parkwoods
1,M4A,North York,Victoria Village,North York Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront","Downtown Toronto Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights","North York Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government","Downtown Toronto Queen's Park , Ontario Provincial Government"


In [267]:
toronto_df_copy['Coordinates']=toronto_df_copy['Address'].apply(nom.geocode)

In [268]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Address,Coordinates
0,M3A,North York,Parkwoods,North York Parkwoods,"(Parkwoods Village Drive, Parkway East, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M3A 2X2, Canada, (43.7587999, -79.3201966))"
1,M4A,North York,Victoria Village,North York Victoria Village,"(Victoria Village, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M4A 2B1, Canada, (43.732658, -79.3111892))"
2,M5A,Downtown Toronto,"Regent Park , Harbourfront","Downtown Toronto Regent Park , Harbourfront",
3,M6A,North York,"Lawrence Manor , Lawrence Heights","North York Lawrence Manor , Lawrence Heights","(Lawrence Heights, Eglinton—Lawrence, North York, Toronto, Golden Horseshoe, Ontario, M6A 2R1, Canada, (43.7227784, -79.4509332))"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government","Downtown Toronto Queen's Park , Ontario Provincial Government",


In [269]:
toronto_df_copy.tail()

Unnamed: 0,Postcode,Borough,Neighbourhood,Address,Coordinates
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North","Etobicoke The Kingsway , Montgomery Road , Old Mill North",
99,M4Y,Downtown Toronto,Church and Wellesley,Downtown Toronto Church and Wellesley,"(Holiday Inn Toronto Downtown Centre, 30, Carlton Street, Church-Wellesley Village, Toronto Centre, Old Toronto, Toronto, Golden Horseshoe, Ontario, M5B 1L2, Canada, (43.6617403, -79.3810866))"
100,M7Y,East Toronto,Business reply mail Processing CentrE,East Toronto Business reply mail Processing CentrE,
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea , Humber Bay , Mimico NE , The Queensway East , Royal York South East , Kingsway Park South East","Etobicoke Old Mill South , King's Mill Park , Sunnylea , Humber Bay , Mimico NE , The Queensway East , Royal York South East , Kingsway Park South East",
102,M8Z,Etobicoke,"Mimico NW , The Queensway West , South of Bloor , Kingsway Park South West , Royal York South West","Etobicoke Mimico NW , The Queensway West , South of Bloor , Kingsway Park South West , Royal York South West",


In [272]:
toronto_df_copy

Unnamed: 0,Postcode,Borough,Neighbourhood,Address,Coordinates
0,M3A,North York,Parkwoods,North York Parkwoods,"(Parkwoods Village Drive, Parkway East, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M3A 2X2, Canada, (43.7587999, -79.3201966))"
1,M4A,North York,Victoria Village,North York Victoria Village,"(Victoria Village, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M4A 2B1, Canada, (43.732658, -79.3111892))"
2,M5A,Downtown Toronto,"Regent Park , Harbourfront","Downtown Toronto Regent Park , Harbourfront",
3,M6A,North York,"Lawrence Manor , Lawrence Heights","North York Lawrence Manor , Lawrence Heights","(Lawrence Heights, Eglinton—Lawrence, North York, Toronto, Golden Horseshoe, Ontario, M6A 2R1, Canada, (43.7227784, -79.4509332))"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government","Downtown Toronto Queen's Park , Ontario Provincial Government",
5,M9A,Etobicoke,Islington Avenue,Etobicoke Islington Avenue,"(Islington Avenue, The Queensway, Etobicoke—Lakeshore, Etobicoke, Toronto, Golden Horseshoe, Ontario, M8Z 6C7, Canada, (43.6225748, -79.5142154))"
6,M1B,Scarborough,"Malvern , Rouge","Scarborough Malvern , Rouge","(Baton Rouge, 520, Progress Avenue, Scarborough, Scarborough Centre, Scarborough, Toronto, Golden Horseshoe, Ontario, M1P 5J1, Canada, (43.7792995, -79.25712648470281))"
7,M3B,North York,Don Mills,North York Don Mills,"(Don Mills, Sheppard Avenue East, Parkway Forest, North York, Toronto, Golden Horseshoe, Ontario, M2J 5A7, Canada, (43.775347, -79.3459439))"
8,M4B,East York,"Parkview Hill , Woodbine Gardens","East York Parkview Hill , Woodbine Gardens",
9,M5B,Downtown Toronto,"Garden District, Ryerson","Downtown Toronto Garden District, Ryerson","(Ryerson Theatre, 31,43, Gerrard Street East, Downtown Yonge, Toronto Centre, Old Toronto, Toronto, Golden Horseshoe, Ontario, M5B 1G7, Canada, (43.6593908, -79.3795591))"


In [283]:
toronto_df_copy['Latitude']=toronto_df_copy['Coordinates'].apply(lambda x: x.latitude if x!=None else None)
toronto_df_copy['Longitude']=toronto_df_copy['Coordinates'].apply(lambda y: y.longitude if y!=None else None)

In [284]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Address,Coordinates,Latitude,Longitude
0,M3A,North York,Parkwoods,North York Parkwoods,"(Parkwoods Village Drive, Parkway East, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M3A 2X2, Canada, (43.7587999, -79.3201966))",43.7588,-79.320197
1,M4A,North York,Victoria Village,North York Victoria Village,"(Victoria Village, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M4A 2B1, Canada, (43.732658, -79.3111892))",43.732658,-79.311189
2,M5A,Downtown Toronto,"Regent Park , Harbourfront","Downtown Toronto Regent Park , Harbourfront",,,
3,M6A,North York,"Lawrence Manor , Lawrence Heights","North York Lawrence Manor , Lawrence Heights","(Lawrence Heights, Eglinton—Lawrence, North York, Toronto, Golden Horseshoe, Ontario, M6A 2R1, Canada, (43.7227784, -79.4509332))",43.722778,-79.450933
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government","Downtown Toronto Queen's Park , Ontario Provincial Government",,,


### Geopy.geoncoder doesn't do an appreciable job of finding coordinates for all the entities

In [292]:
toronto_df_copy.drop(['Latitude','Longitude'],1)

Unnamed: 0,Postcode,Borough,Neighbourhood,Address,Coordinates
0,M3A,North York,Parkwoods,North York Parkwoods,"(Parkwoods Village Drive, Parkway East, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M3A 2X2, Canada, (43.7587999, -79.3201966))"
1,M4A,North York,Victoria Village,North York Victoria Village,"(Victoria Village, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M4A 2B1, Canada, (43.732658, -79.3111892))"
2,M5A,Downtown Toronto,"Regent Park , Harbourfront","Downtown Toronto Regent Park , Harbourfront",
3,M6A,North York,"Lawrence Manor , Lawrence Heights","North York Lawrence Manor , Lawrence Heights","(Lawrence Heights, Eglinton—Lawrence, North York, Toronto, Golden Horseshoe, Ontario, M6A 2R1, Canada, (43.7227784, -79.4509332))"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government","Downtown Toronto Queen's Park , Ontario Provincial Government",
5,M9A,Etobicoke,Islington Avenue,Etobicoke Islington Avenue,"(Islington Avenue, The Queensway, Etobicoke—Lakeshore, Etobicoke, Toronto, Golden Horseshoe, Ontario, M8Z 6C7, Canada, (43.6225748, -79.5142154))"
6,M1B,Scarborough,"Malvern , Rouge","Scarborough Malvern , Rouge","(Baton Rouge, 520, Progress Avenue, Scarborough, Scarborough Centre, Scarborough, Toronto, Golden Horseshoe, Ontario, M1P 5J1, Canada, (43.7792995, -79.25712648470281))"
7,M3B,North York,Don Mills,North York Don Mills,"(Don Mills, Sheppard Avenue East, Parkway Forest, North York, Toronto, Golden Horseshoe, Ontario, M2J 5A7, Canada, (43.775347, -79.3459439))"
8,M4B,East York,"Parkview Hill , Woodbine Gardens","East York Parkview Hill , Woodbine Gardens",
9,M5B,Downtown Toronto,"Garden District, Ryerson","Downtown Toronto Garden District, Ryerson","(Ryerson Theatre, 31,43, Gerrard Street East, Downtown Yonge, Toronto Centre, Old Toronto, Toronto, Golden Horseshoe, Ontario, M5B 1G7, Canada, (43.6593908, -79.3795591))"


### We therefore go in with the .csv file provided to append the coordinates

In [286]:
loc_df=pd.read_csv('Geospatial_Coordinates.csv')

In [290]:
loc_df.shape

(103, 3)

### We renamed the Postal Code column in the loc_df to Postcode for easy merge operation

In [337]:
test=pd.merge(toronto_df_copy,loc_df, on=['Postcode'], how='inner')

In [338]:
test.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Address,Coordinates,Latitude_x,Longitude_x,Latitude_y,Longitude_y
0,M3A,North York,Parkwoods,North York Parkwoods,"(Parkwoods Village Drive, Parkway East, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M3A 2X2, Canada, (43.7587999, -79.3201966))",,-79.320197,43.753259,-79.329656
1,M4A,North York,Victoria Village,North York Victoria Village,"(Victoria Village, Don Valley East, North York, Toronto, Golden Horseshoe, Ontario, M4A 2B1, Canada, (43.732658, -79.3111892))",,-79.311189,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront","Downtown Toronto Regent Park , Harbourfront",,,,43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights","North York Lawrence Manor , Lawrence Heights","(Lawrence Heights, Eglinton—Lawrence, North York, Toronto, Golden Horseshoe, Ontario, M6A 2R1, Canada, (43.7227784, -79.4509332))",,-79.450933,43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government","Downtown Toronto Queen's Park , Ontario Provincial Government",,,,43.662301,-79.389494


In [339]:
toronto_df_copy=test

In [342]:
toronto_df_copy.drop(['Address','Coordinates','Latitude_x','Longitude_x'],1,inplace=True)

In [343]:
toronto_df_copy.rename(columns={'Latitude_y':'Latitude','Longitude_y':'Longitude'},inplace=True)

In [344]:
toronto_df_copy.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
