Importing the required modules

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests

Accessing the specified webpage & downloading the table into the program

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url).text
soup = BeautifulSoup(source, 'lxml')
table = str(soup.find_all('table')[0])

Loading the table into a dataframe

In [3]:
NDF = pd.DataFrame()
NDF = pd.read_html(table)
# NDF

In [4]:
NDF = pd.DataFrame.from_records(NDF[0])
NDF.columns = ['Postal Code', 'Borough', 'Neighbourhood']
NDF.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [5]:
# drop rows where Borough == 'Not assigned'
NDF = NDF[NDF.Borough != 'Not assigned']

In [6]:
# Copy Borough to Neighbourhood where Neighbourhood is not assigned
NDF['Neighbourhood'] = np.where(NDF['Neighbourhood'] == 'Not assigned', NDF['Borough'], NDF['Neighbourhood'])

In [7]:
# drop row of column names
NDF = NDF.drop(NDF.index[0])
NDF.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights


Creating a series of combined neighbourhoods

In [8]:
NS = NDF.groupby('Postal Code', axis=0)['Neighbourhood'].apply(list)
NS.head(10)

Postal Code
M1B                                     [Rouge, Malvern]
M1C             [Highland Creek, Rouge Hill, Port Union]
M1E                  [Guildwood, Morningside, West Hill]
M1G                                             [Woburn]
M1H                                          [Cedarbrae]
M1J                                [Scarborough Village]
M1K        [East Birchmount Park, Ionview, Kennedy Park]
M1L                    [Clairlea, Golden Mile, Oakridge]
M1M    [Cliffcrest, Cliffside, Scarborough Village West]
M1N                        [Birch Cliff, Cliffside West]
Name: Neighbourhood, dtype: object

Iterating over rows in the dataframe to insert the combined Neighbourhood information

In [9]:
for r in range(0, len(NDF)):
    # print(NDF['Postal Code'].iloc[r])
    N = NDF['Neighbourhood'].iloc[r]
    for r2 in range(0, len(NS)):
        #print(NS[r2], N)
        # print(N)
        if str(N) in NS[r2]:
            # print('FY')
            NDF['Neighbourhood'].iloc[r] = NS[r2]
NDF.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
3,M3A,North York,[Parkwoods]
4,M4A,North York,[Victoria Village]
5,M5A,Downtown Toronto,"[Harbourfront, Regent Park]"
6,M5A,Downtown Toronto,"[Harbourfront, Regent Park]"
7,M6A,North York,"[Lawrence Heights, Lawrence Manor]"
8,M6A,North York,"[Lawrence Heights, Lawrence Manor]"
9,M7A,Queen's Park,[Queen's Park]
11,M9A,Etobicoke,[Islington Avenue]
12,M1B,Scarborough,"[Rouge, Malvern]"
13,M1B,Scarborough,"[Rouge, Malvern]"


In [19]:
NDF = NDF.drop_duplicates()
NDF.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Harbourfront', 'Regent Park"
7,M6A,North York,"Lawrence Heights', 'Lawrence Manor"
9,M7A,Queen's Park,"""Queen's Park"""
11,M9A,Etobicoke,Islington Avenue
12,M1B,Scarborough,"Rouge', 'Malvern"
15,M3B,North York,Don Mills North
16,M4B,East York,"Woodbine Gardens', 'Parkview Hill"
18,M5B,Downtown Toronto,"Ryerson', 'Garden District"


Stripping some of the characters from the Neighbourhood series

In [20]:
NDF['Neighbourhood'] = NDF['Neighbourhood'].astype(str).str.strip("]").str.strip("[").str.lstrip("'").str.rstrip("'")
NDF.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,Postal Code,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Harbourfront', 'Regent Park"
7,M6A,North York,"Lawrence Heights', 'Lawrence Manor"
9,M7A,Queen's Park,"""Queen's Park"""
11,M9A,Etobicoke,Islington Avenue
12,M1B,Scarborough,"Rouge', 'Malvern"
15,M3B,North York,Don Mills North
16,M4B,East York,"Woodbine Gardens', 'Parkview Hill"
18,M5B,Downtown Toronto,"Ryerson', 'Garden District"


Displaying Dataframe shape as per assignment instructions

In [21]:
NDF.shape

(105, 3)

In [22]:
import csv
# import urllib2
url = 'https://cocl.us/Geospatial_data'
GDF = pd.read_csv(url)
GDF.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [23]:
NDF2 = pd.merge(NDF, GDF, on='Postal Code')
NDF2.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront', 'Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights', 'Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,"""Queen's Park""",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge', 'Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens', 'Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson', 'Garden District",43.657162,-79.378937


In [33]:
import folium
LA = 43.654260
LT = -79.360636
TOmap = folium.Map(location=[LA, LT], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(NDF2['Latitude'], NDF2['Longitude'], NDF2['Borough'], NDF2['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker([lat, lng], radius=5, popup=label, color='blue', fill=True).add_to(TOmap)

TOmap