In [1]:
!pip install bs4



In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


In [3]:
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_url)
My_table = soup.find('table',{'class':'wikitable sortable'})

**Tranform the data into a pandas dataframe**

In [4]:
Head = My_table.find_all('tr')
Table = []
for th in Head:
    row = np.array(th.getText()[1:-1].split('\n'))
    if row[1] != 'Not assigned': 
        if row[2] == 'Not assigned':
            row[2] = row[1]
        Table.append(row)
        
df_Canada = pd.DataFrame(data=Table[1:], columns=Table[0])
df_Canada.head()

Unnamed: 0,Postal Code,Unnamed: 2,Borough,Unnamed: 4,Neighborhood
0,M1A,,,,Not assigned
1,M2A,,,,Not assigned
2,M3A,,North York,,Parkwoods
3,M4A,,North York,,Victoria Village
4,M5A,,Downtown Toronto,,"Regent Park, Harbourfront"


In [5]:
Col = df_Canada.columns
Col

Index(['Postal Code', '', 'Borough', '', 'Neighborhood'], dtype='object')

In [6]:
Col = df_Canada.columns
df_Canada_Group = df_Canada.groupby(Col[0])[Col[4]].apply(lambda x: ''.join([str(elem+',') for elem in list(x)])).reset_index()
df_Canada_Group.head()

Unnamed: 0,Postal Code,Neighborhood
0,M1A,"Not assigned,"
1,M1B,"Malvern, Rouge,"
2,M1C,"Rouge Hill, Port Union, Highland Creek,"
3,M1E,"Guildwood, Morningside, West Hill,"
4,M1G,"Woburn,"


The next task is essentially transforming this data of nested Python dictionaries into a pandas dataframe. So let's start by creating an empty dataframe.

In [7]:
# define the dataframe columns
column_names = ['Postal Code', 'Borough', 'Neighbourhood']

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)

In [8]:
df_repeated = df_Canada.sort_values('Postal Code', ascending=True)
df_repeated.drop_duplicates('Postal Code', keep="last", inplace=True)
df_Canada_Group['Borough'] = df_repeated.reset_index()['Borough']

**Geospatial data**

In [9]:
!pip install geocoder

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 2.2MB/s 
Collecting ratelim
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [10]:
!wget -q -O 'postal_data.csv' http://cocl.us/Geospatial_data
print('Data downloaded!')

Data downloaded!


In [11]:
df_csv = pd.read_csv('postal_data.csv')
df_csv.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
df_Canada_Group['Latitude'] = df_csv['Latitude']
df_Canada_Group['Longitude'] = df_csv['Longitude']
df_Canada_Group

Unnamed: 0,Postal Code,Neighborhood,Borough,Latitude,Longitude
0,M1A,"Not assigned,",,43.806686,-79.194353
1,M1B,"Malvern, Rouge,",Scarborough,43.784535,-79.160497
2,M1C,"Rouge Hill, Port Union, Highland Creek,",Scarborough,43.763573,-79.188711
3,M1E,"Guildwood, Morningside, West Hill,",Scarborough,43.770992,-79.216917
4,M1G,"Woburn,",Scarborough,43.773136,-79.239476
5,M1H,"Cedarbrae,",Scarborough,43.744734,-79.239476
6,M1J,"Scarborough Village,",Scarborough,43.727929,-79.262029
7,M1K,"Kennedy Park, Ionview, East Birchmount Park,",Scarborough,43.711112,-79.284577
8,M1L,"Golden Mile, Clairlea, Oakridge,",Scarborough,43.716316,-79.239476
9,M1M,"Cliffside, Cliffcrest, Scarborough Village West,",Scarborough,43.692657,-79.264848
