## Importing libraries

In [17]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')




Libraries imported.


## Using Beautiful Soup 4 to scrap the website

In [18]:
from bs4 import BeautifulSoup
import urllib.request, urllib.parse, urllib.error
import ssl

# Ignore SSL certificate errors
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE


In [19]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
html = urllib.request.urlopen(url, context=ctx).read()	# html parser
soup = BeautifulSoup(html, 'html.parser')

In [20]:
soup

<!DOCTYPE html>

<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>List of postal codes of Canada: M - Wikipedia</title>
<script>document.documentElement.className = document.documentElement.className.replace( /(^|\s)client-nojs(\s|$)/, "$1client-js$2" );</script>
<script>(window.RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":857830462,"wgRevisionId":857830462,"wgArticleId":539066,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":false,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","

## Directly sending that data to pandas dataframe

In [21]:
table = pd.read_html(url,thousands=' ', header=0, index_col=0)[0]
table.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1A,Not assigned,Not assigned
M2A,Not assigned,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,Harbourfront


## Cleaning the table

In [23]:
cleaned_table = table.copy()
cleaned_table = cleaned_table[~cleaned_table.select_dtypes(['object']).eq('Not assigned').any(1)]
cleaned_table.head()

Unnamed: 0_level_0,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,Harbourfront
M5A,Downtown Toronto,Regent Park
M6A,North York,Lawrence Heights


## Calculating the shape of the dataframe

In [24]:
cleaned_table.shape

(211, 2)

## Importing the geo-coder API and finding out the latitudes and longitudes

## Example

In [30]:
import geocoder

# initialize your variable to None
lat_lng_coords = None
postal_code = 1
# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]
(latitude, longitude)

(43.6332593, -79.35451490000001)

In [40]:
for i, row in cleaned_table.iterrows():
    print(i)
    for j, column in row.iteritems():
        print(column)
        break
    lat_lng_coords = None
    postal_code = i
    while(lat_lng_coords is None):
        g = geocoder.google('{},{}'.format(postal_code,column))
        lat_lng_coords = g.latlng
    cleaned_table['latitude'] = lat_lng_coords[0]
    cleaned_table['longitude'] = lat_lng_coords[1]
cleaned_table

M3A
North York
M4A
North York
M5A
Downtown Toronto
M5A
Downtown Toronto
M6A
North York
M6A
North York
M9A
Etobicoke
M1B
Scarborough
M1B
Scarborough
M3B
North York
M4B
East York
M4B
East York
M5B
Downtown Toronto
M5B
Downtown Toronto
M6B
North York
M9B
Etobicoke
M9B
Etobicoke
M9B
Etobicoke
M9B
Etobicoke
M9B
Etobicoke
M1C
Scarborough
M1C
Scarborough
M1C
Scarborough
M3C
North York
M3C
North York
M4C
East York
M5C
Downtown Toronto
M6C
York
M9C
Etobicoke
M9C
Etobicoke
M9C
Etobicoke
M9C
Etobicoke
M1E
Scarborough
M1E
Scarborough
M1E
Scarborough
M4E
East Toronto
M5E
Downtown Toronto


Status code Unknown from https://maps.googleapis.com/maps/api/geocode/json: ERROR - HTTPSConnectionPool(host='maps.googleapis.com', port=443): Read timed out. (read timeout=5.0)


M6E
York
M1G
Scarborough
M4G
East York
M5G
Downtown Toronto
M6G
Downtown Toronto
M1H
Scarborough
M2H
North York
M3H
North York
M3H
North York
M3H
North York
M4H
East York
M5H
Downtown Toronto
M5H
Downtown Toronto
M5H
Downtown Toronto
M6H
West Toronto
M6H
West Toronto
M1J
Scarborough
M2J
North York
M2J
North York
M2J
North York
M3J
North York
M3J
North York
M4J
East York
M5J
Downtown Toronto
M5J
Downtown Toronto
M5J
Downtown Toronto
M6J
West Toronto
M6J
West Toronto
M1K
Scarborough
M1K
Scarborough
M1K
Scarborough
M2K
North York
M3K
North York
M3K
North York
M4K
East Toronto
M4K
East Toronto
M5K
Downtown Toronto
M5K
Downtown Toronto
M6K
West Toronto
M6K
West Toronto
M6K
West Toronto
M1L
Scarborough
M1L
Scarborough
M1L
Scarborough
M2L
North York
M2L
North York
M3L
North York
M4L
East Toronto
M4L
East Toronto
M5L
Downtown Toronto
M5L
Downtown Toronto
M6L
North York
M6L
North York
M6L
North York
M9L
North York
M1M
Scarborough
M1M
Scarborough
M1M
Scarborough
M2M
North York
M2M
North York
M3M

Unnamed: 0_level_0,Borough,Neighbourhood,latitude,longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.628841,-79.520999
M4A,North York,Victoria Village,43.628841,-79.520999
M5A,Downtown Toronto,Harbourfront,43.628841,-79.520999
M5A,Downtown Toronto,Regent Park,43.628841,-79.520999
M6A,North York,Lawrence Heights,43.628841,-79.520999
M6A,North York,Lawrence Manor,43.628841,-79.520999
M9A,Etobicoke,Islington Avenue,43.628841,-79.520999
M1B,Scarborough,Rouge,43.628841,-79.520999
M1B,Scarborough,Malvern,43.628841,-79.520999
M3B,North York,Don Mills North,43.628841,-79.520999


## The cleaned table after obtaining latitudes and longitudes

In [41]:
cleaned_table.head()

Unnamed: 0_level_0,Borough,Neighbourhood,latitude,longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.628841,-79.520999
M4A,North York,Victoria Village,43.628841,-79.520999
M5A,Downtown Toronto,Harbourfront,43.628841,-79.520999
M5A,Downtown Toronto,Regent Park,43.628841,-79.520999
M6A,North York,Lawrence Heights,43.628841,-79.520999


In [42]:
toronto_table = cleaned_table.copy()
toronto_table.head()

Unnamed: 0_level_0,Borough,Neighbourhood,latitude,longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M3A,North York,Parkwoods,43.628841,-79.520999
M4A,North York,Victoria Village,43.628841,-79.520999
M5A,Downtown Toronto,Harbourfront,43.628841,-79.520999
M5A,Downtown Toronto,Regent Park,43.628841,-79.520999
M6A,North York,Lawrence Heights,43.628841,-79.520999


## To get the cleaned table containing only toronto

In [55]:
some_values = "Toronto"
#toronto_table.loc[toronto_table['Borough'].isin(some_values)]
#toronto_table.loc[toronto_table['Borough'] == some_values]
cleaned_toronto_table = toronto_table[toronto_table['Borough'].str.contains(some_values)]

In [57]:
cleaned_toronto_table

Unnamed: 0_level_0,Borough,Neighbourhood,latitude,longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M5A,Downtown Toronto,Harbourfront,43.628841,-79.520999
M5A,Downtown Toronto,Regent Park,43.628841,-79.520999
M5B,Downtown Toronto,Ryerson,43.628841,-79.520999
M5B,Downtown Toronto,Garden District,43.628841,-79.520999
M5C,Downtown Toronto,St. James Town,43.628841,-79.520999
M4E,East Toronto,The Beaches,43.628841,-79.520999
M5E,Downtown Toronto,Berczy Park,43.628841,-79.520999
M5G,Downtown Toronto,Central Bay Street,43.628841,-79.520999
M6G,Downtown Toronto,Christie,43.628841,-79.520999
M5H,Downtown Toronto,Adelaide,43.628841,-79.520999


In [60]:
cleaned_toronto_tabl = cleaned_toronto_table.copy()


In [74]:
x = cleaned_toronto_tabl['latitude']
y = cleaned_toronto_tabl['longitude']
for i1,i2 in zip(x,y):
    #print(i1,i2)
    folium.CircleMarker(
        [i1, i2],
        radius=5,
        #popup=label,
        color='#43d9de',
        fill=True,
        fill_color='#43d9de',
        fill_opacity=0.7).add_to(map_clusters)
    #folium.Marker( location=[ i1, i2 ], fill_color='#43d9de', radius=8 ).add_to( mapit )
    #mapit = folium.Map( location=[ i1, i2 ] )
map_clusters

In [None]:
'''
import folium
mapit = None
for coord in latlon:
    mapit = folium.Map( location=[ coord[0], coord[1] ] )
'''