## web scraping from wikipedia

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from urllib.request import urlopen
from bs4 import BeautifulSoup

In [3]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
html = urlopen(url)

In [4]:
soup = BeautifulSoup(html, 'lxml')
type(soup)

bs4.BeautifulSoup

In [5]:
# Get the title
title = soup.title
print(title)

<title>List of postal codes of Canada: M - Wikipedia</title>


In [6]:
# Print out the text
text = soup.get_text()
print(soup.text)




List of postal codes of Canada: M - Wikipedia
document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XrRJvgpAEKcAAI@-EI0AAAAQ","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":955414546,"wgRevisionId":955414546,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with short description","Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"List_of_postal_codes_of_Canada:_

In [7]:
soup.find_all('a')

[<a id="top"></a>,
 <a class="mw-jump-link" href="#mw-head">Jump to navigation</a>,
 <a class="mw-jump-link" href="#p-search">Jump to search</a>,
 <a href="/wiki/Postal_codes_in_Canada" title="Postal codes in Canada">postal codes in Canada</a>,
 <a href="/wiki/Toronto" title="Toronto">Toronto</a>,
 <a href="/wiki/Ontario" title="Ontario">Ontario</a>,
 <a href="/wiki/Canada_Post" title="Canada Post">Canada Post</a>,
 <a href="#cite_note-1">[1]</a>,
 <a href="/wiki/Mobile_app" title="Mobile app">applications</a>,
 <a class="mw-redirect" href="/wiki/Smartphones" title="Smartphones">smartphones</a>,
 <a href="/wiki/IPhone" title="IPhone">iPhone</a>,
 <a href="/wiki/BlackBerry" title="BlackBerry">BlackBerry</a>,
 <a href="#cite_note-2">[2]</a>,
 <a href="/wiki/CD-ROM" title="CD-ROM">CD-ROMs</a>,
 <a href="/wiki/Toronto" title="Toronto">Toronto</a>,
 <a href="/wiki/Postal_codes_in_Canada#Forward_sortation_areas" title="Postal codes in Canada">FSAs</a>,
 <a href="/w/index.php?title=List_of_po

In [8]:
all_links = soup.find_all("a")
for link in all_links:
    print(link.get("href"))

None
#mw-head
#p-search
/wiki/Postal_codes_in_Canada
/wiki/Toronto
/wiki/Ontario
/wiki/Canada_Post
#cite_note-1
/wiki/Mobile_app
/wiki/Smartphones
/wiki/IPhone
/wiki/BlackBerry
#cite_note-2
/wiki/CD-ROM
/wiki/Toronto
/wiki/Postal_codes_in_Canada#Forward_sortation_areas
/w/index.php?title=List_of_postal_codes_of_Canada:_M&action=edit&section=1
/wiki/Amazon_(company)
#cite_note-statcan-3
/w/index.php?title=List_of_postal_codes_of_Canada:_M&action=edit&section=2
#cite_note-statcan-3
/w/index.php?title=List_of_postal_codes_of_Canada:_M&action=edit&section=3
/w/index.php?title=List_of_postal_codes_of_Canada:_M&action=edit&section=4
#cite_ref-1
https://www.canadapost.ca/cpotools/apps/fpc/personal/findByCity?execution=e2s1
#cite_ref-2
https://web.archive.org/web/20110519093024/http://www.canadapost.ca/cpo/mc/personal/tools/mobileapp/default.jsf
http://www.canadapost.ca/cpo/mc/personal/tools/mobileapp/default.jsf
#cite_ref-statcan_3-0
#cite_ref-statcan_3-1
http://www12.statcan.ca/english/censu

In [9]:
# Print the first 10 rows for sanity check
rows = soup.find_all('tr')
print(rows[:10])

[<tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighborhood
</th></tr>, <tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>
</td></tr>, <tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>
</td></tr>, <tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>, <tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>, <tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>, <tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>, <tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>, <tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>
</td></tr>, <tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue
</td></tr>]


In [10]:
for row in rows:
    row_td = row.find_all('td')
print(row_td)
type(row_td)

[<td align="center" style="border: 1px solid #FF0000; background-color: #FFE0E0; font-size: 135%;" width="5%"><a href="/wiki/List_of_postal_codes_of_Canada:_A" title="List of postal codes of Canada: A">A</a>
</td>, <td align="center" style="border: 1px solid #FF4000; background-color: #FFE8E0; font-size: 135%;" width="5%"><a href="/wiki/List_of_postal_codes_of_Canada:_B" title="List of postal codes of Canada: B">B</a>
</td>, <td align="center" style="border: 1px solid #FF8000; background-color: #FFF0E0; font-size: 135%;" width="5%"><a href="/wiki/List_of_postal_codes_of_Canada:_C" title="List of postal codes of Canada: C">C</a>
</td>, <td align="center" style="border: 1px solid #FFC000; background-color: #FFF8E0; font-size: 135%;" width="5%"><a href="/wiki/List_of_postal_codes_of_Canada:_E" title="List of postal codes of Canada: E">E</a>
</td>, <td align="center" style="border: 1px solid #FFFF00; background-color: #FFFFE0; font-size: 135%;" width="5%"><a href="/wiki/List_of_postal_code

bs4.element.ResultSet

In [11]:
str_cells = str(row_td)
cleantext = BeautifulSoup(str_cells, "lxml").get_text()
print(cleantext)

[A
, B
, C
, E
, G
, H
, J
, K
, L
, M
, N
, P
, R
, S
, T
, V
, X
, Y
]


In [12]:
import re

list_rows = []
for row in rows:
    cells = row.find_all('td')
    str_cells = str(cells)
    clean = re.compile('<.*?>')
    clean2 = (re.sub(clean, '',str_cells))
    list_rows.append(clean2)
print(clean2)
type(clean2)

[A
, B
, C
, E
, G
, H
, J
, K
, L
, M
, N
, P
, R
, S
, T
, V
, X
, Y
]


str

In [13]:
df = pd.DataFrame(list_rows)
df.head(10)

Unnamed: 0,0
0,[]
1,"[M1A\n, Not assigned\n, \n]"
2,"[M2A\n, Not assigned\n, \n]"
3,"[M3A\n, North York\n, Parkwoods\n]"
4,"[M4A\n, North York\n, Victoria Village\n]"
5,"[M5A\n, Downtown Toronto\n, Regent Park, Harbo..."
6,"[M6A\n, North York\n, Lawrence Manor, Lawrence..."
7,"[M7A\n, Downtown Toronto\n, Queen's Park, Onta..."
8,"[M8A\n, Not assigned\n, \n]"
9,"[M9A\n, Etobicoke\n, Islington Avenue\n]"


In [14]:
df1 = df[0].str.split('\n', expand=True)
df1.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,85,86,87,88,89,90,91,92,93,94
0,[],,,,,,,,,,...,,,,,,,,,,
1,[M1A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
2,[M2A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
3,[M3A,", North York",", Parkwoods",],,,,,,,...,,,,,,,,,,
4,[M4A,", North York",", Victoria Village",],,,,,,,...,,,,,,,,,,
5,[M5A,", Downtown Toronto",", Regent Park, Harbourfront",],,,,,,,...,,,,,,,,,,
6,[M6A,", North York",", Lawrence Manor, Lawrence Heights",],,,,,,,...,,,,,,,,,,
7,[M7A,", Downtown Toronto",", Queen's Park, Ontario Provincial Government",],,,,,,,...,,,,,,,,,,
8,[M8A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
9,[M9A,", Etobicoke",", Islington Avenue",],,,,,,,...,,,,,,,,,,


In [15]:
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,85,86,87,88,89,90,91,92,93,94
0,[],,,,,,,,,,...,,,,,,,,,,
1,[M1A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
2,[M2A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
3,[M3A,", North York",", Parkwoods",],,,,,,,...,,,,,,,,,,
4,[M4A,", North York",", Victoria Village",],,,,,,,...,,,,,,,,,,
5,[M5A,", Downtown Toronto",", Regent Park, Harbourfront",],,,,,,,...,,,,,,,,,,
6,[M6A,", North York",", Lawrence Manor, Lawrence Heights",],,,,,,,...,,,,,,,,,,
7,[M7A,", Downtown Toronto",", Queen's Park, Ontario Provincial Government",],,,,,,,...,,,,,,,,,,
8,[M8A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
9,[M9A,", Etobicoke",", Islington Avenue",],,,,,,,...,,,,,,,,,,


In [16]:
df2 = df1.drop(df1.index[180:185])
df2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,85,86,87,88,89,90,91,92,93,94
0,[],,,,,,,,,,...,,,,,,,,,,
1,[M1A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
2,[M2A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
3,[M3A,", North York",", Parkwoods",],,,,,,,...,,,,,,,,,,
4,[M4A,", North York",", Victoria Village",],,,,,,,...,,,,,,,,,,
5,[M5A,", Downtown Toronto",", Regent Park, Harbourfront",],,,,,,,...,,,,,,,,,,
6,[M6A,", North York",", Lawrence Manor, Lawrence Heights",],,,,,,,...,,,,,,,,,,
7,[M7A,", Downtown Toronto",", Queen's Park, Ontario Provincial Government",],,,,,,,...,,,,,,,,,,
8,[M8A,", Not assigned",",",],,,,,,,...,,,,,,,,,,
9,[M9A,", Etobicoke",", Islington Avenue",],,,,,,,...,,,,,,,,,,


In [17]:
df2.drop(df2.columns[10:94], axis=1, inplace=True)
df2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,94
0,[],,,,,,,,,,
1,[M1A,", Not assigned",",",],,,,,,,
2,[M2A,", Not assigned",",",],,,,,,,
3,[M3A,", North York",", Parkwoods",],,,,,,,
4,[M4A,", North York",", Victoria Village",],,,,,,,
5,[M5A,", Downtown Toronto",", Regent Park, Harbourfront",],,,,,,,
6,[M6A,", North York",", Lawrence Manor, Lawrence Heights",],,,,,,,
7,[M7A,", Downtown Toronto",", Queen's Park, Ontario Provincial Government",],,,,,,,
8,[M8A,", Not assigned",",",],,,,,,,
9,[M9A,", Etobicoke",", Islington Avenue",],,,,,,,


In [18]:
df2.drop(df2.columns[4:95], axis=1, inplace=True)
df2

Unnamed: 0,0,1,2,3
0,[],,,
1,[M1A,", Not assigned",",",]
2,[M2A,", Not assigned",",",]
3,[M3A,", North York",", Parkwoods",]
4,[M4A,", North York",", Victoria Village",]
5,[M5A,", Downtown Toronto",", Regent Park, Harbourfront",]
6,[M6A,", North York",", Lawrence Manor, Lawrence Heights",]
7,[M7A,", Downtown Toronto",", Queen's Park, Ontario Provincial Government",]
8,[M8A,", Not assigned",",",]
9,[M9A,", Etobicoke",", Islington Avenue",]


In [19]:
df2.drop(df2.columns[3], axis=1, inplace=True)
df2

Unnamed: 0,0,1,2
0,[],,
1,[M1A,", Not assigned",","
2,[M2A,", Not assigned",","
3,[M3A,", North York",", Parkwoods"
4,[M4A,", North York",", Victoria Village"
5,[M5A,", Downtown Toronto",", Regent Park, Harbourfront"
6,[M6A,", North York",", Lawrence Manor, Lawrence Heights"
7,[M7A,", Downtown Toronto",", Queen's Park, Ontario Provincial Government"
8,[M8A,", Not assigned",","
9,[M9A,", Etobicoke",", Islington Avenue"


In [20]:
df2[0] = df2[0].str.strip('[]')
df2.head(10)

Unnamed: 0,0,1,2
0,,,
1,M1A,", Not assigned",","
2,M2A,", Not assigned",","
3,M3A,", North York",", Parkwoods"
4,M4A,", North York",", Victoria Village"
5,M5A,", Downtown Toronto",", Regent Park, Harbourfront"
6,M6A,", North York",", Lawrence Manor, Lawrence Heights"
7,M7A,", Downtown Toronto",", Queen's Park, Ontario Provincial Government"
8,M8A,", Not assigned",","
9,M9A,", Etobicoke",", Islington Avenue"


In [21]:
df2[1] = df2[1].str.strip(',')
df2

Unnamed: 0,0,1,2
0,,,
1,M1A,Not assigned,","
2,M2A,Not assigned,","
3,M3A,North York,", Parkwoods"
4,M4A,North York,", Victoria Village"
5,M5A,Downtown Toronto,", Regent Park, Harbourfront"
6,M6A,North York,", Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,", Queen's Park, Ontario Provincial Government"
8,M8A,Not assigned,","
9,M9A,Etobicoke,", Islington Avenue"


In [22]:
df3=df2
df3[2] = df3[2].str.strip(',')
df3

Unnamed: 0,0,1,2
0,,,
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M8A,Not assigned,
9,M9A,Etobicoke,Islington Avenue


In [23]:
df3.dtypes

0    object
1    object
2    object
dtype: object

In [24]:
df3[2].replace(' ', np.nan, inplace=True)
print(df3)

       0                  1                                                  2
0                      None                                               None
1    M1A       Not assigned                                                NaN
2    M2A       Not assigned                                                NaN
3    M3A         North York                                          Parkwoods
4    M4A         North York                                   Victoria Village
5    M5A   Downtown Toronto                          Regent Park, Harbourfront
6    M6A         North York                   Lawrence Manor, Lawrence Heights
7    M7A   Downtown Toronto        Queen's Park, Ontario Provincial Government
8    M8A       Not assigned                                                NaN
9    M9A          Etobicoke                                   Islington Avenue
10   M1B        Scarborough                                     Malvern, Rouge
11   M2B       Not assigned                         

In [25]:
df3

Unnamed: 0,0,1,2
0,,,
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M8A,Not assigned,
9,M9A,Etobicoke,Islington Avenue


In [26]:
df4 = df3.dropna(axis=0, how='any')
df4

Unnamed: 0,0,1,2
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
9,M9A,Etobicoke,Islington Avenue
10,M1B,Scarborough,"Malvern, Rouge"
12,M3B,North York,Don Mills
13,M4B,East York,"Parkview Hill, Woodbine Gardens"
14,M5B,Downtown Toronto,"Garden District, Ryerson"


In [27]:
col_labels = soup.find_all('th')

In [28]:
all_header = []
col_str = str(col_labels)
cleantext2 = BeautifulSoup(col_str, "lxml").get_text()
all_header.append(cleantext2)
print(all_header)

['[Postal Code\n, Borough\n, Neighborhood\n, Canadian postal codes\n]']


In [29]:
dfh = pd.DataFrame(all_header)
dfh

Unnamed: 0,0
0,"[Postal Code\n, Borough\n, Neighborhood\n, Can..."


In [30]:
dfh = dfh[0].str.split(',', expand=True)
dfh.head()

Unnamed: 0,0,1,2,3
0,[Postal Code\n,Borough\n,Neighborhood\n,Canadian postal codes\n]


In [31]:
dfh.drop(dfh.columns[3], axis=1, inplace=True)
dfh

Unnamed: 0,0,1,2
0,[Postal Code\n,Borough\n,Neighborhood\n


In [32]:
dfh[2] = dfh[2].str.strip('\n')
dfh[1] = dfh[1].str.strip('\n')
dfh[0] = dfh[0].str.strip('\n')
dfh[0] = dfh[0].str.strip('[]')
dfh

Unnamed: 0,0,1,2
0,Postal Code,Borough,Neighborhood


In [33]:
frames = [dfh, df4]

df5 = pd.concat(frames)
df5.head(10)

Unnamed: 0,0,1,2
0,Postal Code,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
9,M9A,Etobicoke,Islington Avenue
10,M1B,Scarborough,"Malvern, Rouge"
12,M3B,North York,Don Mills
13,M4B,East York,"Parkview Hill, Woodbine Gardens"


In [34]:
df6 = df5.rename(columns=df5.iloc[0])
df6.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,Postal Code,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"


In [35]:
df6.info()
df6.shape

<class 'pandas.core.frame.DataFrame'>
Int64Index: 104 entries, 0 to 179
Data columns (total 3 columns):
Postal Code      104 non-null object
 Borough         104 non-null object
 Neighborhood    104 non-null object
dtypes: object(3)
memory usage: 3.2+ KB


(104, 3)

In [36]:
df7=df6.drop(df6.index[0])
df7

Unnamed: 0,Postal Code,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
9,M9A,Etobicoke,Islington Avenue
10,M1B,Scarborough,"Malvern, Rouge"
12,M3B,North York,Don Mills
13,M4B,East York,"Parkview Hill, Woodbine Gardens"
14,M5B,Downtown Toronto,"Garden District, Ryerson"


In [37]:
df7.reset_index(drop=True, inplace=True)
df7

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [38]:
df7.shape

(103, 3)

# Creating Dataframe with Latitude and Longitude of postal code

In [40]:
dfn=pd.read_csv("http://cocl.us/Geospatial_data")
dfn.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [41]:
dfn.shape

(103, 3)

In [42]:
dfx = df7
dfx = dfx.sort_values(by ='Postal Code' )
dfx

Unnamed: 0,Postal Code,Borough,Neighborhood
6,M1B,Scarborough,"Malvern, Rouge"
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
18,M1E,Scarborough,"Guildwood, Morningside, West Hill"
22,M1G,Scarborough,Woburn
26,M1H,Scarborough,Cedarbrae
32,M1J,Scarborough,Scarborough Village
38,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
44,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
51,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
58,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [43]:
dfn1 = dfn
dfn1 = dfn1.sort_values(by ='Postal Code' )
dfn1

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [44]:
result = pd.merge(dfx, dfn1, on='Postal Code')
result

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Explore and cluster the neighborhoods in Toronto

In [45]:
result['Latitude'].dtypes

dtype('float64')

In [46]:
result.dtypes

Postal Code       object
 Borough          object
 Neighborhood     object
Latitude         float64
Longitude        float64
dtype: object

In [47]:
result[' Borough'].dtypes

dtype('O')

In [49]:
result.rename(columns={'Postal Code':'Postal Code', ' Borough':'Borough'," Neighborhood":"Neighborhood"}, inplace=True)

In [50]:
result["Borough"].value_counts()

 North York          24
 Downtown Toronto    19
 Scarborough         17
 Etobicoke           12
 Central Toronto      9
 West Toronto         6
 York                 5
 East York            5
 East Toronto         5
 Mississauga          1
Name: Borough, dtype: int64

In [52]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: done

# All requested packages already installed.

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         713 KB

The following NEW packages will be INSTALLED:

    altair:  4.1.0-py_1 conda-forge
    branca:  0.4.1-py_0 conda-forge
    folium:  0.5.0-py_0 conda-forge
    vincent: 0.4.4-py_1 conda-forge


Down

In [55]:
result

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [56]:
CLIENT_ID = 'LJZSPH3VDTJDGAPB4NWEI4QSX3MKHANOETI1IHYHDVZK0LAI' # your Foursquare ID
CLIENT_SECRET = 'SQMAH4A24V5EDDDJ3JTDZKBR2GIVALGGFAPUP3XTPVLGOY2U' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: LJZSPH3VDTJDGAPB4NWEI4QSX3MKHANOETI1IHYHDVZK0LAI
CLIENT_SECRET:SQMAH4A24V5EDDDJ3JTDZKBR2GIVALGGFAPUP3XTPVLGOY2U


In [61]:
mydata = result.copy()
mydata

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [73]:
mydata.drop(mydata.columns[0], axis=1, inplace=True)
mydata

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476
5,Scarborough,Scarborough Village,43.744734,-79.239476
6,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [75]:
mydata[mydata['Borough'] == 'North York'].reset_index(drop=True)
mydata

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476
5,Scarborough,Scarborough Village,43.744734,-79.239476
6,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [67]:
mydata.loc[0, 'Neighborhood']

' Malvern, Rouge'