In [34]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Passing None lifts the display limits, so DataFrames are shown in full
# (every column and every row) instead of being truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): pandas >= 1.0 exposes this function as `pd.json_normalize`;
# confirm this import path is still available on the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

# Confirmation message so the reader can see the import cell ran to completion.
print('Libraries imported.')

Libraries imported.


# Scrape the dataset from Wikipedia

In [35]:
import urllib.request
# Address of the Wikipedia page "List of postal codes of Canada: M" that the
# following cells download and parse.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Disabled urllib-based download; the page is fetched with `requests` instead.
#page = urllib.request.urlopen(url)

In [36]:
# Download the page. The timeout stops a stalled connection from hanging the
# kernel indefinitely (requests waits forever when no timeout is given).
r = requests.get(url, timeout=30)
# Fail here on an HTTP error status (4xx/5xx) instead of handing an error page
# to the HTML table parser in the next cell.
r.raise_for_status()

In [37]:
from io import StringIO

# Parse every HTML table in the downloaded page into a list of DataFrames.
# The markup is wrapped in StringIO because passing a literal HTML string to
# pd.read_html is deprecated (pandas >= 2.1); a file-like object works on
# older and newer versions alike.
tables = pd.read_html(StringIO(r.text))

# The first table on the page is used as the working data
# (columns used later: 'Postal Code', 'Borough', 'Neighborhood').
df=pd.DataFrame(tables[0])

In [39]:
# Remove, in place, every row whose borough is the placeholder "Not assigned".
unassigned_rows = df.index[df['Borough'] == "Not assigned"]
df.drop(index=unassigned_rows, inplace=True)

In [56]:
def _join_unique(values):
    """Comma-join the distinct strings in `values`, keeping first-seen order.

    `pd.unique` preserves order of appearance, so the joined text is the same
    on every run. Joining a `set` instead yields an order that can change
    between kernel sessions because string hashing is randomised.
    """
    return ','.join(pd.unique(values))


# One row per postal code; every other column is collapsed to a single
# comma-separated string of its distinct values.
df1 = df.groupby("Postal Code").agg(_join_unique)

# Where the neighbourhood is the placeholder "Not assigned", fall back to the
# borough name. The mask is computed once and reused on both sides.
no_neighborhood = df1['Neighborhood'] == "Not assigned"
df1.loc[no_neighborhood, 'Neighborhood'] = df1.loc[no_neighborhood, 'Borough']
df1.head()

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [63]:
# Sanity check: (number of postal codes, number of remaining columns).
df1.shape

(103, 2)

# Get Latitude and Longitude

In [51]:
# Load the coordinates table from a CSV file in the working directory.
# The later merge relies on it having a 'Postal Code' column; the map cell
# uses its 'Latitude' and 'Longitude' columns.
ds=pd.read_csv('Geospatial_Coordinates.csv')
# Preview the first rows.
ds.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [72]:
# Attach coordinates to each postal code. A left join keeps every row of df1
# even when the coordinates table has no entry for that code.
df2 = df1.merge(ds, how='left', on='Postal Code')
df2

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


# Work only with boroughs whose name contains the word Toronto

In [78]:
# Boroughs to keep, in the order their rows are stacked in the next cell.
toronto_boroughs = ('East Toronto', 'Central Toronto', 'Downtown Toronto', 'West Toronto')

# One filtered frame per borough (exact match on the 'Borough' column).
L1, L2, L3, L4 = (
    df2.loc[df2['Borough'] == borough, :] for borough in toronto_boroughs
)


In [82]:
# Stack the four borough subsets into one frame, in the order East, Central,
# Downtown, West. pd.concat replaces the chained DataFrame.append calls:
# append was deprecated in pandas 1.4 and removed in 2.0, and one concat also
# avoids copying the accumulated frame at every step. Original row labels are
# kept, as append did by default.
Toronto = pd.concat([L1, L2, L3, L4])
Toronto

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
87,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


In [83]:
address = 'Toronto, ON'

# Resolve the address to coordinates through the Nominatim geocoding service.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; stop with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [84]:
# Base map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance shared by every marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row of the Toronto frame; the popup shows the
# neighbourhood text for that postal code
marker_rows = zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Neighborhood'])
for lat, lng, label in marker_rows:
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(location=[lat, lng], popup=label, **marker_style).add_to(map_toronto)

map_toronto

<folium.features.CircleMarker at 0x1a1aeeeed0>

<folium.features.CircleMarker at 0x1a1bdc9f90>

<folium.features.CircleMarker at 0x1a1c012b50>

<folium.features.CircleMarker at 0x1a1c153b10>

<folium.features.CircleMarker at 0x1a1bfec6d0>

<folium.features.CircleMarker at 0x1a1c012c50>

<folium.features.CircleMarker at 0x1a1c2c34d0>

<folium.features.CircleMarker at 0x1a1c034790>

<folium.features.CircleMarker at 0x1a1c031250>

<folium.features.CircleMarker at 0x1a1bfcac90>

<folium.features.CircleMarker at 0x1a1c031510>

<folium.features.CircleMarker at 0x1a1bf31890>

<folium.features.CircleMarker at 0x1a1bf31b10>

<folium.features.CircleMarker at 0x1a1c003e10>

<folium.features.CircleMarker at 0x1a1c16a710>

<folium.features.CircleMarker at 0x1a1c034fd0>

<folium.features.CircleMarker at 0x1a1c035710>

<folium.features.CircleMarker at 0x1a1c284f50>

<folium.features.CircleMarker at 0x1a1c034c50>

<folium.features.CircleMarker at 0x1a1c02acd0>

<folium.features.CircleMarker at 0x1a1bff2150>

<folium.features.CircleMarker at 0x1a1c2e3b10>

<folium.features.CircleMarker at 0x1a1c2dfc50>

<folium.features.CircleMarker at 0x1a1c28ab10>

<folium.features.CircleMarker at 0x1a1c02d490>

<folium.features.CircleMarker at 0x1a1c2c3050>

<folium.features.CircleMarker at 0x1a1c02a310>

<folium.features.CircleMarker at 0x1a1c315690>

<folium.features.CircleMarker at 0x1a1c28a7d0>

<folium.features.CircleMarker at 0x1a1c33a750>

<folium.features.CircleMarker at 0x1a1c33a310>

<folium.features.CircleMarker at 0x1a1c034d50>

<folium.features.CircleMarker at 0x1a1c2daa50>

<folium.features.CircleMarker at 0x1a1c3154d0>

<folium.features.CircleMarker at 0x1a1c153a10>

<folium.features.CircleMarker at 0x1a1c367410>

<folium.features.CircleMarker at 0x1a1c315490>

<folium.features.CircleMarker at 0x1a1c2ddc10>

<folium.features.CircleMarker at 0x1a1c2dfa50>