# Section 1: Scraping Wikipedia & Creating a Data Frame

In [5]:
import pandas as pd

URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
tbl = pd.read_html(URL,header=0)
Can_post = pd.DataFrame(tbl[0])

Can_post.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [8]:
# to ignore the rows where Borough is "Not assigned"

Can_post_cln = Can_post[Can_post.Borough != 'Not assigned']
Can_post_cln.head()

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [9]:
# to combine the Neighbourhood values with similar Postcode

Can_post_cln = Can_post_cln.groupby(['Postcode', 'Borough'], as_index=False)['Neighborhood'].agg(lambda col: ', '.join(col))
Can_post_cln.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [10]:
# to replace Neighborhood "Not assigned" with corresponding Borough Value

Can_post_cln.loc[Can_post_cln['Neighborhood'] == 'Not assigned', 'Neighborhood'] = Can_post_cln['Borough']
Can_post_cln.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [11]:
Can_post_cln.shape #This ends the Section 1

(103, 3)

# Section 2: Mapping Latitude & Longitude

In [12]:
# reading the latitude & longitude data from given csv file

location = pd.read_csv("http://cocl.us/Geospatial_data")
location.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [13]:
# Modify the column names only to remove white space in first column's name 'Postal Code'

col_name = ['Postal_code', 'Latitude', 'Longitude']
location.columns = col_name
location.head()

Unnamed: 0,Postal_code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [14]:
# calling latitude & longitude in originl dataframe

Can_post_cln = pd.merge(Can_post_cln, location, left_on='Postcode', right_on='Postal_code', how='left')
Can_post_cln = Can_post_cln.drop('Postal_code', axis = 1)

Can_post_cln.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Section 3: Exploring and Clustering Neighborhoods

In [15]:
# defining a new dataframe for Toronto borough only

toronto_data = Can_post_cln[Can_post_cln['Borough'].str.contains('Toronto', regex=False, case=False, na=False)]
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [16]:
toronto_data.shape

(39, 5)

In [1]:
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

print('Libraries imported.')

Libraries imported.


In [3]:
# get latitude & logitude for Toronto

address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.653963, -79.387207.


In [18]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Let's explore ifferent vanues in "The Beaches" Neighborhood in East Toronto

In [20]:
# get latitude & logitude for The Beaches neighorhood

address = 'The Beaches, East Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6710244, -79.296712.


In [37]:
CLIENT_ID = '' # removed because of sensitive information
CLIENT_SECRET = '' # removed because of sensitive information
VERSION = '20180604'
radius = 500
LIMIT = 50
latitude = 40.715337
longitude = -74.008848
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)


In [32]:
import requests
from pandas.io.json import json_normalize

In [36]:
results = requests.get(url).json()
items = results['response']['groups'][0]['items']
dataframe = json_normalize(items) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories'] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ['venue.id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# clean columns
dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]

dataframe_filtered.head(6) # datafrme of venues near the Beaches neighborhood in Toronto

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
0,Korin,"[{'id': '4bf58dd8d48988d1f8941735', 'name': 'F...",57 Warren St,US,New York,United States,Church St,73,"[57 Warren St (Church St), New York, NY 10007,...","[{'label': 'display', 'lat': 40.71482437714839...",40.714824,-74.009404,Tribeca,10007,NY,4af5d65ff964a52091fd21e3
1,Chambers Street Wines,"[{'id': '4bf58dd8d48988d119951735', 'name': 'W...",148 Chambers St,US,New York,United States,btwn West Broadway & Hudson St,88,[148 Chambers St (btwn West Broadway & Hudson ...,"[{'label': 'display', 'lat': 40.71577306392837...",40.715773,-74.009718,,10007,NY,4adcf23cf964a520cc6221e3
2,Takahachi Bakery,"[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...",25 Murray St,US,New York,United States,at Church St,187,"[25 Murray St (at Church St), New York, NY 100...","[{'label': 'display', 'lat': 40.71365284530189...",40.713653,-74.008804,,10007,NY,4c154c9a77cea593c401d260
3,Juice Press,"[{'id': '4bf58dd8d48988d1d3941735', 'name': 'V...",83 Murray St,US,New York,United States,btwn Greenwich St & W Broadway,202,[83 Murray St (btwn Greenwich St & W Broadway)...,"[{'label': 'display', 'lat': 40.71478769908051...",40.714788,-74.011132,,10007,NY,54148bc6498ea7bb8c05b70a
4,Takahachi,"[{'id': '4bf58dd8d48988d1d2941735', 'name': 'S...",145 Duane St,US,New York,United States,btwn W Broadway & Church St,146,"[145 Duane St (btwn W Broadway & Church St), N...","[{'label': 'display', 'lat': 40.71652647412374...",40.716526,-74.008101,,10013,NY,4a8f2f39f964a520471420e3
5,Los Tacos No. 1,"[{'id': '4bf58dd8d48988d151941735', 'name': 'T...",136 Church St,US,New York,United States,,119,"[136 Church St, New York, NY 10007, United Sta...","[{'label': 'display', 'lat': 40.714267, 'lng':...",40.714267,-74.008756,,10007,NY,5d5f24ec09484500079aee00
