## Part 1 - Extract Data from Wikipedia 

In [1]:
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M"
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
# Parse the HTML tables found at `url`, using the first row of each table as
# its header. read_html returns a list of DataFrames, one per table.
df = pd.read_html(url, header=0)

In [4]:
# `df` holds one entry per parsed table; keep only the first one
df2 = df[0]

In [5]:
# Keep only the rows whose Borough is not the 'Not assigned' placeholder
df2 = df2.loc[df2['Borough'] != 'Not assigned']
df2.head(8)

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills


In [6]:
# Number of rows and columns in the dataframe
df2.shape

(103, 3)

## Part 2 - Merge with Acquired Geospatial Coordinates

In [7]:
# Load the coordinates CSV into a dataframe (it is merged with df2 in the next cell)
df_geo = pd.read_csv("Geospatial_Coordinates.csv")

In [39]:
# Join the coordinates onto the postal-code table using the shared
# 'Postal Code' column (default inner join).
df_merged = df2.merge(df_geo, on='Postal Code')
df_merged.head(8)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188


## Part 3 - Analyze, Cluster and Label Data

In [20]:
import folium
import json
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim

In [11]:
# Keep only the rows whose Borough contains the word "Toronto".
# na=False makes a missing Borough count as "no match"; without it
# str.contains returns NaN for that row and the boolean mask raises.
df_tor = df_merged[df_merged.Borough.str.contains("Toronto", na=False)].reset_index(drop=True)
df_tor.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [26]:
# Foursquare credentials are read from the environment so that no secret is
# stored in the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
try:
    CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
    CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
except KeyError as missing:
    # Fail here with an actionable message rather than later with an
    # authentication error from the API.
    raise RuntimeError(
        'Environment variable {} is not set; it is required to call the '
        'Foursquare API.'.format(missing.args[0])
    ) from None
VERSION = '20180605' # Foursquare API version

In [27]:
# Get coordinates for Toronto
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match for the address;
# stop with a clear message instead of an AttributeError on the lines below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [41]:
# Base map centred on the Toronto coordinates computed earlier
map_tor = folium.Map(location=[latitude, longitude], zoom_start=12)

# Draw one circle marker per row of df_tor, with the neighborhood name as popup
for lat, lng, neighborhood in df_tor[['Latitude', 'Longitude', 'Neighborhood']].itertuples(index=False, name=None):
    label = folium.Popup(neighborhood, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_tor)

# Last expression of the cell: renders the map
map_tor

In [29]:
# Name and coordinates of the first neighborhood (row label 0) in df_tor
neighborhood_name, neighborhood_latitude, neighborhood_longitude = (
    df_tor.at[0, column] for column in ('Neighborhood', 'Latitude', 'Longitude')
)

print('Latitude and longitude values of {} are {}, {}.'.format(
    neighborhood_name, neighborhood_latitude, neighborhood_longitude))

Latitude and longitude values of Regent Park, Harbourfront are 43.6542599, -79.3606359.


In [31]:
# Request the venues around the selected neighborhood from the Foursquare
# venues/search endpoint.
radius = 500  # search radius sent to the API
LIMIT = 100   # maximum number of venues requested
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neighborhood_latitude, neighborhood_longitude, VERSION, radius, LIMIT)

# The timeout keeps the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() turns an HTTP error into an explicit
# exception instead of a KeyError on the lookup below.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

# Flatten the nested venue records into a table
venues1 = pd.json_normalize(results['response']['venues'])
venues1.head()

Unnamed: 0,id,name,categories,referralId,hasPerk,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,location.address,location.crossStreet,venuePage.id
0,5bdc6c2bba57b4002c4c71a8,Oldtown Bodega,"[{'id': '4bf58dd8d48988d16d941735', 'name': 'C...",v-1594222311,False,43.653966,-79.360752,"[{'label': 'display', 'lat': 43.653966, 'lng':...",34,M5A 1L6,CA,Toronto,ON,Canada,"[Toronto ON M5A 1L6, Canada]",,,
1,4bc70f5d14d7952126a066e9,Sackville Playground,"[{'id': '4bf58dd8d48988d163941735', 'name': 'P...",v-1594222311,False,43.654656,-79.359871,"[{'label': 'display', 'lat': 43.65465604258614...",75,,CA,Toronto,ON,Canada,"[420 king st E, Toronto ON, Canada]",420 king st E,,
2,53b8466a498e83df908c3f21,Tandem Coffee,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",v-1594222311,False,43.653559,-79.361809,"[{'label': 'display', 'lat': 43.65355870959944...",122,,CA,Toronto,ON,Canada,"[368 King St E (at Trinity St), Toronto ON, Ca...",368 King St E,at Trinity St,
3,4b0d4672f964a520854523e3,TTC Streetcar #504 King St,"[{'id': '4f2a23984b9023bd5841ed2c', 'name': 'M...",v-1594222311,False,43.641273,-79.447163,"[{'label': 'display', 'lat': 43.64127266669386...",7118,,CA,Toronto,ON,Canada,"[King St. (Moving Target!), Toronto ON, Canada]",King St.,Moving Target!,
4,50760559e4b0e8c7babe2497,Body Blitz Spa East,"[{'id': '4bf58dd8d48988d1ed941735', 'name': 'S...",v-1594222311,False,43.654735,-79.359874,"[{'label': 'display', 'lat': 43.65473505045365...",80,M5A 1L9,CA,Toronto,ON,Canada,[497 King Street East (btwn Sackville St and S...,497 King Street East,btwn Sackville St and Sumach St,


In [32]:
# function that extracts the category of the venue from JSON
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    Parameters
    ----------
    row : mapping-like (for example a pandas Series or a dict)
        Must support ``row['categories']``. The value is expected to be a
        sequence of dicts that each carry a ``'name'`` key.

    Returns
    -------
    The value of ``row['categories'][0]['name']``, or the string
    ``"Uncategorized"`` when that lookup is not possible.
    """
    try:
        return row['categories'][0]['name']
    except (KeyError, IndexError, TypeError):
        # KeyError: 'categories' or 'name' is missing.
        # IndexError: the category list is empty.
        # TypeError: the value is not subscriptable (e.g. None or NaN).
        # Anything else is unexpected and is allowed to propagate.
        return "Uncategorized"

In [33]:
# Columns of the raw Foursquare frame that are kept
filtered_columns = ['name', 'categories', 'location.lat', 'location.lng']

# Select those columns and replace each row's 'categories' value with the
# single category name returned by get_category_type.
venues = venues1[filtered_columns].assign(
    categories=lambda frame: frame.apply(get_category_type, axis=1)
)

venues.head()

Unnamed: 0,name,categories,location.lat,location.lng
0,Oldtown Bodega,Café,43.653966,-79.360752
1,Sackville Playground,Park,43.654656,-79.359871
2,Tandem Coffee,Coffee Shop,43.653559,-79.361809
3,TTC Streetcar #504 King St,Moving Target,43.641273,-79.447163
4,Body Blitz Spa East,Spa,43.654735,-79.359874


In [43]:
# `venues` is built from venues1 in the previous cell; this cell only
# displays the whole frame (pandas truncates the middle rows when rendering).
venues

Unnamed: 0,name,categories,location.lat,location.lng
0,Oldtown Bodega,Café,43.653966,-79.360752
1,Sackville Playground,Park,43.654656,-79.359871
2,Tandem Coffee,Coffee Shop,43.653559,-79.361809
3,TTC Streetcar #504 King St,Moving Target,43.641273,-79.447163
4,Body Blitz Spa East,Spa,43.654735,-79.359874
...,...,...,...,...
95,T4G Limited - Toronto,Office,43.658367,-79.349778
96,Absolute Location Services,Office,43.654415,-79.355260
97,The Corktown Kitchen,Gastropub,43.653316,-79.362140
98,Worldwide Quest,General Travel,43.654659,-79.359830
