# Neighborhoods in Toronto

### In the cell below, we scrape the web page and extract its table using the BeautifulSoup and requests modules
1. importing modules
2. using requests, getting the text of the webpage
3. converting the source text to BeautifulSoup object and reading the table
4. looping through the table 
        a. find all td tags
        b. ignore rows with "Not assigned" in the Borough column
        c. if repeated postcode, append Neighborhood to previous element
        d. otherwise, append the new row; if its Neighborhood value is "Not assigned", use the corresponding Borough value instead

In [57]:
from bs4 import BeautifulSoup
import requests

# Download the Wikipedia page that lists the "M" postal codes.
# The timeout keeps the cell from hanging indefinitely, and raise_for_status()
# stops us from silently parsing an HTTP error page as if it were the article.
wiki_response = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
wiki_response.raise_for_status()
wiki_source = wiki_response.text

soup = BeautifulSoup(wiki_source,'lxml')
table = soup.table  # first <table> element in the document
if table is None:
    raise ValueError("No <table> element found in the downloaded page")

# Build one [postcode, borough, neighborhoods] entry per postal code.
extracted_table = []
for tr in table.find_all('tr')[1:]:  # skip the first row (presumably the header)
    tds = tr.find_all('td')
    if len(tds) < 3:
        # Row without the three expected cells: indexing tds[2] would raise IndexError.
        continue
    postcode, borough, neighborhood = (td.text.strip() for td in tds[:3])

    # Rows without an assigned borough are dropped entirely.
    if borough == "Not assigned":
        continue
    # An unassigned neighborhood falls back to the borough name.
    if neighborhood == "Not assigned":
        neighborhood = borough

    # Compare against the last *kept* row rather than the last *seen* row, so a
    # skipped row can never cause a neighborhood to be glued onto an unrelated
    # postcode (or onto an empty list).
    if extracted_table and extracted_table[-1][0] == postcode:
        extracted_table[-1][-1] = '%s,%s' % (extracted_table[-1][-1], neighborhood)
    else:
        extracted_table.append([postcode, borough, neighborhood])

### In the cells below, we convert the list to a pandas DataFrame and display the data and its shape

In [60]:
import pandas as pd
# Column labels for the three fields collected per postal code
headers = ['PostalCode', 'Borough', 'Neighborhood']

# Turn the scraped list-of-lists into a DataFrame, one row per postal code
df = pd.DataFrame(data=extracted_table, columns=headers)
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [61]:
# Sanity check: (number of rows, number of columns) of the scraped table
df.shape

(103, 3)

### Getting the latitude and the longitude coordinates of each neighborhood

In [67]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [68]:
# Left-join the coordinates onto the postal-code table, then discard the
# duplicate key column that came from the coordinates frame.
df_with_ll = (
    df.merge(df_data_1, how='left', left_on='PostalCode', right_on='Postal Code')
      .drop(columns='Postal Code')
)
df_with_ll

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens,Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937


In [77]:
# Keep only the rows whose borough name contains "Toronto".
# na=False counts a missing borough as a non-match.
has_toronto = df_with_ll['Borough'].str.contains("Toronto", na=False)
df_borough_with_toronto = df_with_ll[has_toronto]
df_borough_with_toronto.shape

(38, 5)

In [80]:
# Imports and display configuration for the mapping / Foursquare part of the notebook.
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# None lifts the display limit, so frames are rendered with every column and row
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# NOTE(review): newer pandas releases expose this function as pandas.json_normalize —
# confirm this import path against the installed pandas version.
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [81]:
# Look up the coordinates of Toronto; they are used to centre the map.
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; fail with a
    # clear message instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [85]:
# Base map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Draw one circle marker per row, with a "<neighborhood>, <borough>" popup
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
marker_rows = df_borough_with_toronto[marker_columns].itertuples(index=False, name=None)
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

In [93]:
import os  # standard library; used to read the credentials from the environment

# Foursquare credentials must never be hardcoded in a shared notebook.
# They are read from the FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET
# environment variables instead. Any key pair that was previously committed
# in this cell should be treated as compromised and regenerated.
try:
    CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] # your Foursquare ID
    CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] # your Foursquare Secret
except KeyError as missing:
    raise RuntimeError(
        'Set the {} environment variable before running this cell.'.format(missing))
VERSION = '20190504'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Mask the secret so it does not end up in the saved notebook output
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

Your credentails:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


In [107]:
#M4X_Downtown_Toronto df_borough_with_toronto
m4x_df = df_borough_with_toronto[df_borough_with_toronto['PostalCode']=='M4X']

M4X_Downtown_Toronto_latitude = m4x_df['Latitude'].item() # neighborhood latitude value
M4X_Downtown_Toronto_longitude = m4x_df['Longitude'].item() # neighborhood longitude value

M4X_Downtown_Toronto_name = m4x_df['Borough'].item() # neighborhood name


print('Latitude and longitude values of {} are {}, {}.'.format(M4X_Downtown_Toronto_name, 
                                                               M4X_Downtown_Toronto_latitude, 
                                                               M4X_Downtown_Toronto_longitude))

Latitude and longitude values of Downtown Toronto are 43.667967, -79.3676753.


In [108]:
# Build the Foursquare "explore" request URL for the M4X coordinates.
search_radius = 500  # value sent as the `radius` query parameter
venue_limit = 100    # value sent as the `limit` query parameter

url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
url = url_template.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    M4X_Downtown_Toronto_latitude,
    M4X_Downtown_Toronto_longitude,
    search_radius,
    venue_limit,
)

In [109]:
# Call the Foursquare API. The timeout prevents the cell from hanging
# forever, and raise_for_status() surfaces HTTP errors here rather than as
# a confusing KeyError when the payload is unpacked later.
venues_response = requests.get(url, timeout=30)
venues_response.raise_for_status()
results = venues_response.json()
results

{'meta': {'code': 200, 'requestId': '5cfb84bc4434b9214133c0ec'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4b646a6ff964a5205cb12ae3-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/diner_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d147941735',
         'name': 'Diner',
         'pluralName': 'Diners',
         'primary': True,
         'shortName': 'Diner'}],
       'id': '4b646a6ff964a5205cb12ae3',
       'location': {'address': '601 Parliament St.',
        'cc': 'CA',
        'city': 'Toronto',
        'country': 'Canada',
        'crossStreet': 'at Wellesley St. E',
        'distance': 140,
        'formattedAddress': ['601 Parliament St. (at Wellesley St. E)',
         'Toronto ON M4X 1P9',
         'Canada'],
        'labeledLatLngs': [{

In [110]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of a venue's first category, or None if it has none.

    Parameters
    ----------
    row : mapping-like (e.g. dict or pandas Series)
        Must provide a 'categories' entry or, failing that, a
        'venue.categories' entry holding a sequence of category dicts,
        each with a 'name' key.

    Returns
    -------
    The 'name' of the first category, or None when the category list is
    empty or None.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error is a
        # real problem and must not be silently swallowed.
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [111]:
# The venue entries of the first group in the API response
venues = results['response']['groups'][0]['items']

# Flatten the nested JSON into a table, then keep only the columns of interest
venues_flat = json_normalize(venues)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = venues_flat.loc[:, filtered_columns]

# Replace each row's category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Strip the dotted prefixes, e.g. 'venue.location.lat' -> 'lat'
nearby_venues.columns = [label.rsplit(".", 1)[-1] for label in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Cranberries,Diner,43.667843,-79.369407
1,Butter Chicken Factory,Indian Restaurant,43.667072,-79.369184
2,F'Amelia,Italian Restaurant,43.667536,-79.368613
3,Kingyo Toronto,Japanese Restaurant,43.665895,-79.368415
4,Murgatroid,Restaurant,43.667381,-79.369311
