# Toronto neighborhood webscrape

## Question 1

In [1]:
# Third party imports
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests

In [15]:
# Retrieve the Wikipedia page that lists the "M" postal codes.
# The timeout stops the cell from hanging indefinitely on a stalled connection.
response = requests.get(
    url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
print(response.status_code)
# Stop here on an HTTP error rather than parsing an error page further down.
response.raise_for_status()

200


In [16]:
# Parse the downloaded HTML with Beautiful Soup and print the page heading
# as a quick check that the expected article came back.
soup = BeautifulSoup(markup=response.content, features='html.parser')
title = soup.find(attrs={'id': 'firstHeading'})
print(title.string)

List of postal codes of Canada: M


In [17]:
# Walk every <td> of the first table on the page and collect one record per
# assigned postal code, ready to be turned into a dataframe.
table_contents = []
table = soup.find('table')
# find_all is the supported method name; findAll is a deprecated alias.
for row in table.find_all('td'):
    span_text = row.span.text
    if span_text == 'Not assigned':
        continue

    # The span text is split on '(': the part before it is stored as the
    # borough, the part after it as the neighbourhood list.
    parts = span_text.split('(')
    neighborhoods = parts[1].strip(')')
    # ' /' separators become commas; any remaining ')' becomes a space.
    neighborhoods = neighborhoods.replace(' /', ',').replace(')', ' ').strip(' ')

    table_contents.append({
        'PostalCode': row.p.text[:3],  # first three characters of the cell's <p> text
        'Borough': parts[0],
        'Neighborhood': neighborhoods,
    })

In [18]:
# Build the dataframe, then replace the borough labels listed below with
# shorter, readable names.
borough_fixes = {
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
}
df = pd.DataFrame(table_contents).assign(
    Borough=lambda frame: frame['Borough'].replace(borough_fixes)
)

In [19]:
# Sanity check: (rows, columns) of the scraped dataframe.
df.shape

(103, 3)

In [20]:
# Report the number of distinct borough labels and the number of rows.
borough_count = len(df['Borough'].unique())
row_count = df.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count))

The dataframe has 15 boroughs and 103 neighborhoods.


## Question 2

In [21]:
import geocoder

The Google geocoding option kept returning a `<REQUEST DENIED>` error, so I switched to the ArcGIS provider instead.

In [27]:
# Look up a (latitude, longitude) pair for every postal code with the ArcGIS
# geocoder. A lookup that yields no coordinates is retried a few times; if it
# still fails the row is kept with NaN coordinates so one bad lookup does not
# abort the whole loop and discard the results gathered so far.
MAX_GEOCODE_ATTEMPTS = 3

latlng_records = []
failed_codes = []
for postal_code in df['PostalCode']:
    coords = None
    for _ in range(MAX_GEOCODE_ATTEMPTS):
        coords = geocoder.arcgis(f'{postal_code}, Toronto, Ontario').latlng
        if coords:  # falsy (None or empty) when the lookup did not succeed
            break

    if coords:
        latitude, longitude = round(coords[0], 5), round(coords[1], 5)
    else:
        latitude = longitude = np.nan
        failed_codes.append(postal_code)

    latlng_records.append(
        {'PostalCode': postal_code, 'Latitude': latitude, 'Longitude': longitude}
    )

if failed_codes:
    print('No coordinates returned for:', ', '.join(failed_codes))

# The list and the dataframe get separate names so LatLng_df is only ever a dataframe.
LatLng_df = pd.DataFrame(latlng_records)

In [31]:
# Attach the geocoded coordinates to each row by joining on PostalCode
# (inner join, which is the pandas default).
toronto_df = df.merge(LatLng_df, how="inner", on="PostalCode")
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188


# Section 3

### Import libraries and load credentials

In [139]:
import folium
from sklearn.cluster import KMeans
from dotenv import dotenv_values

# Read the API credentials from the local .env file so they never appear in
# the notebook itself.
config = dotenv_values('.env')

four_id, four_sc = config['CLIENT_ID'], config['CLIENT_SECRET']

In [140]:
# Base map for the whole city, with one small circle marker per postal-code row.
map_Toronto = folium.Map(location=[43.7432, -79.3832], zoom_start=11)

# Styling shared by every marker.
# NOTE(review): parse_html looks like a Popup option rather than a marker
# option; it is passed through unchanged here - confirm it has any effect.
toronto_marker_style = dict(
    radius=3,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

for _, row in toronto_df.iterrows():
    folium.CircleMarker(
        [row.Latitude, row.Longitude],
        popup=folium.Popup(f'{row.Neighborhood} - {row.Borough}'),
        **toronto_marker_style,
    ).add_to(map_Toronto)
map_Toronto


### What are the boroughs of Toronto and how many neighbourhoods are in each one?

In [141]:
# Number of rows (postal-code areas) per borough label, largest first.
toronto_df['Borough'].value_counts()

North York                24
Downtown Toronto          17
Scarborough               17
Etobicoke                 11
Central Toronto            9
West Toronto               6
York                       5
East Toronto               4
East York                  4
East York/East Toronto     1
Queen's Park               1
East Toronto Business      1
Etobicoke Northwest        1
Mississauga                1
Downtown Toronto Stn A     1
Name: Borough, dtype: int64

### North York contains the most entries, so let's focus on that

In [142]:
# Keep only the rows whose borough is exactly 'North York'.
is_north_york = toronto_df['Borough'] == 'North York'
york_df = toronto_df[is_north_york]
print('The york_df contains information for', len(york_df), 'neighborhoods.')
york_df.head()

The york_df contains information for 24 neighborhoods.


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
7,M3B,North York,Don Mills North,43.74923,-79.36186
10,M6B,North York,Glencairn,43.70687,-79.44812


### Find the centre of the borough.

In [143]:
# Bounding box of the North York points ...
yorkLatMin, yorkLatMax = york_df['Latitude'].min(), york_df['Latitude'].max()
yorkLngMin, yorkLngMax = york_df['Longitude'].min(), york_df['Longitude'].max()

# ... and its midpoint, used as the "centre" of the borough.
yorkLat = (yorkLatMin + yorkLatMax) / 2
yorkLng = (yorkLngMin + yorkLngMax) / 2


In [163]:

# Map centred on the computed North York midpoint, with one larger circle
# marker per North York row.
map_NorthYork = folium.Map(location=[yorkLat, yorkLng], zoom_start=13)

# Styling shared by every marker.
# NOTE(review): parse_html looks like a Popup option rather than a marker
# option; it is passed through unchanged here - confirm it has any effect.
york_marker_style = dict(
    radius=10,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

for _, row in york_df.iterrows():
    folium.CircleMarker(
        [row.Latitude, row.Longitude],
        popup=folium.Popup(f'{row.Neighborhood} - {row.Borough}'),
        **york_marker_style,
    ).add_to(map_NorthYork)
map_NorthYork

In [145]:
# Display the North York subset for inspection.
york_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
7,M3B,North York,Don Mills North,43.74923,-79.36186
10,M6B,North York,Glencairn,43.70687,-79.44812
13,M3C,North York,Don Mills South,43.72168,-79.34352
27,M2H,North York,Hillcrest Village,43.80225,-79.35558
28,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.75788,-79.44847
33,M2J,North York,"Fairview, Henry Farm, Oriole",43.78097,-79.34781
34,M3J,North York,"Northwood Park, York University",43.76476,-79.48798


In [146]:
# Build the Foursquare "explore" request for venues around the North York centre.
url = (
    'https://api.foursquare.com/v2/venues/explore'
    '?&client_id={client_id}&client_secret={client_secret}&v={version}'
    '&ll={lat},{lng}&radius={radius}&limit={limit}'
).format(
    client_id=four_id,
    client_secret=four_sc,
    version=20120609,
    # Centre the search on the North York borough
    lat=yorkLat,
    lng=yorkLng,
    # Search radius 500m
    radius=500,
    # Top 100 results
    limit=100,
)

# Fail fast on a stalled connection or an HTTP error status instead of letting
# the problem surface later as a KeyError when the payload is unpacked.
venues_response = requests.get(url, timeout=30)
venues_response.raise_for_status()
results = venues_response.json()


In [176]:
# explore JSON to return details for individual places
# results['response']['groups'][0]['items'][0]

In [190]:
# Flatten the venue items of the first result group into a dataframe
# (nested JSON keys become dotted column names).
venue_items = results['response']['groups'][0]['items']
york_paces_df = pd.DataFrame(pd.json_normalize(venue_items))

In [191]:
# explore the dataframe 
# list(york_paces_df.columns)

In [192]:
# Keep only the columns of interest: venue name, categories and coordinates.
venue_columns = [
    'venue.name',
    'venue.categories',
    'venue.location.lat',
    'venue.location.lng',
]
york_paces_df = york_paces_df[venue_columns]

In [193]:
def get_category_type(row):
    """Return the name of the first category listed for a venue.

    Parameters
    ----------
    row : mapping-like (e.g. a dataframe row or a dict)
        Looked up under ``'categories'`` first, falling back to
        ``'venue.categories'`` when that key is absent.

    Returns
    -------
    The ``'name'`` of the first category entry, or ``None`` when the
    category value is ``None`` or an empty list.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error
        # propagates instead of being silently swallowed.
        categories_list = row['venue.categories']

    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [194]:
# Replace each venue's category entry with the value returned by
# get_category_type for that row.
york_paces_df['venue.categories'] = york_paces_df.apply(get_category_type, axis='columns')

# Rename columns
column_labels = {
    'venue.name': 'Name',
    'venue.categories': 'Category',
    'venue.location.lat': 'Latitude',
    'venue.location.lng': 'Longitude',
}
york_paces_df = york_paces_df.rename(columns=column_labels)

In [196]:
# Number of returned venues per category, largest first.
york_paces_df['Category'].value_counts()

Bank                 2
Restaurant           1
Pharmacy             1
Dog Run              1
Sushi Restaurant     1
Coffee Shop          1
Mobile Phone Shop    1
Ski Chalet           1
Diner                1
Shopping Mall        1
Pet Store            1
Community Center     1
Ski Area             1
Gas Station          1
Bridal Shop          1
Pizza Place          1
Ice Cream Shop       1
Park                 1
Sandwich Place       1
Name: Category, dtype: int64