# This notebook will be used for the capstone project

# Import libraries

In [127]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scrape data from wiki page and write it to text file

In [128]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

soup = BeautifulSoup(source, 'lxml')
with open('Canada_codes.txt', 'w') as fo:
    for tr in soup.find_all('tr'):
            tds = tr.find_all('td')
            if not tds:
                continue
            Postalcode, Borough, Neighbourhood = [td.text.strip() for td in tds[:4]]
            print('; '.join([Postalcode, Borough, Neighbourhood]), file=fo)

ValueError: not enough values to unpack (expected 3, got 1)

# Import text file created from scraping

In [129]:
data = pd.read_csv('Canada_codes.txt', sep=";", header=None)
data.columns = ["Postalcode", "Borough", "Neighbourhood"]

# Filter out 'not assigned'

In [206]:
Canada_data = data[(data['Borough'].astype(str)).str.contains(r'^(?:(?!Not assigned).)*$')]

In [207]:
Canada_data

Unnamed: 0,Postalcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


# Join neighbourhoods


In [214]:
Canada_data.Neighbourhood = Canada_data.Neighbourhood.astype(str)
df1 = pd.DataFrame(Canada_data.groupby(['Postalcode', 'Borough'])['Neighbourhood'].apply(', '.join).reset_index())
df1

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village ..."
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [215]:
Canada_geo = pd.read_csv('Geospatial_Coordinates.csv')

In [216]:
Canada_geo

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


# Join geo data

In [230]:
Canada_data_geo = Canada_data.join(Canada_geo.set_index('Postal Code'), on='Postalcode')

In [253]:
Canada_anlysis

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
6,M6A,North York,Lawrence Heights,43.718518,-79.464763
7,M6A,North York,Lawrence Manor,43.718518,-79.464763
14,M3B,North York,Don Mills North,43.745906,-79.352188
19,M6B,North York,Glencairn,43.709577,-79.445073
31,M3C,North York,Flemingdon Park,43.7259,-79.340923
32,M3C,North York,Don Mills South,43.7259,-79.340923
63,M2H,North York,Hillcrest Village,43.803762,-79.363452
64,M3H,North York,Bathurst Manor,43.754328,-79.442259


# Analysis

In [218]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Collecting package metadata: ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\AniaIgor\Anaconda3

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-4.7.5                |           py37_0         3.0 MB  conda-forge
    conda-package-handling-1.3.10|           py37_0         277 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.4 MB

The following NEW packages will be INSTALLED:

  conda-package-han~ conda-forge/win-64::conda-package-handling-1.3.10-py37_0
  geographiclib      conda-forge/noarch::geographiclib-1.49-py_0
  geopy             



Folium installed
Libraries imported.


In [239]:
CLIENT_ID = '5SYIVSRH0SZI5JPHH253VQLHTYNPZR1W3Z0TI5LLB0B1XSGR' # your Foursquare ID
CLIENT_SECRET = 'V0AARCEJJQSIYRSCMHTU4RWCALUYC1Q4KZWBIQWNFDKHQ3PO' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5SYIVSRH0SZI5JPHH253VQLHTYNPZR1W3Z0TI5LLB0B1XSGR
CLIENT_SECRET:V0AARCEJJQSIYRSCMHTU4RWCALUYC1Q4KZWBIQWNFDKHQ3PO


# Select subset of data to anlyse one area

In [231]:
Canada_anlysis = Canada_data_geo[(Canada_data_geo['Borough'].astype(str)).str.contains('North York')]

In [273]:
# Select one neighbourhood's coordinates to analyse: Lawrence Heights
Canada_anlysis

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
6,M6A,North York,Lawrence Heights,43.718518,-79.464763
7,M6A,North York,Lawrence Manor,43.718518,-79.464763
14,M3B,North York,Don Mills North,43.745906,-79.352188
19,M6B,North York,Glencairn,43.709577,-79.445073
31,M3C,North York,Flemingdon Park,43.7259,-79.340923
32,M3C,North York,Don Mills South,43.7259,-79.340923
63,M2H,North York,Hillcrest Village,43.803762,-79.363452
64,M3H,North York,Bathurst Manor,43.754328,-79.442259


In [265]:
# set parameters to use to get the data from foursquere
geolocator = Nominatim(user_agent="foursquare_agent")
latitude = 43.718518
longitude = -79.464763
search_query = 'Italian'
radius = 5000

In [266]:
# quesry the data from foursquere
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=5SYIVSRH0SZI5JPHH253VQLHTYNPZR1W3Z0TI5LLB0B1XSGR&client_secret=V0AARCEJJQSIYRSCMHTU4RWCALUYC1Q4KZWBIQWNFDKHQ3PO&ll=43.718518,-79.464763&v=20180604&query=Italian&radius=5000&limit=30'

In [267]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5d1283ece0c0c9002cc2c5a1'},
 'response': {'venues': [{'id': '566344c9498eedf4e11af0fa',
    'name': "Jamie's Italian",
    'location': {'address': '3401 Dufferin St',
     'crossStreet': 'Allen Road and 401',
     'lat': 43.72668644119483,
     'lng': -79.45313253527624,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.72668644119483,
       'lng': -79.45313253527624}],
     'distance': 1304,
     'postalCode': 'M6A 2T9',
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['3401 Dufferin St (Allen Road and 401)',
      'Toronto ON M6A 2T9',
      'Canada']},
    'categories': [{'id': '4bf58dd8d48988d110941735',
      'name': 'Italian Restaurant',
      'pluralName': 'Italian Restaurants',
      'shortName': 'Italian',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-15614945

In [268]:
#write the results into the data frame
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId
0,"[{'id': '4bf58dd8d48988d110941735', 'name': 'I...",False,566344c9498eedf4e11af0fa,3401 Dufferin St,CA,Toronto,Canada,Allen Road and 401,1304,"[3401 Dufferin St (Allen Road and 401), Toront...","[{'label': 'display', 'lat': 43.72668644119483...",43.726686,-79.453133,M6A 2T9,ON,Jamie's Italian,v-1561494509
1,"[{'id': '4bf58dd8d48988d110941735', 'name': 'I...",False,4aedafd1f964a52025ce21e3,8 Kincort Street,CA,Toronto,Canada,Castlefield and Kincort,2346,"[8 Kincort Street (Castlefield and Kincort), T...","[{'label': 'display', 'lat': 43.69762110821487...",43.697621,-79.46855,,ON,Commisso Bros. & Racco Italian Bakery,v-1561494509
2,"[{'id': '4bf58dd8d48988d110941735', 'name': 'I...",False,51830da6498eafb14b40f22c,3500 Dufferin St,CA,Toronto,Canada,,746,"[3500 Dufferin St, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.72034199204557...",43.720342,-79.455828,,ON,San Genaro Italian Eatery,v-1561494509
3,"[{'id': '4bf58dd8d48988d130941735', 'name': 'B...",False,52430d7611d2b3a076a88132,Dufferin,CA,Toronto,Canada,,1430,"[Dufferin, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.70835683159475...",43.708357,-79.453877,,ON,Italian canadian savings and credit union,v-1561494509
4,"[{'id': '4bf58dd8d48988d1c9941735', 'name': 'I...",False,4fca400ae4b0ba2d58c1a97f,,CA,,Canada,,1314,[Canada],"[{'label': 'display', 'lat': 43.72617409960949...",43.726174,-79.452317,,,La Paloma Italian Gelateria & Cafe,v-1561494509


In [269]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,Jamie's Italian,Italian Restaurant,3401 Dufferin St,CA,Toronto,Canada,Allen Road and 401,1304,"[3401 Dufferin St (Allen Road and 401), Toront...","[{'label': 'display', 'lat': 43.72668644119483...",43.726686,-79.453133,M6A 2T9,ON,566344c9498eedf4e11af0fa
1,Commisso Bros. & Racco Italian Bakery,Italian Restaurant,8 Kincort Street,CA,Toronto,Canada,Castlefield and Kincort,2346,"[8 Kincort Street (Castlefield and Kincort), T...","[{'label': 'display', 'lat': 43.69762110821487...",43.697621,-79.46855,,ON,4aedafd1f964a52025ce21e3
2,San Genaro Italian Eatery,Italian Restaurant,3500 Dufferin St,CA,Toronto,Canada,,746,"[3500 Dufferin St, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.72034199204557...",43.720342,-79.455828,,ON,51830da6498eafb14b40f22c
3,Italian canadian savings and credit union,Building,Dufferin,CA,Toronto,Canada,,1430,"[Dufferin, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.70835683159475...",43.708357,-79.453877,,ON,52430d7611d2b3a076a88132
4,La Paloma Italian Gelateria & Cafe,Ice Cream Shop,,CA,,Canada,,1314,[Canada],"[{'label': 'display', 'lat': 43.72617409960949...",43.726174,-79.452317,,,4fca400ae4b0ba2d58c1a97f
5,Cumpari's Italian Eatery,Italian Restaurant,3610 dufferin street,CA,Toronto,Canada,Dufferin/Wilson,1563,"[3610 dufferin street (Dufferin/Wilson), Toron...","[{'label': 'display', 'lat': 43.73211827821453...",43.732118,-79.459921,M3K 1N7,ON,52b5da0e498e96708cf1c974
6,The Italian Sandwich,Trailer Park,242 Rushton Road,CA,Toronto,Canada,,5197,"[242 Rushton Road, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.680377, 'lng':...",43.680377,-79.427523,,ON,546e313c498ea838219b4ce7
7,Italian Cafe,Fast Food Restaurant,2200 Yonge St,CA,Toronto,Canada,,5437,"[2200 Yonge St, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.72377014903966...",43.72377,-79.397573,,ON,4b9fbb53f964a5206a3937e3
8,Saggio Italian Eatery & Espresdo Bar,Italian Restaurant,,CA,Toronto,Canada,,4536,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.75834033292955...",43.75834,-79.476741,,ON,4de3e16efa7651589f21395e
9,Catania Italian Cuisine,Italian Restaurant,,CA,Toronto,Canada,,5020,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.680797, 'lng':...",43.680797,-79.43057,,ON,4cfc22012d80a14304854fd8


In [270]:
dataframe_filtered.name

0                               Jamie's Italian
1         Commisso Bros. & Racco Italian Bakery
2                     San Genaro Italian Eatery
3     Italian canadian savings and credit union
4            La Paloma Italian Gelateria & Cafe
5                      Cumpari's Italian Eatery
6                          The Italian Sandwich
7                                  Italian Cafe
8          Saggio Italian Eatery & Espresdo Bar
9                       Catania Italian Cuisine
10                      Agio Italian Ristorante
11                                 Il vagabondo
12                               Dora's Italian
13                  Jolly II Italian Restaurant
Name: name, dtype: object

In [272]:
# create map of the venues withn 5000 radius of chosen latitude and longitude
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Chosen neighbourhood',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Italian restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)
# display map
venues_map