# Capstone Project 

*Where to move in Toronto, Ontario*

## Table of contents
* [Getting the data](#section1)
* [Get close venues for each neighborhood](#section2)

<a id='section1'></a>
## Getting the data

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup # this module helps in web scrapping.
import folium
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [2]:
URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
data  = requests.get(URL).text
soup = BeautifulSoup(data,"html5lib")

#find all html tables in the web page
tables = soup.find_all('table') # in html table is represented by the tag <table>
print(f"There are {len(tables)} tables on this website.")

# target table is table 0
targetTable = tables[0]
#print(tables[0].prettify())

There are 3 tables on this website.


In [3]:
# Creating Dataframe from scraped data
data = pd.DataFrame(columns=["PostalCode", "Borough", "Neighborhood"])

for row in targetTable.tbody.find_all("tr"):
    fields = row.find_all("td")
    
    for field in fields:
        postalCode = field.find("b").text
        if field.find("span").text == "Not assigned":
            continue
        borough = field.find("span").text.split("(")[0]
        neighborhood = field.find("span").text.split("(")[1]
        neighborhood = neighborhood.split(")")[0]
        
        # if neighborhoods are separeted by "/", replace by delimiter ","
        if "/" in neighborhood:
            neighborhood = neighborhood.replace("/ ", ", ")
        
        data = data.append({"PostalCode":postalCode,
                            "Borough":borough,
                            "Neighborhood":neighborhood}, ignore_index=True)

In [4]:
data

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,..."


In [5]:
geodata = pd.read_csv("Geospatial_Coordinates.csv")
geodata.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [6]:
lat = []
long = []

for postalCode in data['PostalCode']:
    lat.append(geodata[geodata['Postal Code'] == postalCode]["Latitude"].values[0])
    long.append(geodata[geodata['Postal Code'] == postalCode]["Longitude"].values[0])

data['Latitude'] = lat
data['Longitude'] = long

In [7]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [64]:
class style:
   BOLD = '\033[1m'
   END = '\033[0m'

In [77]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(data['Latitude'], data['Longitude'], data['Borough'], data['Neighborhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=f"<i>{neighborhood}, <b>{borough}</b></i>",
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

<a id='section2'></a>
# Get close venues for each neighborhood

In this project i will only take a look at the general categories (e.g. restaurants) not subcategories like Asian-Restaurant.

### Get list of all possible categories

In [63]:
content = soup.find(class_="documentTemplate__Content-sc-5mpekp-0 bbBBoE")

categories = []

for item in content.ul.children:
    categories.append(item.div.h3.text)
    
categories.sort()
categories

['Arts & Entertainment',
 'College & University',
 'Event',
 'Food',
 'Nightlife Spot',
 'Outdoors & Recreation',
 'Professional & Other Places',
 'Residence',
 'Shop & Service',
 'Travel & Transport']

### Create dictionary to lookup category IDs


In [101]:
# Create a dictionary class
class my_dictionary(dict):

    # __init__ function
    def __init__(self):
        self = dict()
          
    # Function to add key:value
    def add(self, key, value):
        self[key] = value

In [102]:
categoriesDict = my_dictionary()

for item in content.ul.children:
    category = item.div.h3.text
    categoryID = item.div.p.text
    categoriesDict.add(category, categoryID)

categoriesDict

{'Arts & Entertainment': '4d4b7104d754a06370d81259',
 'College & University': '4d4b7105d754a06372d81259',
 'Event': '4d4b7105d754a06373d81259',
 'Food': '4d4b7105d754a06374d81259',
 'Nightlife Spot': '4d4b7105d754a06376d81259',
 'Outdoors & Recreation': '4d4b7105d754a06377d81259',
 'Professional & Other Places': '4d4b7105d754a06375d81259',
 'Residence': '4e67e38e036454776db1fb3a',
 'Shop & Service': '4d4b7105d754a06378d81259',
 'Travel & Transport': '4d4b7105d754a06379d81259'}

### Add categories to data

In [103]:
for category in categories:
    data[category] = None

data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Arts & Entertainment,College & University,Event,Food,Nightlife Spot,Outdoors & Recreation,Professional & Other Places,Residence,Shop & Service,Travel & Transport
0,M3A,North York,Parkwoods,43.753259,-79.329656,,,,,,,,,,
1,M4A,North York,Victoria Village,43.725882,-79.315572,,,,,,,,,,
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,,,,,,,,,,
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763,,,,,,,,,,
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494,,,,,,,,,,


### Get venues of each category for each neighborhood

In [9]:
with open("./Credentials/client_ID.txt") as file:
    CLIENT_ID = file.read() # your Foursquare ID
with open("./Credentials/client_secret.txt") as file:
    CLIENT_SECRET = file.read() # your Foursquare Secret 
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

#print('Your credentails:')
#print('CLIENT_ID: ' + CLIENT_ID)
#print('CLIENT_SECRET:' + CLIENT_SECRET)

In [174]:
def getNearbyVenues(categories, names, latitudes, longitudes, radius=500):
    
    nearby_venues = pd.DataFrame(columns = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude'] + categories)
    venues_dict=my_dictionary()
    for name, lat, lng in zip(names, latitudes, longitudes):
        if name == names[1]:
            break
        
        tmpDict = my_dictionary()
        tmpDict.add("Neighborhood",name)
        tmpDict.add("Latitude", lat)
        tmpDict.add("Longitude", lng)
        
        for category in categories:
            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/search?categoryID={}&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                categoriesDict[category],
                CLIENT_ID, 
                CLIENT_SECRET, 
                VERSION, 
                lat, 
                lng, 
                radius, 
                LIMIT)

            # make the GET request to get amount of items of each category
            results = requests.get(url).json()["response"]["venues"]

            #count quantity of items of each category
            quantity = len(results)

            #list names of items
            itemNames = [results[i]["name"] for i in range(0, len(results))]            
            
            #add categories to dictionary
            tmpDict.add(category, quantity)

        nearby_venues.append(tmpDict, ignore_index=True)
    #nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    
    
    return nearby_venues

In [175]:
columns = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude'] + categories
pd.DataFrame(columns = columns)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Arts & Entertainment,College & University,Event,Food,Nightlife Spot,Outdoors & Recreation,Professional & Other Places,Residence,Shop & Service,Travel & Transport


In [176]:
data1 = getNearbyVenues(categories,
                        names=data['Neighborhood'],
                        latitudes=data['Latitude'],
                        longitudes=data['Longitude'])
data1

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Arts & Entertainment,College & University,Event,Food,Nightlife Spot,Outdoors & Recreation,Professional & Other Places,Residence,Shop & Service,Travel & Transport


In [157]:
data1[0]

<generator object getNearbyVenues.<locals>.<genexpr> at 0x000001A85164E948>

In [121]:
results = requests.get(url).json()['response']['venues']
len(results)
results[0]

{'id': '605e70dfc1f0442ea07df4bb',
 'name': 'Debe’s Roti & Doubles',
 'location': {'address': '2881 Jane Street',
  'lat': 43.751907,
  'lng': -79.516008,
  'labeledLatLngs': [{'label': 'display',
    'lat': 43.751907,
    'lng': -79.516008}],
  'distance': 428,
  'postalCode': 'M3N 2J5',
  'cc': 'CA',
  'city': 'Toronto',
  'state': 'ON',
  'country': 'Canada',
  'formattedAddress': ['2881 Jane Street', 'Toronto ON M3N 2J5', 'Canada']},
 'categories': [{'id': '4bf58dd8d48988d144941735',
   'name': 'Caribbean Restaurant',
   'pluralName': 'Caribbean Restaurants',
   'shortName': 'Caribbean',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/caribbean_',
    'suffix': '.png'},
   'primary': True}],
 'referralId': 'v-1617971364',
 'hasPerk': False}

In [94]:
venues = getNearbyVenues(names=data['Neighborhood'],
                         latitudes=data['Latitude'],
                         longitudes=data['Longitude'])

In [12]:
venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,TTC stop #8380,43.752672,-79.326351,Bus Stop
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
