In [3]:
import requests
import os
import pandas as pd
import json

In [17]:
# Provides better readability when parsing the raw Json files.
pd.set_option('display.max_columns', None)

In [2]:
API_KEY = os.environ["YELP_FUSION_API_KEY"]

In [72]:
# Importing the DataFrame of SkyTrain stations and providing a short sample.
skytrain_stations = pd.read_csv('skytrain_stations.csv')
skytrain_sample = skytrain_stations.head(3)
skytrain_sample

Unnamed: 0,Name,Rating,Number of Ratings,Place ID,Latitude,Longitude
0,Broadway-City Hall Station,4.1,90,ChIJQx4neN1zhlQRdHFbwLLpxeM,49.262997,-123.114541
1,Vancouver City Centre,4.3,146,ChIJ04sXE39xhlQR1Woi1gmeQXY,49.282469,-123.118613
2,King Edward Station @ Platform 1,4.0,36,ChIJifoZXPJzhlQRtHmyjDhlNAc,49.249187,-123.115857


## YelpFusion API Request
In this following block, the YelpFusion search endpoint will be used to obtain certain information from 50 restaurants for each of the stations in the skytrain_stations DataFrame.

A range of 500 meters is provided in addition to the Yelp enforced limit of 50 businesses per request. Looping with the offset parameter could be used to surpass 50 businesses, however we would quickly be blocked by Yelp's daily API limit.

In [77]:
url = "https://api.yelp.com/v3/businesses/search"
headers = {
    "accept": "application/json",
    "authorization": 'bearer %s' % API_KEY 
          }

#Creating the base DataFrame that will be filled with data from the GET request.
YelpVenues = pd.DataFrame(columns = ['Venue Name', 'Average Rating', 'Review Count', 'Primary Category', 'SkyTrain Station', 'Distance to Station', 'Longitude', 'Latitude', 'ID', 'URL', 'Source']) 

#Iterating over each row within the list of SkyTrain stations to grab their names and coordinates.
for row in skytrain_stations.index:
    StationName = str(
        skytrain_stations['Name'][row]
    )
    Lat = str(
        skytrain_stations['Latitude'][row]
    )
    Long = str(
        skytrain_stations['Longitude'][row]
    )
# 50 restaurants within a radius of 500 meters are retrieved from each SkyTrain station.
    params = {
        "term": "Restaurants",
        "latitude": Lat, 
        "longitude": Long,
        "radius": 500, 
        "sort":"DISTANCE",
        "limit": 50
    }
    
    response = requests.request(
        "GET", url, params=params, headers=headers
    )
# The response from Yelp is interpreted by the json package and converted into a Pandas DataFrame.
    tempJson = json.loads(
        response.text
    )
    tempdb = pd.json_normalize(
        tempJson['businesses']
    )
# Iterates over each restaurant that was retrieved from Yelp, grabbing several different bits of information from each.
    for venue in tempdb.index:
        VenueName = tempdb['name'][venue]
        Distance = tempdb['distance'][venue]
        Rating = tempdb['rating'][venue]
        ReviewCount = tempdb['review_count'][venue]
        try:
            PrimaryCategory = tempdb['categories'][venue][0]['title']
        except IndexError:
            PrimaryCategory = tempdb['categories'][venue]
        except:
            PrimaryCategory = 'N/a'
        Latitude = tempdb['coordinates.latitude'][venue]
        Longitude = tempdb['coordinates.longitude'][venue]
        YelpID = tempdb['id'][venue]
        VenueURL = tempdb['url'][venue]
# A new Series is created to store the information for each restaurant.
        new_row = pd.Series({
            'Venue Name': VenueName, 
            'Average Rating': Rating,
            'Review Count': ReviewCount,
            'Primary Category': PrimaryCategory,
            'SkyTrain Station': StationName, 
            'Distance to Station': Distance,
            'Longitude': Longitude,
            'Latitude': Latitude,
            'ID': YelpID,
            'URL': VenueURL,
            'Source': 'Yelp'
        })
# The Series is concatenated to the previously established DataFrame. The loop continues until we have 50 restaurants for each SkyTrain station. 
        YelpVenues = pd.concat(
            [YelpVenues, new_row.to_frame().T], 
            ignore_index = True
        )
    

# DataFrame is stored as a csv for further cleaning and later analysis.
YelpVenues.to_csv('yelp_venues.csv', index=False) 

In [11]:
YelpVenues = pd.read_csv('yelp_venues.csv')
YelpVenues

Unnamed: 0,Venue Name,Average Rating,Review Count,Primary Category,SkyTrain Station,Distance to Station,Longitude,Latitude,ID,URL,Source
0,Seaport City Seafood Restaurant,4.5,19,Dim Sum,Broadway-City Hall Station,111.874634,-123.115152,49.263921,F5wxgIiZE7LYQxgqhI483A,https://www.yelp.com/biz/seaport-city-seafood-...,Yelp
1,Saku,4.5,213,Japanese,Broadway-City Hall Station,155.321784,-123.116675,49.263101,XAH2HpuUUtu7CUO26pbs4w,https://www.yelp.com/biz/saku-vancouver-8?adju...,Yelp
2,Hokkaido Ramen Santouka,4.0,222,Noodles,Broadway-City Hall Station,168.846607,-123.116892,49.263127,nkDZY5xqihF3XtZMzzfqqg,https://www.yelp.com/biz/hokkaido-ramen-santou...,Yelp
3,Lumiere Cafe,4.5,3,Cafes,Broadway-City Hall Station,108.113478,-123.115865,49.263468,sKqS0SUZRtLwAr2LA3W3eg,https://www.yelp.com/biz/lumiere-cafe-vancouve...,Yelp
4,Marulilu Cafe,4.0,285,Cafes,Broadway-City Hall Station,53.388446,-123.114150,49.263380,NensKn1MSVU_rm-1Y6WlFA,https://www.yelp.com/biz/marulilu-cafe-vancouv...,Yelp
...,...,...,...,...,...,...,...,...,...,...,...
873,The Royal City Youth Ballet Company,0.0,0,Performing Arts,Columbia,114.370138,-122.906899,49.203931,2LeHB_AvU-OsmPWLT3CWGw,https://www.yelp.com/biz/the-royal-city-youth-...,Yelp
874,Waves Coffee House,3.5,37,Coffee & Tea,Columbia,441.914045,-122.910171,49.202138,J1BK0O7Mh7-AFpvuA49wVw,https://www.yelp.com/biz/waves-coffee-house-ne...,Yelp
875,Freshslice Pizza,3.0,16,Pizza,Columbia,631.241330,-122.912705,49.201849,raAFYL7HNJbzSkwqPzdMYw,https://www.yelp.com/biz/freshslice-pizza-new-...,Yelp
876,Quesada Burritos & Tacos,3.0,19,Mexican,Columbia,636.772565,-122.912671,49.201020,UIxbtBAnmMKR-OjhG3KRIw,https://www.yelp.com/biz/quesada-burritos-and-...,Yelp
