In [2]:
import requests
from IPython.display import JSON
import pandas as pd
import os
import json
# Notebook-wide display limits for rendered DataFrames: at most 20 rows and 100 columns.
pd.options.display.max_rows = 20
pd.options.display.max_columns = 100

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [77]:
#Foursquare V3 endpoint: one places/search request (radius 1000, limit 50) per bike station row
ENDPOINTfs = 'https://api.foursquare.com/v3/places/search'
HEADERSfs = {'Authorization': os.environ['Foursquare_API'], 'Accept':'application/json'}
#the 'll' value here is only a sample coordinate; the loop below overwrites it for every station
PARAMSfs={'ll':'49.262487,-123.114397', 'radius':1000, 'limit':50, 'categories':'13000'}

df = pd.read_csv('../data/city_bikes.csv', index_col='id')

rawResultsFS = []
#a single Session re-uses the HTTPS connection instead of reconnecting for every station
with requests.Session() as session:
    for lat, long in zip(df['latitude'], df['longitude']):
        PARAMSfs['ll'] = f'{lat},{long}'
        #timeout (seconds) makes a stalled connection raise instead of hanging the cell forever
        response = session.get(url = ENDPOINTfs, params = PARAMSfs, headers=HEADERSfs, timeout=30)
        #no status check: whatever JSON body comes back is stored, so position i in the
        #list always corresponds to row i of the city bikes file
        rawResultsFS.append(response.json())

#save raw results
with open("../data/rawFSdataDineDrink.json", "w") as outfile:
    json.dump(rawResultsFS, outfile)

In [149]:
#Foursquare V2 endpoint: one venues/explore request (radius 1000, limit 100) per bike station row
ENDPOINTfs = 'https://api.foursquare.com/v2/venues/explore'
HEADERSfs = {'Accept':'application/json'}
#'offset':100 matches the "100-200" output file name below; the 'll' value is only a sample
#coordinate that the loop overwrites for every station
PARAMSfs={'ll':'49.270721,-123.146175', 'radius':1000, 'limit':100, 'v':20230730, 'offset':100,
         'client_id':os.environ['Foursquare_IDv2'], 'client_secret':os.environ['Foursquare_SECRETv2']}


#do some random point checks on the resultant data frames
# obj = response.json()
# testRes = [x['venue']['categories'][0]['name'] for x in obj['response']['groups'][0]['items'] if 'Japanese Restaurant' in x['venue']['categories'][0]['name']]

df = pd.read_csv('../data/city_bikes.csv')

rawResultsFS = []
#a single Session re-uses the HTTPS connection instead of reconnecting for every station
with requests.Session() as session:
    for lat, long in zip(df['latitude'], df['longitude']):
        PARAMSfs['ll'] = f'{lat},{long}'
        #timeout (seconds) makes a stalled connection raise instead of hanging the cell forever
        response = session.get(url = ENDPOINTfs, params = PARAMSfs, headers=HEADERSfs, timeout=30)
        #no status check: whatever JSON body comes back is stored, so position i in the
        #list always corresponds to row i of the city bikes file
        rawResultsFS.append(response.json())

#save raw results
with open("../data/rawFSdataV2All-100-200.json", "w") as outfile:
    json.dump(rawResultsFS, outfile)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [78]:
#Foursquare V3 endpoint search: flatten the saved raw responses into one list per output column
with open('../data/rawFSdataDineDrink.json', 'r') as openfile:
    rawResults = json.load(openfile)

df_cb = pd.read_csv('../data/city_bikes.csv')

#all lists must stay the same length, otherwise pd.DataFrame(fsPOI) raises
fsPOI = {'bikeStationID' : [], 'name' : [], 'latitude' : [], 'longitude' : [], 'category' : []}
idIdx = 0 #index to traverse through city bikes data frame

for rawPOI in rawResults: #rawResults[i] was generated with the coordinates in row i of city bikes data frame
    try:
        pois = rawPOI['results']
    except Exception as e:
        #a response without a 'results' key is reported and skipped instead of aborting the cell
        print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
        pois = []
    for poi in pois:
        try:
            #read every field before touching the column lists, so a POI with a missing
            #field is skipped as a whole and never leaves the lists at different lengths
            record = {
                'bikeStationID' : df_cb.loc[idIdx, 'id'],
                'name' : poi['name'],
                'latitude' : poi['geocodes']['main']['latitude'],
                'longitude' : poi['geocodes']['main']['longitude'],
                'category' : ','.join([cat['name'] for cat in poi['categories']]), #get all the categories
            }
        except Exception as e:
            print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
            continue
        for column, value in record.items():
            fsPOI[column].append(value)
    idIdx += 1 #increment index in outer loop


In [3]:
#Foursquare V2 endpoint broad search. API decides which are the most common/popular POIs
with open('../data/rawFSdataV2All-1-00.json', 'r') as openfile:
    rawResults = json.load(openfile)

with open('../data/rawFSdataV2All-100-200.json', 'r') as openfile:
    rawResults.extend(json.load(openfile))

#to get the city bike station ids
df_cb = pd.read_csv('../data/city_bikes.csv')
#row count (not a non-null count of 'id'): the raw results hold one entry per row of the file
cb_totRows = len(df_cb)

#columns for the data frame; all lists must stay the same length, otherwise pd.DataFrame(fsPOI) raises
fsPOI = {'bikeStationID' : [], 'name' : [], 'latitude' : [], 'longitude' : [], 'category' : []}
idIdx = 0 #index to traverse through city bikes data frame
for rawPOI in rawResults: #each entry was generated with the coordinates in one row of city bikes data frame
    try: #groups key sometimes does not exist
        items = rawPOI['response']['groups'][0]['items']
    except Exception as e:
        print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
        items = []
    for poi in items:
        try:
            #read every field before touching the column lists, so one malformed item is
            #skipped on its own: the lists stay aligned and the station's other items are kept
            venue = poi['venue']
            record = {
                'bikeStationID' : df_cb.loc[idIdx, 'id'],
                'name' : venue['name'],
                'latitude' : venue['location']['lat'],
                'longitude' : venue['location']['lng'],
                'category' : ','.join([cat['name'] for cat in venue['categories']]), #get all the categories
            }
        except Exception as e:
            print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
            continue
        for column, value in record.items():
            fsPOI[column].append(value)
    if idIdx == cb_totRows - 1: #reset counter if there are more than one datasets combined
        idIdx = 0
    else:
        idIdx += 1 #increment index in outer loop
#Note: error will occur on the last coordinates of city bikes because it has (0,0)

error occurred on index 244. Info: KeyError
error occurred on index 244. Info: KeyError


Put your parsed results into a DataFrame

In [85]:
#fsPOI holds one equally long list per column; turn it into the Foursquare POI DataFrame
df_fs = pd.DataFrame(data=fsPOI)


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [72]:
#Yelp businesses/search: one request (radius 1000, limit 50) per bike station row
ENDPOINTyelp = 'https://api.yelp.com/v3/businesses/search'
HEADERSyelp = {
    'Accept': 'application/json',
    'Authorization': 'Bearer %s' % os.environ['Yelp_API']}
#latitude/longitude here are only sample values; the loop below overwrites them for every station
PARAMSyelp = {'latitude':49.262487, 'longitude':-123.114397, 'radius': 1000, 'limit':50, 'categories':'coffee, All' }

df = pd.read_csv('../data/city_bikes.csv')
#note that we do this in two batches. First with offset 0, then with offset 50
#NOTE(review): PARAMSyelp has no 'offset' key as written, so an 'offset' entry has to be
#added by hand before running the second batch
rawResultsYelp = []
#a single Session re-uses the HTTPS connection instead of reconnecting for every station
with requests.Session() as session:
    for lat, long in zip(df['latitude'], df['longitude']):
        PARAMSyelp['latitude'] = lat
        PARAMSyelp['longitude'] = long
        #timeout (seconds) makes a stalled connection raise instead of hanging the cell forever
        response = session.get(url = ENDPOINTyelp, params = PARAMSyelp, headers = HEADERSyelp, timeout=30)
        #no status check: whatever JSON body comes back is stored, so position i in the
        #list always corresponds to row i of the city bikes file
        rawResultsYelp.append(response.json())

#save raw results (commented out to avoid accidentally over-writing file)
# with open("../data/rawYelpDataCoffee.json", "w") as outfile:
#     json.dump(rawResultsYelp, outfile)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [83]:
#Broad search. API decides which are the most common/popular POIs
with open('../data/rawYelpData1-50.json', 'r') as openfile:
    rawYelpData = json.load(openfile)

with open('../data/rawYelpData50-100.json', 'r') as openfile:
    rawYelpData.extend(json.load(openfile))

#to get the city bike station ids
df_cb = pd.read_csv('../data/city_bikes.csv')
#row count (not a non-null count of 'id'): the raw results hold one entry per row of the file
cb_totRows = len(df_cb)


idIdx = 0 #index to traverse through city bikes data frame
#columns for the data frame; all lists must stay the same length, otherwise pd.DataFrame(yelpPOI) raises
yelpPOI = {'bikeStationID' : [], 'name' : [], 'latitude' : [], 'longitude' : [], 'category' : [], 'rating' : [], 'numReviews' : []}
for rawPOI in rawYelpData: #each entry was generated with the coordinates in one row of city bikes data frame
    try:
        businesses = rawPOI['businesses']
    except Exception as e:
        #a response without a 'businesses' key is reported and skipped instead of aborting the cell
        print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
        businesses = []
    for poi in businesses:
        try:
            #read every field before touching the column lists, so a business with a missing
            #field is skipped as a whole and never leaves the lists at different lengths
            record = {
                'bikeStationID' : df_cb.loc[idIdx, 'id'],
                'name' : poi['name'],
                'latitude' : poi['coordinates']['latitude'],
                'longitude' : poi['coordinates']['longitude'],
                'category' : ','.join([cat['title'] for cat in poi['categories']]), #get all the categories
                'rating' : poi['rating'],
                'numReviews' : poi['review_count'],
            }
        except Exception as e:
            print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
            continue
        for column, value in record.items():
            yelpPOI[column].append(value)

    if idIdx == cb_totRows - 1: #reset counter if there are more than one datasets combined
        idIdx = 0
    else:
        idIdx += 1 #increment index in outer loop

In [68]:
#'Restaurants' specifically only
with open('../data/rawYelpDataRestaurants.json', 'r') as openfile:
    rawYelpData = json.load(openfile)
#to get the city bike station ids
df_cb = pd.read_csv('../data/city_bikes.csv')
#row count (not a non-null count of 'id'): the raw results hold one entry per row of the file
cb_totRows = len(df_cb)

idIdx = 0 #index to traverse through city bikes data frame
#columns for the data frame; all lists must stay the same length, otherwise pd.DataFrame(yelp_restaurants) raises
yelp_restaurants = {'bikeStationID' : [], 'name' : [], 'latitude' : [], 'longitude' : [], 'category' : [], 'rating' : [], 'numReviews' : []}
for rawPOI in rawYelpData: #each entry was generated with the coordinates in one row of city bikes data frame
    try:
        businesses = rawPOI['businesses']
    except Exception as e:
        #a response without a 'businesses' key is reported and skipped instead of aborting the cell
        print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
        businesses = []
    for poi in businesses:
        try:
            #read every field before touching the column lists, so a business with a missing
            #field is skipped as a whole and never leaves the lists at different lengths
            record = {
                'bikeStationID' : df_cb.loc[idIdx, 'id'],
                'name' : poi['name'],
                'latitude' : poi['coordinates']['latitude'],
                'longitude' : poi['coordinates']['longitude'],
                'category' : ','.join([cat['title'] for cat in poi['categories']]), #get all the categories
                'rating' : poi['rating'],
                'numReviews' : poi['review_count'],
            }
        except Exception as e:
            print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
            continue
        for column, value in record.items():
            yelp_restaurants[column].append(value)

    if idIdx == cb_totRows - 1: #reset counter if there are more than one datasets combined
        idIdx = 0
    else:
        idIdx += 1 #increment index in outer loop


In [74]:
#'Coffee' specifically only
with open('../data/rawYelpDataCoffee.json', 'r') as openfile:
    rawYelpData = json.load(openfile)
#to get the city bike station ids
df_cb = pd.read_csv('../data/city_bikes.csv')
#row count (not a non-null count of 'id'): the raw results hold one entry per row of the file
cb_totRows = len(df_cb)

idIdx = 0 #index to traverse through city bikes data frame
#columns for the data frame; all lists must stay the same length, otherwise pd.DataFrame(yelp_coffee) raises
yelp_coffee = {'bikeStationID' : [], 'name' : [], 'latitude' : [], 'longitude' : [], 'category' : [], 'rating' : [], 'numReviews' : []}
for rawPOI in rawYelpData: #each entry was generated with the coordinates in one row of city bikes data frame
    try:
        businesses = rawPOI['businesses']
    except Exception as e:
        #a response without a 'businesses' key is reported and skipped instead of aborting the cell
        print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
        businesses = []
    for poi in businesses:
        try:
            #read every field before touching the column lists, so a business with a missing
            #field is skipped as a whole and never leaves the lists at different lengths
            record = {
                'bikeStationID' : df_cb.loc[idIdx, 'id'],
                'name' : poi['name'],
                'latitude' : poi['coordinates']['latitude'],
                'longitude' : poi['coordinates']['longitude'],
                'category' : ','.join([cat['title'] for cat in poi['categories']]), #get all the categories
                'rating' : poi['rating'],
                'numReviews' : poi['review_count'],
            }
        except Exception as e:
            print(f"error occurred on index {idIdx}. Info: {type(e).__name__}")
            continue
        for column, value in record.items():
            yelp_coffee[column].append(value)

    if idIdx == cb_totRows - 1: #reset counter if there are more than one datasets combined
        idIdx = 0
    else:
        idIdx += 1 #increment index in outer loop

Put your parsed results into a DataFrame

In [84]:
#One DataFrame and one CSV per parsed Yelp dataset.
#The broad-search frame gets its own name; it was previously stored in df_yelp_restaurants
#and then immediately overwritten by the restaurants frame.
df_yelp_all = pd.DataFrame(yelpPOI)
df_yelp_all.to_csv('../data/yelp_POIall.csv', index=False)
df_yelp_restaurants = pd.DataFrame(yelp_restaurants)
df_yelp_restaurants.to_csv('../data/yelp_restaurants.csv', index=False)
df_yelp_coffee = pd.DataFrame(yelp_coffee)
df_yelp_coffee.to_csv('../data/yelp_coffee.csv', index=False)


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

##### The Yelp API provided more complete data. It gives ratings and the number of ratings of points of interest (POI), which were not available in the Foursquare API. The Foursquare V3 API also has a limitation of 50 POIs per API call, so the amount of data is limited. The Yelp API is also limited to 50 POIs per API call, but it offers an offset parameter that allows retrieval of more results. However, the Foursquare V2 API allows for retrieval of up to 100 POIs per API call along with the offset parameter to retrieve more results. I noticed that sometimes there are multiple descriptions of categories returned by the APIs, so I concatenated all of them to increase the probability of matches when the data is exported to a data frame.

##### Since the Foursquare V2 and Yelp APIs can provide more data, I also made API calls to them without any filter for the returned data and let the API decide on the matches.

Get the top 10 restaurants according to their rating

In [76]:
#Top 10 restaurants by rating; the sort also orders equal ratings by review count
(
    df_yelp_restaurants
    #keep the first row of every (name, rating, numReviews) group, i.e. one row per
    #business (presumably the same business shows up for several bike stations)
    .groupby(['name', 'rating', 'numReviews'])
    .head(1)
    .sort_values(by=['rating', 'numReviews'], ascending=[False, False])
    .nlargest(n=10, columns=['rating'])
)


Unnamed: 0,bikeStationID,name,latitude,longitude,category,rating,numReviews
74,4,Manoush'eh,49.276671,-123.125701,Mediterranean,5.0,230
467,15,Incognito Coffee,49.28037,-123.11969,"Coffee & Tea,Sandwiches,Breakfast & Brunch",5.0,173
96,4,Number E Food,49.277569,-123.131018,"Sandwiches,Coffee & Tea",5.0,138
1825,69,Mazahr Lebanese Kitchen,49.261551,-123.138305,Lebanese,5.0,46
5359,221,The Garden Strathcona,49.28076,-123.08563,"Cafes,Bakeries,Accessories",5.0,36
911,32,Arike Restaurant,49.286805,-123.140856,"African,Cocktail Bars,Canadian (New)",5.0,25
835,27,Ignite Pizzeria,49.27724,-123.11858,Pizza,5.0,20
1799,66,True Nosh,49.265195,-123.137091,"Health Markets,Cafes,Gluten-Free",5.0,19
12011,3000,Pasta Panino,37.761014,-122.435911,"Italian,Pasta Shops",5.0,18
135,5,The Magnet,49.282402,-123.111079,"Beer Bar,Tapas/Small Plates,Cocktail Bars",5.0,17
