In [323]:
# imports
import pandas as pd
import requests
import os
from pprint import pprint
import json
import numpy as np


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [8]:
#import list of lat long from city bikes notebook

%store -r cb_lat_long
api_key = os.environ["foursquare_api"]
radius = "1000" 

#ll = latitude and longitude   

headers = {
   "Accept": "application/json",
   'Authorization': api_key
   }

fields = "name,rating,popularity,price,distance,stats,location"
category = {"Dining and Drinking": "13000"}

fsq_responses = []
for latlong in cb_lat_long:
   ll = ",".join(map(str,latlong))
   url = "https://api.foursquare.com/v3/places/search?" + "&ll=" + ll + "&radius=" + radius + "&categories=" + category["Dining and Drinking"] + "&fields=" + fields
   result = requests.get(url, headers=headers)
   fsq_responses.append(result.json())  



In [10]:
# Cache the raw Foursquare responses on disk for later use.
# Mode 'w' replaces the file's contents on every run.
with open('fsq_responses.json', 'w') as cache_file:
    cache_file.write(json.dumps(fsq_responses))

In [253]:
# Reload the cached Foursquare responses from disk.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('fsq_responses.json') as fsq_json:
    fsq_data = json.load(fsq_json)

In [None]:
# Preview the structure of the cached data without dumping every response:
# print how many responses were loaded, then pretty-print only the first one.
print(len(fsq_data))
pprint(fsq_data[:1])

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Flatten the cached responses into one record per place.
fsq_list = []
for item in fsq_data:
    # Skip payloads that are not search responses (for example an error body
    # that was saved alongside the results); they lack the keys read below.
    if 'context' not in item or 'results' not in item:
        continue
    center = item['context']['geo_bounds']['circle']['center']
    for result in item['results']:
        # Tag each place with the centre of the search circle it came from,
        # then copy over the place's own fields.
        fsq_list.append({
            'latitude': center['latitude'],
            'longitude': center['longitude'],
            **result,
        })
print(len(fsq_list))
# Preview a few records instead of dumping the entire list into the output
pprint(fsq_list[:3])


Put your parsed results into a DataFrame

In [None]:

foursquare_df = pd.DataFrame(fsq_list)


def _expand_dict_column(column):
    """Expand a Series of dicts into a DataFrame with one column per key.

    Entries that are not dicts (e.g. NaN where the field was absent from a
    result) become all-NaN rows. The returned frame shares `column`'s index,
    so it can be joined straight back onto the source frame.
    """
    records = [value if isinstance(value, dict) else {} for value in column]
    return pd.DataFrame(records, index=column.index)


# Isolate the address: expand the nested `location` dict and drop the
# individual address components, which leaves `formatted_address` (plus any
# location keys not listed here).
address_df = _expand_dict_column(foursquare_df['location']).drop(
    columns=['address', 'cross_street', 'locality', 'postcode', 'region', 'address_extended', 'country'],
    errors='ignore',  # a key that no result happened to contain should not raise
)

# Isolate the stats (total photos, ratings, tips). Building the frame from
# records avoids the spurious `0` column that `.apply(pd.Series)` creates for
# rows with no stats, which the previous code had to drop by hand.
stats_df = _expand_dict_column(foursquare_df['stats'])

# Join the Foursquare, address and stats frames on their shared index
foursquare_join_df = foursquare_df.join(address_df).join(stats_df)

# Drop the nested columns now that their contents have been expanded
foursquare_df_final = foursquare_join_df.drop(columns=['location', 'stats'])

# Rename column
foursquare_df_final = foursquare_df_final.rename(columns={'formatted_address': 'address'})

foursquare_df_final

In [311]:
# Persist the cleaned Foursquare DataFrame with IPython's %store magic so it
# can be restored later with `%store -r foursquare_df_final`.
%store foursquare_df_final

Stored 'foursquare_df_final' (DataFrame)


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
%store -r cb_lat_long
yelp_key = os.environ["yelp_api"]
yelp_radius = '1000'
yelp_headers = {
    "Accept": "application/json",
   'Authorization': 'Bearer ' + yelp_key
}

category_term = "dining and drinking"

yelp_responses = []
for latlong in cb_lat_long:
    latitude = str(latlong[0])
    longitude = str(latlong[1])
    url = 'https://api.yelp.com/v3/businesses/search?latitude=' + latitude + '&longitude=' + longitude + '&term=' + category_term + '&radius=' + yelp_radius + '&categories=' + category_term + '&sort_by=best_match'
    yelp_response = requests.get(url, headers=yelp_headers)
    if yelp_response.status_code == 200:
        yelp_responses.append(yelp_response.json())
    elif yelp_response.status_code == 429:
        print("Rate Limited")
    elif yelp_response.status_code == 400:
        print("Invalid Request")
    else:
        print("Other error. Investigate")

In [None]:
# Report how many responses were collected and preview only the first one,
# rather than dumping every response into the output.
print(len(yelp_responses))
pprint(yelp_responses[:1])

In [46]:
# Cache the raw Yelp responses on disk for later use.
# Mode 'w' replaces the file's contents on every run.
with open('yelp_responses.json', 'w') as cache_file:
    cache_file.write(json.dumps(yelp_responses))

In [126]:
# Reload the cached Yelp responses from disk
with open('yelp_responses.json') as cache_file:
    yelp_data = json.loads(cache_file.read())

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Sanity check: number of cached responses, then the structure of one of them
response_count = len(yelp_data)
print(response_count)
pprint(yelp_data[1])


In [None]:
# Flatten the cached responses into one record per business.
yelp_list = []
for response in yelp_data:
    center = response['region']['center']
    for business in response['businesses']:
        # Start each record with the centre of the region it came from and
        # its display address, then copy over the business's own fields.
        yelp_list.append({
            'latitude': center['latitude'],
            'longitude': center['longitude'],
            'address': business['location']['display_address'],
            **business,
        })
print(len(yelp_list))
pprint(yelp_list)


Put your parsed results into a DataFrame

In [None]:
yelp_raw_df = pd.DataFrame(yelp_list)

# Drop the columns considered irrelevant for the project
yelp_df_final = yelp_raw_df.drop(columns=[
    'location', 'id', 'alias', 'image_url', 'is_closed', 'url', 'categories',
    'coordinates', 'transactions', 'phone', 'display_phone',
])

# Change column name to match Foursquare
yelp_df_final = yelp_df_final.rename(columns={'review_count': 'total_ratings'})

# Standardize rating to match Foursquare: double it so Yelp's 5-point scale
# lines up with Foursquare's 10-point scale
yelp_df_final['rating'] = yelp_df_final['rating'] * 2

# Standardize price: convert the '$' symbols to numeric tiers.
# `map` leaves missing prices as NaN; any symbol not listed here also becomes NaN.
price_dict = {
    '$': 1,
    '$$': 2,
    '$$$': 3,
    '$$$$': 4
}
yelp_df_final['price'] = yelp_df_final['price'].map(price_dict)

# Fix address column: each entry is the list of display-address parts; join
# the first three into a single string. Joining per row works for any list
# length, whereas splitting into a fixed set of five columns raised unless
# the longest list had exactly five parts and produced NaN for shorter ones.
address_proper = yelp_df_final['address'].apply(lambda parts: ', '.join(parts[:3]))
# Drop and re-add so `address` stays the last column, as before
yelp_df_final = yelp_df_final.drop(columns=['address']).assign(address=address_proper)
yelp_df_final


In [310]:
# Persist the cleaned Yelp DataFrame with IPython's %store magic so it can be
# restored later with `%store -r yelp_df_final`.
%store yelp_df_final

Stored 'yelp_df_final' (DataFrame)


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

I found that Foursquare provided more complete information. For one, Foursquare returned more fields for each result: for example, Yelp did not provide popularity or stats such as the total number of photos.

I did notice that Yelp returned over 1,000 more results than Foursquare. Additionally, the top 10 establishments by rating from each API had no restaurants in common.

Get the top 10 restaurants according to their rating

Foursquare and Yelp returned different top 10 lists by rating: none of the top 10 from Foursquare appears in the top 10 from Yelp. I am curious as to why, but I will save that investigation until the end of the project, if I have time.

In [343]:
# Rank Foursquare establishments by rating: keep one row per name, then show
# the ten highest-rated.
fsq_unique_establishments = foursquare_df_final.drop_duplicates(subset='name')
fsq_by_rating = fsq_unique_establishments.sort_values(by='rating', ascending=False)
fsq_by_rating.head(10)


Unnamed: 0,latitude,longitude,distance,name,popularity,price,rating,address,total_photos,total_ratings,total_tips
22,49.279764,-123.110154,385,Revolver,0.973746,2.0,9.4,"325 Cambie St (btwn Hastings & Cordova), Vanco...",966.0,802.0,184.0
7,49.262487,-123.114397,674,33 Acres Brewing Co,0.94919,2.0,9.4,"15 8th Ave W (btwn Manitoba & Ontario), Vancou...",596.0,378.0,75.0
186,49.288444,-123.139203,559,The Inukshuk,0.995596,,9.3,"1700 Beach Ave, Vancouver BC",608.0,78.0,6.0
71,49.271721,-123.104207,327,Earnest Ice Cream,0.983108,1.0,9.3,"1829 Quebec St (at E 2nd Ave), Vancouver BC V5...",264.0,298.0,51.0
792,49.284157,-123.099917,379,The Mackenzie Room,0.988046,,9.2,"415 Powell St (Powell Street), Vancouver BC V6...",35.0,39.0,7.0
1872,49.27247,-123.185643,472,Jericho Beach,0.996587,,9.2,"3941 Pt Grey Rd, Vancouver BC V6R 1B5",475.0,110.0,8.0
90,49.282409,-123.118541,125,Hawksworth Restaurant,0.986635,4.0,9.1,"801 Georgia St W (at Howe St), Vancouver BC V6...",405.0,272.0,66.0
13,49.274566,-123.121817,272,Tacofino,0.972679,1.0,9.1,"1025 Mainland St (Nelson), Vancouver BC V6B 5P9",39.0,94.0,24.0
604,49.265605,-123.152566,516,Raisu,0.979295,2.0,9.1,"2340 4th Ave W, Vancouver BC V6K 1P1",191.0,73.0,11.0
276,49.279821,-123.10802,472,Keefer Bar,0.968446,3.0,9.1,"135 Keefer St (btwn Columbia & Main), Vancouve...",245.0,233.0,60.0


In [339]:
# Rank Yelp establishments by rating: keep one row per name, then show the
# ten highest-rated.
yelp_unique_establishments = yelp_df_final.drop_duplicates(subset='name')
yelp_by_rating = yelp_unique_establishments.sort_values(by='rating', ascending=False)
yelp_by_rating.head(10)

Unnamed: 0,latitude,longitude,name,total_ratings,rating,price,distance,address
353,49.288444,-123.139203,Arike Restaurant,25,10.0,,221.08433,"1725 Davie St, Vancouver, BC V6G 1W5, Canada"
368,49.288444,-123.139203,Score on Davie,349,9.0,2.0,757.712901,"1262 Davie Street, Vancouver, BC V6J 5L1, Canada"
359,49.288444,-123.139203,Guu with Garlic,1054,9.0,2.0,423.396091,"1698 Robson Street, Vancouver, BC V6G 1C7, Canada"
1954,49.277665,-123.073653,Dundas Eat + Drink,72,9.0,2.0,1169.218472,"2077 Dundas Street, Vancouver, BC V5L 1J5, Canada"
23,49.274566,-123.121817,Blue Water Cafe,1070,9.0,4.0,190.502213,"1095 Hamilton St, Vancouver, BC V6B 5T4, Canada"
681,49.272827,-123.147744,Grapes & Soda,44,9.0,2.0,931.131063,"1541 W 6th Avenue, Vancouver, BC V6J 1R1, Canada"
398,49.28062,-123.12482,Beach Ave Bar And Grill,26,9.0,2.0,967.953201,"1 -1012 Beach Avenue, Vancouver, BC V6E 1T7, C..."
1933,49.277469,-123.081315,The Tiki Bar,32,9.0,2.0,666.708667,"1489 E Hastings Street, Vancouver, BC V5L 1S4,..."
144,49.271721,-123.104207,Oyster Express,165,9.0,2.0,974.804233,"296 Keefer Street, Vancouver, BC V6A 1X5, Canada"
40,49.279764,-123.110154,Alouette Bistro,44,9.0,,851.773421,"567 Hornby Street, Vancouver, BC V6C 2E8, Canada"


In [345]:
# Flag each unique Foursquare establishment whose name also appears among the
# unique Yelp establishments (exact match on `name`).
# Note: head(10) previews the first ten rows in their existing order, not the
# ten highest-rated establishments.
yelp_names = yelp_unique_establishments['name']
foursquare_df_compare = fsq_unique_establishments['name'].isin(yelp_names)
foursquare_df_compare.head(10)

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
Name: name, dtype: bool