In [None]:
import os

import numpy as np
import pandas as pd
import requests
import scipy as sp

In [None]:
# Coordinates of every bike station, plus the number of stations to query.
# NOTE(review): `bike_stations` is not created anywhere in this part of the
# notebook - it has to exist in the kernel already; confirm where it is loaded.
lat, long = bike_stations['latitude'], bike_stations['longitude']
station_count = len(bike_stations)

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice.

For the API request below, a reduced radius of 500m was applied. Dublin was chosen as my city, and the city centre where the bicycle stations are located is fairly small. A radius of 1000m essentially covers the entire city centre, so it wouldn't provide much insight into the immediate area surrounding each bicycle station.

In [None]:
def FSQ_POIs(lat, long, radius=500, limit=50, session=None):
    """Return the Foursquare places found around one bike station.

    Parameters
    ----------
    lat, long : float or str
        Coordinates of the bike station. Joined as "lat,long" they are sent as
        the search centre and written to the ``gps_ref`` column of every row.
    radius : int, default 500
        Value of the ``radius`` query parameter.
    limit : int, default 50
        Value of the ``limit`` query parameter.
    session : object with a ``get`` method, optional
        HTTP client used instead of the ``requests`` module, e.g. a
        ``requests.Session`` or a stub in tests.

    Returns
    -------
    pandas.DataFrame
        One row per place with the columns gps_ref, name, distance, rating,
        total_ratings, categories, lattitude, longitude and address.
        Fields absent from the response are filled with NaN, and a response
        without places yields an empty frame with the same columns.

    Raises
    ------
    RuntimeError
        If the FOURSQUARE_API_KEY environment variable is not set.
    Exception
        Whatever ``raise_for_status()`` of the HTTP client raises for a
        non-success response.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            "Set the FOURSQUARE_API_KEY environment variable before calling FSQ_POIs"
        )

    #API request fields
    gps = str(lat) + ',' + str(long)
    fields = 'name,categories,distance,rating,stats,geocodes,location'

    #API request
    # `requests` is only looked up when no client was injected.
    http = requests if session is None else session
    response = http.get(
        'https://api.foursquare.com/v3/places/search',
        headers={'Accept': 'application/json', 'Authorization': api_key},
        params={'ll': gps, 'radius': radius, 'limit': limit, 'fields': fields},
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of on a missing 'results' key later.
    response.raise_for_status()

    #Dataframe generation
    near_by = pd.json_normalize(response.json().get('results', []))

    #Cleaning
    # 'lattitude' (sic) is kept as spelled because it is the established column name.
    near_by = near_by.rename(columns={
        'stats.total_ratings': 'total_ratings',
        'geocodes.main.latitude': 'lattitude',
        'geocodes.main.longitude': 'longitude',
        'location.formatted_address': 'address',
    })
    # reindex (rather than plain column selection) creates any column that is
    # missing from this particular response instead of raising KeyError.
    near_by = near_by.reindex(columns=[
        'gps_ref', 'name', 'distance', 'rating', 'total_ratings',
        'categories', 'lattitude', 'longitude', 'address',
    ])

    #Addition of unique reference for each bike station
    near_by['gps_ref'] = gps
    return near_by

#Repeat for all bike stations
# zip walks the two coordinate columns positionally, so this also works when
# the station table does not carry a default 0..n-1 index.
FSQ_dfs_list = [
    FSQ_POIs(station_lat, station_long)
    for station_lat, station_long in zip(lat, long)
]

# Concatenate all per-station frames in a single call (re-concatenating inside
# a loop copies the accumulated frame on every iteration). ignore_index gives
# every row of the combined frame its own label.
if FSQ_dfs_list:
    FSQ_df = pd.concat(FSQ_dfs_list, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the "no stations" case an empty frame.
    FSQ_df = pd.DataFrame()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
# Store the categories as text once so that .str.contains can search them.
FSQ_df['categories'] = FSQ_df['categories'].apply(str)

#example filter for 'pub'
FSQ_pub_example = FSQ_df.loc[FSQ_df['categories'].str.contains("pub", case=False)]

#example filter for 'cafe'
FSQ_cafe_example = FSQ_df.loc[FSQ_df['categories'].str.contains("cafe", case=False)]

# Only the last bare expression of a cell is rendered, so show both filters
# explicitly (display is provided by IPython inside a notebook).
display(FSQ_pub_example, FSQ_cafe_example)

Put your parsed results into a DataFrame

For my analysis I created summarised dataframes for each API based on a select number of categories and the attributes of those categories. I then created a unique id for each bike station (gps_ref) and grouped the summaries based on those ids. An example is shown below for 'cafes' near bike stations. There is definitely a more efficient way to achieve this: as you can see below, a good deal of manipulation of column titles and indexes is required.

In [None]:
#Parse out fields of interest
# Store the categories as text so that .str.contains can search them.
FSQ_df['categories'] = FSQ_df['categories'].apply(str)
FSQ_df_cafes = FSQ_df.loc[FSQ_df['categories'].str.contains("cafe", case=False)]
FSQ_df_pubs = FSQ_df.loc[FSQ_df['categories'].str.contains("pub", case=False)]

# Count and average rating of cafes, one row per bike station (gps_ref).
# rename_axis + reset_index(name=...) names both columns explicitly, so the
# result does not depend on how value_counts() labels its output (this differs
# between pandas versions).
FSQ_cafes = (
    FSQ_df_cafes['gps_ref']
    .value_counts()
    .rename_axis('gps_ref')
    .reset_index(name='FSQ_cafe_count')
)
FSQ_cafe_rating = (
    FSQ_df_cafes.groupby('gps_ref')['rating']
    .mean()
    .reset_index(name='FSQ_avg_cafe_rating')
)

#Repeat above for Pubs
FSQ_pub_count = (
    FSQ_df_pubs['gps_ref']
    .value_counts()
    .rename_axis('gps_ref')
    .reset_index(name='FSQ_pub_count')
)
FSQ_pub_rating = (
    FSQ_df_pubs.groupby('gps_ref')['rating']
    .mean()
    .reset_index(name='FSQ_avg_pub_rating')
)

#Merge summaries into dataframe
FSQ_cafe_summary_df = FSQ_cafes.merge(FSQ_cafe_rating, how='left', on='gps_ref')
FSQ_pub_summary_df = FSQ_pub_count.merge(FSQ_pub_rating, how='left', on='gps_ref')
# Outer join: a station that has cafes but no pubs (or the reverse) keeps its
# row, with NaN in the columns of the category it lacks.
FSQ_summary_df = FSQ_pub_summary_df.merge(FSQ_cafe_summary_df, how='outer', on='gps_ref')


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
def Yelp_POIs(lat, long, radius=500, limit=50, session=None):
    """Return the Yelp businesses found around one bike station.

    Parameters
    ----------
    lat, long : float or str
        Coordinates of the bike station. They are sent as the ``latitude`` and
        ``longitude`` query parameters and, joined as "lat,long", written to
        the ``gps_ref`` column of every row.
    radius : int, default 500
        Value of the ``radius`` query parameter.
    limit : int, default 50
        Value of the ``limit`` query parameter.
    session : object with a ``get`` method, optional
        HTTP client used instead of the ``requests`` module, e.g. a
        ``requests.Session`` or a stub in tests.

    Returns
    -------
    pandas.DataFrame
        One row per business with the columns gps_ref, name, distance, rating,
        rating_count, categories, lattitude, longitude and address.
        Fields absent from the response are filled with NaN, and a response
        without businesses yields an empty frame with the same columns.

    Raises
    ------
    RuntimeError
        If the YELP_API_KEY environment variable is not set.
    Exception
        Whatever ``raise_for_status()`` of the HTTP client raises for a
        non-success response.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('YELP_API_KEY')
    if not api_key:
        raise RuntimeError(
            "Set the YELP_API_KEY environment variable before calling Yelp_POIs"
        )

    # API request
    # `requests` is only looked up when no client was injected.
    http = requests if session is None else session
    response = http.get(
        'https://api.yelp.com/v3/businesses/search',
        headers={'accept': 'application/json', 'Authorization': 'Bearer ' + api_key},
        params={
            'latitude': lat,
            'longitude': long,
            'radius': radius,
            'sort_by': 'best_match',
            'limit': limit,
        },
        timeout=30,
    )
    # Fail loudly on an HTTP error instead of on a missing 'businesses' key later.
    response.raise_for_status()

    # Dataframe generation
    near_by = pd.json_normalize(response.json().get('businesses', []))

    #Cleaning
    # 'lattitude' (sic) is kept as spelled because it is the established column name.
    near_by = near_by.rename(columns={
        'review_count': 'rating_count',
        'coordinates.latitude': 'lattitude',
        'coordinates.longitude': 'longitude',
        'location.display_address': 'address',
    })
    # reindex keeps only the wanted columns and creates any that this response
    # did not contain, so optional fields can be absent without a KeyError.
    near_by = near_by.reindex(columns=[
        'gps_ref', 'name', 'distance', 'rating', 'rating_count',
        'categories', 'lattitude', 'longitude', 'address',
    ])

    #Addition of unique reference for each bike station
    near_by['gps_ref'] = str(lat) + ',' + str(long)
    return near_by

#Repeat for all bike stations
# zip walks the two coordinate columns positionally, so this also works when
# the station table does not carry a default 0..n-1 index.
Yelp_dfs_list = [
    Yelp_POIs(station_lat, station_long)
    for station_lat, station_long in zip(lat, long)
]

# Concatenate all per-station frames in a single call (re-concatenating inside
# a loop copies the accumulated frame on every iteration). ignore_index gives
# every row of the combined frame its own label.
if Yelp_dfs_list:
    Yelp_df = pd.concat(Yelp_dfs_list, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the "no stations" case an empty frame.
    Yelp_df = pd.DataFrame()

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

As with FSQ, I parsed out Pubs and Cafes as categories and created attributes for average rating and the number of each category near each bike station and summarised it into a dataframe.

In [None]:
#Parse out Pubs
# Store the categories as text so that .str.contains can search them.
Yelp_df['categories'] = Yelp_df['categories'].apply(str)
yelp_df_pubs = Yelp_df.loc[Yelp_df['categories'].str.contains("pub", case=False)]

#Define attributes: number of pubs and their mean rating per bike station
yelp_pubs = yelp_df_pubs['gps_ref'].value_counts()
yelp_pub_rating = yelp_df_pubs.groupby(['gps_ref'])['rating'].mean()

#Parse out cafes
yelp_df_cafes = Yelp_df.loc[Yelp_df['categories'].str.contains("cafe", case=False)]

#Define attributes: number of cafes and their mean rating per bike station
yelp_cafes = yelp_df_cafes['gps_ref'].value_counts()
yelp_cafe_rating = yelp_df_cafes.groupby(['gps_ref'])['rating'].mean()

#Merge into a summary dataframe
# Each Series is given its final column name before the concat, so the result
# does not depend on how value_counts() labels its output (this differs between
# pandas versions). concat(axis=1) aligns on gps_ref and keeps stations that
# appear in only one of the four Series, filling the rest with NaN.
yelp_summary_df = (
    pd.concat(
        [
            yelp_cafes.rename('yelp_cafe_count'),
            yelp_cafe_rating.rename('yelp_avg_cafe_rating'),
            yelp_pubs.rename('yelp_pub_count'),
            yelp_pub_rating.rename('yelp_avg_pub_rating'),
        ],
        axis=1,
    )
    .rename_axis('gps_ref')
    .reset_index()
)

Put your parsed results into a DataFrame

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

I think the usefulness of these APIs is location dependent. Comparing the number of ratings available from FSQ and Yelp, far more ratings are captured by FSQ. It was also observed for Yelp that when searching for 'restaurants' only 6 unique values were returned, yet there are more than 6 restaurants in Dublin city centre. This suggests to me that Yelp's data is perhaps less 'complete' than FSQ's when looking for places in Dublin. Perhaps Yelp is not widely used in Ireland?
What also confounds the comparison of the APIs is that both are limited to 50 results per request. This means that a broad search returns only a sample of the places in a given category. For example, you may get back 5 cafes on one search and 10 the next time you execute it. In other words, the results for a given category will vary between runs, and this will cause inaccuracies in the conclusions drawn from them.
One potential way around this is to tune the request to look for a single category in the hope of getting a complete sample, but unfortunately there was not enough time to complete this.

Get the top 10 restaurants according to their rating

In [None]:
def _restaurants_by_rating(places):
    """Return the restaurant rows of `places`, one per name, best rating first."""
    # astype(str) makes the filter work whether or not an earlier cell has
    # already converted the categories column to text.
    is_restaurant = places['categories'].astype(str).str.contains("restaurant", case=False)
    return (
        places.loc[is_restaurant]
        .drop_duplicates(subset=['name'])
        .sort_values(by='rating', ascending=False)
    )

#Top 10 restaurants FSQ
FSQ_df_restaurants = _restaurants_by_rating(FSQ_df)

#Top 10 restaurants Yelp
Yelp_df_restaurants = _restaurants_by_rating(Yelp_df)

# Only the last bare expression of a cell is rendered, so show both top-10
# tables explicitly (display is provided by IPython inside a notebook).
display(FSQ_df_restaurants.head(10), Yelp_df_restaurants.head(10))