In [2]:
# imports
import requests
import os
import pandas as pd
import time
import numpy as np

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [5]:
# Assigning API key stored in an environtment variable
api_key = os.environ["FOURSQUARE_API_KEY"]

# API endpoint
url = "https://api.foursquare.com/v3/places/search"

# header
headers = {
    "accept": "application/json",
    "Authorization": api_key
}

# Parameters for Foursquare API 
bixi_data = pd.read_csv('bixi_bike_stations.csv') # from city_bikes.ipynb
radius = 1000
category_id = '13032,13065' # id for <Cafe, Coffee, and Tea House> and <Restaurant>
fields = 'name,categories,distance,rating,popularity,price'

# Rate limit in seconds
rate_limit = 1/50

# List used to store data from API request
results_list = []

# Loop through each station and make an API request
for index, station in bixi_data.iterrows():
    params = {
        'll': f"{station['latitude']},{station['longitude']}", # Location: Montreal, QC
        'radius': radius,
        'categories': category_id,
        'fields': fields
    }
    
    response = requests.get(url, params=params, headers=headers)
    # Checking for the status code and error handling
    if response.status_code == 200:
        data = response.json()
        results_list.append(data)
    else:
        print(f"Error: {response.status_code}")
    
    # Rate limit delay to avoid getting restriction on API requests
    time.sleep(rate_limit)

In [None]:
results_list

##### Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [28]:
# Extracting relevant data

data_list = []
# First loop, since the data from results_list is semi-structured (nested)
for entry in results_list:
    # Second loop to get relevant data inside 'results'
    for result in entry['results']:
        result_data = {
            # Using .get() method in case a key is missing/does not exist
            'POI name': result.get('name'),
            'distance': result.get('distance'),
            'price': result.get('price'),
            'popularity': result.get('popularity'),
            'rating': result.get('rating', np.nan),
            'latitude': repr(entry['context']['geo_bounds']['circle']['center']['latitude']), # Data extracted from the first loop 
            'longitude': repr(entry['context']['geo_bounds']['circle']['center']['longitude']) # Data extracted from the first loop
        }
        data_list.append(result_data)

##### Put your parsed results into a DataFrame

In [29]:
df_fs = pd.DataFrame(data_list)

df_fs

Unnamed: 0,POI name,distance,price,popularity,rating,latitude,longitude
0,Vua Sandwichs,283,1.0,0.957068,8.5,45.516926210319546,-73.56425732374191
1,Mamie Clafoutis,505,2.0,0.974074,9.0,45.516926210319546,-73.56425732374191
2,Saint-Houblon,301,3.0,0.984992,8.3,45.516926210319546,-73.56425732374191
3,Café Saint-Henri Quartier Latin,294,1.0,0.955926,8.2,45.516926210319546,-73.56425732374191
4,Bouillon Bilk,675,3.0,0.971504,9.4,45.516926210319546,-73.56425732374191
...,...,...,...,...,...,...,...
1587,Restaurant Vargas,279,2.0,0.987204,8.2,45.50373771539475,-73.56948494911194
1588,Les 3 Brasseurs,195,2.0,0.994255,8.0,45.50373771539475,-73.56948494911194
1589,Café Humble Lion,434,2.0,0.975087,8.5,45.50373771539475,-73.56948494911194
1590,Reuben's Deli,527,2.0,0.990344,8.7,45.50373771539475,-73.56948494911194


In [31]:
# Grouping by 'latitude' and 'longitude' and calculating averages
df_fs_avg = df_fs.groupby(['latitude', 'longitude']).agg({
    'distance': 'mean',
    'price': 'mean',
    'popularity': 'mean',
    'rating': 'mean'
}, skipna=True).reset_index()

# Renaming the columns for clarity
df_fs_avg = df_fs_avg.rename(columns={
    'distance': 'average_distance',
    'price': 'average_price',
    'popularity': 'average_popularity',
    'rating': 'average_rating'
})

df_fs_avg

Unnamed: 0,latitude,longitude,average_distance,average_price,average_popularity,average_rating
0,45.46766565386597,-73.59391784324544,531.2,1.500000,0.955817,8.550000
1,45.46976497412213,-73.58923405408859,385.5,1.500000,0.955817,8.550000
2,45.472636238971525,-73.58546018600464,495.1,1.222222,0.955844,8.330000
3,45.4801160762769,-73.58560770750046,597.7,2.000000,0.966968,8.690000
4,45.48020822800015,-73.57759863138199,289.6,2.333333,0.965048,8.870000
...,...,...,...,...,...,...
155,45.55988367688166,-73.63356828689575,965.0,1.000000,0.961656,
156,45.56001959585449,-73.534934520776,800.5,1.700000,0.943463,7.555556
157,45.56157008176612,-73.65338176488876,421.9,1.444444,0.934679,7.730000
158,45.5616903,-73.610512,404.5,1.428571,0.924680,6.814286


In [9]:
# Checking why row 155 from df_fs_avg has NaN in the average_rating column

# Filtered by latitude and longitude from df_fs 
filtered_rows = df_fs[(df_fs['latitude'] == '45.55988367688166') & (df_fs['longitude'] == '-73.63356828689575')]
filtered_rows

# Solved: the 2 POIs have no rating for this bike station

Unnamed: 0,POI name,distance,popularity,rating,latitude,longitude
60,Café Vienne,935,0.961656,,45.55988367688166,-73.63356828689575
61,L'Oeufrier Cremazie,995,,,45.55988367688166,-73.63356828689575


In [32]:
# Saving dataframe as a csv file
df_fs_avg.to_csv('foursquare_df.csv', index=False)

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [22]:
# Assigning API key stored in an environtment variable
YELP_API_KEY = os.environ["YELP_API_KEY"]

# API endpoint
url = "https://api.yelp.com/v3/businesses/search"

# header
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {YELP_API_KEY}"
}

# Parameters for Yelp API request 
bixi_data = pd.read_csv('bixi_bike_stations.csv') # from city_bikes.ipynb
radius = 1000
categories = 'coffee,restaurants' # id for <Coffee> and <Restaurants>

# Rate limit in seconds
rate_limit = 1/50

# List used to store data from API request
results_list_yelp = []

# Loop through each station and make an API request
for index, station in bixi_data.iterrows():
    params = {
        'latitude': station['latitude'], # Location: Montreal, QC
        'longitude': station['longitude'], # Location: Montreal, QC
        'radius': radius,
        'categories': categories
    }
    
    response = requests.get(url, params=params, headers=headers)
    # Checking for the status code and error handling
    if response.status_code == 200:
        data = response.json()
        results_list_yelp.append(data)
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        break
    
    # Rate limit delay to avoid getting restriction on API requests
    time.sleep(rate_limit)

In [None]:
results_list_yelp

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [42]:
# Extracting relevant data

data_list_yelp = []
# First loop, since the data from results_list is semi-structured (nested)
for entry in results_list_yelp:
    # Second loop to get relevant data inside 'businesses'
    for business in entry['businesses']:
        result_data = {
            # Using .get() method in case a key is missing/does not exist
            'POI name': business.get('name'),
            'distance': business.get('distance'),
            'price': business.get('price'),
            'rating': business.get('rating'),
            'Number of Reviews': business.get('review_count'),
            'latitude': repr(entry['region']['center']['latitude']), # Data extracted from the first loop 
            'longitude': repr(entry['region']['center']['longitude']) # Data extracted from the first loop
        }
        data_list_yelp.append(result_data)

Put your parsed results into a DataFrame

In [75]:
df_yelp = pd.DataFrame(data_list_yelp)

df_yelp

Unnamed: 0,POI name,distance,price,rating,Number of Reviews,latitude,longitude
0,Le Saint-Bock,126.552913,$$,4.0,208,45.516926210319546,-73.56425732374191
1,Pizzeria Dei Compari,219.943745,$$,4.0,91,45.516926210319546,-73.56425732374191
2,L'Amère à Boire,150.012599,$$,4.0,68,45.516926210319546,-73.56425732374191
3,Poutineville,735.204551,$$,4.5,602,45.516926210319546,-73.56425732374191
4,Bouillon Bilk,663.213727,$$$,4.5,503,45.516926210319546,-73.56425732374191
...,...,...,...,...,...,...,...
3162,Les Enfants Terribles,253.132737,$$,3.5,219,45.50373771539475,-73.56948494911194
3163,Café Parvis,232.204539,$$,4.0,223,45.50373771539475,-73.56948494911194
3164,Burger Bar Crescent,866.706777,$$,4.5,555,45.50373771539475,-73.56948494911194
3165,Sansotei,294.212167,$$,4.5,45,45.50373771539475,-73.56948494911194


In [77]:
# Reformatting the price column into type int
price_mapping = {'$': 1, '$$': 2, '$$$': 3, '$$$$': 4}
df_yelp['price'] = df_yelp['price'].map(price_mapping)

df_yelp

Unnamed: 0,POI name,distance,price,rating,Number of Reviews,latitude,longitude
0,Le Saint-Bock,126.552913,2.0,4.0,208,45.516926210319546,-73.56425732374191
1,Pizzeria Dei Compari,219.943745,2.0,4.0,91,45.516926210319546,-73.56425732374191
2,L'Amère à Boire,150.012599,2.0,4.0,68,45.516926210319546,-73.56425732374191
3,Poutineville,735.204551,2.0,4.5,602,45.516926210319546,-73.56425732374191
4,Bouillon Bilk,663.213727,3.0,4.5,503,45.516926210319546,-73.56425732374191
...,...,...,...,...,...,...,...
3162,Les Enfants Terribles,253.132737,2.0,3.5,219,45.50373771539475,-73.56948494911194
3163,Café Parvis,232.204539,2.0,4.0,223,45.50373771539475,-73.56948494911194
3164,Burger Bar Crescent,866.706777,2.0,4.5,555,45.50373771539475,-73.56948494911194
3165,Sansotei,294.212167,2.0,4.5,45,45.50373771539475,-73.56948494911194


In [79]:
# Grouping by 'latitude' and 'longitude' and calculating averages
df_yelp_avg = df_yelp.groupby(['latitude', 'longitude']).agg({
    'distance': 'mean',
    'price': 'mean',
    'Number of Reviews': 'mean',
    'rating': 'mean'
}, skipna=True).reset_index()

# Renaming the columns for clarity
df_yelp_avg = df_yelp_avg.rename(columns={
    'distance': 'average_distance',
    'price': 'average_price',
    'Number of Reviews': 'average_popularity',
    'rating': 'average_rating'
})

df_yelp_avg

Unnamed: 0,latitude,longitude,average_distance,average_price,average_popularity,average_rating
0,45.46766565386597,-73.59391784324544,715.182865,1.933333,46.400000,4.200000
1,45.46976497412213,-73.58923405408859,515.885706,1.857143,46.350000,4.250000
2,45.472636238971525,-73.58546018600464,585.179531,2.058824,93.650000,4.175000
3,45.4801160762769,-73.58560770750046,613.092906,2.166667,175.150000,4.200000
4,45.48020822800015,-73.57759863138199,329.110231,2.235294,167.150000,4.150000
...,...,...,...,...,...,...
155,45.55988367688166,-73.63356828689575,955.242544,,4.000000,3.000000
156,45.56001959585449,-73.534934520776,857.702183,1.600000,14.833333,3.722222
157,45.56157008176612,-73.65338176488876,481.084230,2.083333,7.750000,4.025000
158,45.5616903,-73.610512,559.785309,1.555556,8.750000,3.500000


In [80]:
# Saving dataframe as a csv file
df_yelp_avg.to_csv('yelp_df.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

In [81]:
# Foursquare

# Dropping duplicate restaurants/coffee shops
df_fs_unique = df_fs.drop_duplicates(subset='POI name', keep='first')
df_fs_unique.info()

<class 'pandas.core.frame.DataFrame'>
Index: 413 entries, 0 to 1577
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   POI name    413 non-null    object 
 1   distance    413 non-null    int64  
 2   price       365 non-null    float64
 3   popularity  412 non-null    float64
 4   rating      376 non-null    float64
 5   latitude    413 non-null    object 
 6   longitude   413 non-null    object 
dtypes: float64(3), int64(1), object(3)
memory usage: 25.8+ KB


In [68]:
# Yelp

# Dropping duplicate restaurants/coffee shops
df_yelp_unique = df_yelp.drop_duplicates(subset='POI name', keep='first')
df_yelp_unique.info()

<class 'pandas.core.frame.DataFrame'>
Index: 724 entries, 0 to 3139
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   POI name           724 non-null    object 
 1   distance           724 non-null    float64
 2   price              541 non-null    float64
 3   rating             724 non-null    float64
 4   Number of Reviews  724 non-null    int64  
 5   latitude           724 non-null    object 
 6   longitude          724 non-null    object 
dtypes: float64(3), int64(1), object(3)
memory usage: 45.2+ KB


The Yelp API provided more complete data. Compared to the Foursquare API, Yelp returned more unique restaurants and coffee shops and it has fewer NULL values than Foursquare.

Get the top 10 restaurants according to their rating

In [59]:
# Top 10 restaurant by rating from Foursquare

# Drop duplicates based on the 'POI name' column
unique_restaurants_df = df_fs.drop_duplicates(subset='POI name', keep='first')

# Sort the dataframe by the 'rating' column in descending order
top10_restaurants = unique_restaurants_df.sort_values(by='rating', ascending=False).head(10)

# Resetting index and make it start at 1
top10_restaurants.reset_index(drop=True, inplace=True)
top10_restaurants.index = top10_restaurants.index + 1

top10_restaurants

Unnamed: 0,POI name,distance,price,popularity,rating,latitude,longitude
1,Cadet,741,,0.981131,9.5,45.516926210319546,-73.56425732374191
2,Bouillon Bilk,675,3.0,0.971504,9.4,45.516926210319546,-73.56425732374191
3,Au Kouign-Amann,509,2.0,0.968422,9.3,45.52723106564012,-73.58672618865965
4,Olive & Gourmando,528,2.0,0.976949,9.3,45.50206,-73.56295
5,Darling,456,3.0,0.979904,9.2,45.51689731423111,-73.58910799026489
6,Cafe Italia,277,1.0,0.972917,9.2,45.53519006163501,-73.61548215150833
7,Crew Collective & Café,294,1.0,0.989859,9.2,45.50206,-73.56295
8,Milos Restaurant,698,4.0,0.96496,9.2,45.527371996141,-73.60398352146149
9,Damas,811,4.0,0.978833,9.2,45.52357479400008,-73.62344294786453
10,Maison Boulud,410,4.0,0.961827,9.2,45.49658063218649,-73.5762146115303


In [60]:
# Top 10 restaurant by rating from Yelp

# Drop duplicates based on the 'POI name' column
unique_restaurants_df_yelp = df_yelp.drop_duplicates(subset='POI name', keep='first')

# Sort the dataframe by the 'Number of Reviews' and 'rating' columns in descending order
top10_restaurants_yelp = unique_restaurants_df_yelp.sort_values(by=['Number of Reviews', 'rating'], ascending=[False, False]).head(10)

# Resetting index and make it start at 1
top10_restaurants_yelp.reset_index(drop=True, inplace=True)
top10_restaurants_yelp.index = top10_restaurants_yelp.index + 1

top10_restaurants_yelp

Unnamed: 0,POI name,distance,price,rating,Number of Reviews,latitude,longitude
1,Schwartz's,586.224718,$$,4.0,3161,45.51487490356938,-73.58486406505108
2,La Banquise,1244.539312,$$,4.0,2201,45.516926210319546,-73.56425732374191
3,Olive + Gourmando,625.795508,$$,4.5,1625,45.49572236030835,-73.55461344122887
4,Au Pied de Cochon,985.126709,$$$,4.0,1395,45.516926210319546,-73.56425732374191
5,L'Avenue,592.443665,$$,4.5,1270,45.52689,-73.57264
6,Modavie,740.484801,$$$,4.0,889,45.5121682576952,-73.55481594800949
7,Joe Beef,281.761594,$$$,4.5,768,45.485328993998365,-73.57692271471024
8,Kazu,1056.434725,$$,4.5,720,45.49788352593901,-73.56856763362885
9,Nouilles de Lan Zhou,613.652227,$$,4.5,681,45.5121682576952,-73.55481594800949
10,L'Express,875.783817,$$$,4.0,681,45.516926210319546,-73.56425732374191
