In [13]:
# imports
import pandas as pd
import json
import requests
import os
from pprint import pp
import time

# Foursquare

#### Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [24]:
# Setting variables
api_key = os.environ['FOUR_SQUARE_API']
url = 'https://api.foursquare.com/v3/places/search'
headers = {
    'Accept': 'application/json',
    'Authorization': api_key
}
radius  = 1000 # 1 km
time_per_request = 1 / 100  # 1 second / 100 QPS - FourSquare API rate limit
responses = [] # list for dumping responses along the way in case of error

In [4]:
# Load locations from stations.csv
stations = pd.read_csv('../data/stations.csv', index_col=None)

# Create a list of paired (latitude, longitude)
locations = [f"{lat},{long}" for lat, long in zip(stations['latitude'], stations['longitude'])]

In [33]:
# GET request for each locations

# Continue from last error, if any
try:
    with open('../data/responses_fsq.json', 'r') as f:
        responses = json.load(f)
except FileNotFoundError:
    pass

# Loop throught locations
for location in locations[len(responses):]:
    params = {
        'll': location,
        'radius': radius
    }
    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()

        responses.append(response.json())

        # Save intermediate progress after each successful request
        with open('../data/responses_fsq.json', 'w') as f:
            json.dump(responses, f)

    except requests.exceptions.RequestException as error:
        print(f"Error: {error}")
        # Break out of the loop when an error occurs
        break

    time.sleep(time_per_request)

    # Stop the loop if an error occurred
    if 'error' in response.json():
        break

In [59]:
from pandas import json_normalize
flattened_data = []

# Iterate through each response
for response in responses:
    # Extract relevant information from the nested dictionaries and flatten
    flattened_results = json_normalize(response.get('results', []), sep='_')
    
    # Add the flattened results to the list
    flattened_data.extend(flattened_results.to_dict('records'))

# Create a DataFrame from the flattened data
flattened_df = pd.DataFrame(flattened_data)

# Display the resulting DataFrame
print(flattened_df)

                        fsq_id  \
0     4e0b48221f6edc06be259aec   
1     4ea9b28cb803cf1ffacd7c0d   
2     52ffddfb498e1ae6fe109272   
3     4b68b939f964a52040892be3   
4     4de7c3322271b9ccf4e4edbe   
...                        ...   
7891  510184c6e4b052bb765a20d6   
7892  4b049370f964a520505522e3   
7893  4c59c9fe2fa89c7495ce1323   
7894  4ad4c06cf964a520cef920e3   
7895  4ad4c06cf964a520cbf920e3   

                                             categories chains  distance  \
0     [{'id': 10039, 'name': 'Music Venue', 'icon': ...     []        19   
1     [{'id': 13034, 'name': 'Café', 'icon': {'prefi...     []        79   
2     [{'id': 16037, 'name': 'Playground', 'icon': {...     []       106   
3     [{'id': 12058, 'name': 'Elementary School', 'i...     []       226   
4     [{'id': 13002, 'name': 'Bakery', 'icon': {'pre...     []       240   
...                                                 ...    ...       ...   
7891  [{'id': 13276, 'name': 'Sushi Restaurant', 'ic...    

In [57]:
# Create an empty list to hold the flattened data

flattened_data = []

for response in responses:
    # Extract relevant information from the nested dictionaries
    for result in response.get('results', []):
        fsq_id = result.get('fsq_id')
        categories = result.get('categories', [])
        category_name = categories[0]['name'] if categories else None
        name = result.get('name')
        location = result.get('location', {})
        latitude = location.get('latitude')
        longitude = location.get('longitude')
        
        flattened_item = {
            'fsq_id': fsq_id,
            'category': category_name,
            'name': name,
            'latitude': latitude,
            'longitude': longitude
            # Add more keys as needed
        }
        flattened_data.append(flattened_item)
df = pd.DataFrame(flattened_data)
df

Unnamed: 0,fsq_id,category,name,latitude,longitude
0,4e0b48221f6edc06be259aec,Music Venue,Salle Désilets,,
1,4ea9b28cb803cf1ffacd7c0d,Café,Café l'Exil,,
2,52ffddfb498e1ae6fe109272,Playground,Fun O Max,,
3,4b68b939f964a52040892be3,Elementary School,Académie Louis-Pasteur,,
4,4de7c3322271b9ccf4e4edbe,Bakery,Capucine,,
...,...,...,...,...,...
7891,510184c6e4b052bb765a20d6,Sushi Restaurant,Saint Sushi Bar,,
7892,4b049370f964a520505522e3,Restaurant,La Fabrique Bistrot,,
7893,4c59c9fe2fa89c7495ce1323,French Restaurant,Restaurant de l'ITHQ,,
7894,4ad4c06cf964a520cef920e3,Park,Parc la Fontaine,,


#### Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

#### Put your parsed results into a DataFrame

# Yelp

#### Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [45]:
# Setting variables
api_key = os.environ['YELP_API']
url = 'https://api.yelp.com/v3/businesses/search'
headers = {
    'Accept': 'application/json',
    'Authorization': 'Bearer ' + api_key
}
radius  = 1000 # 1 km
time_per_request = 1 / 100  # 1 second / 100 QPS - Cannot find real QPS - Limit 500 per day
responses = [] # list for dumping responses along the way in case of error

In [50]:
# GET request for each locations

# Continue from last error, if any, or limit obtained for last day
try:
    with open('../data/responses_yelp.json', 'r') as f:
        responses = json.load(f)
except FileNotFoundError:
    pass

# Loop throught locations - Limit Max 500 per day

max_iterations = 497
start_index = len(responses)
end_index = min(start_index + max_iterations, len(stations) - 1)

for index in range(start_index, end_index):
    params = {
        'latitude': stations.iloc[index]['latitude'],
        'longitude': stations.iloc[index]['longitude'],
        'radius': radius
    }
    try:
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()

        responses.append(response.json())

        # Save intermediate progress after each successful request
        with open('../data/responses_yelp.json', 'w') as f:
            json.dump(responses, f)

    except requests.exceptions.RequestException as error:
        print(f"Error: {error}")
        # Break out of the loop when an error occurs
        break

    time.sleep(time_per_request)

    # Stop the loop if an error occurred
    if 'error' in response.json():
        break

#### Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [52]:
print(len(responses))

498


#### Put your parsed results into a DataFrame

# Comparing Results

#### Which API provided you with more complete data? Provide an explanation. 

#### Get the top 10 restaurants according to their rating