In [1]:
import sqlite3
import pandas as pd
import requests

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [2]:
API_KEY = 'fsq3hhg8hTQCfL/aPuz1PSLab+R48Pg6oojSQ2NqplkdW8A='  

def fetch_pois_from_foursquare(lat, lon, radius=1000, limit=50):
    """
    Fetch POIs from Foursquare around a given latitude and longitude using the Foursquare Places API.
    
    Parameters:
    - lat, lon (float): Latitude and longitude of the center point.
    - radius (int): Radius in meters to search within.
    - limit (int): Maximum number of results to return.
    
    Returns:
    - list of dicts: Each dict contains POI information.
    """
    url = "https://api.foursquare.com/v3/places/search"
    headers = {
        "Accept": "application/json",
        "Authorization": API_KEY
    }
    params = {
        'll': f"{lat},{lon}",
        'radius': radius,
        'limit': limit
    }
    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        venues = response.json()['results']
        return [{
            'station_lat': lat,
            'station_lon': lon,
            'poi_name': venue['name'],
            'poi_lat': venue['geocodes']['main']['latitude'],
            'poi_lon': venue['geocodes']['main']['longitude']
        } for venue in venues]
    else:
        return []

# Container for all POIs data
all_pois = []

stations_df = pd.read_csv("/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/stations_data.csv")

# Iterate over each bike station in the DataFrame to fetch nearby POIs
for index, row in stations_df.iterrows():
    pois = fetch_pois_from_foursquare(row['latitude'], row['longitude'])
    all_pois.extend(pois)

# Convert the collected POI data into a DataFrame
pois_df = pd.DataFrame(all_pois)

pois_df.to_csv("/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/pois_data.csv")

print(stations_df)


   Unnamed: 0                 name   latitude  longitude  free_bikes
0           0       C/ BILBAO, 174  41.409020   2.195415           8
1           1    C/ SANT ADRIÀ, 88  41.435460   2.200157           5
2           2   PL. JESÚS CARRASCO  41.445046   2.176726          11
3           3  C/ RAMON TURRÓ, 292  41.403856   2.208426           1
4           4       C/ DEL CANÓ, 1  41.401330   2.157444           5


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [3]:
API_KEY = 'fsq3hhg8hTQCfL/aPuz1PSLab+R48Pg6oojSQ2NqplkdW8A='
stations_df = pd.DataFrame({
    'station_id': [1, 2, 3],
    'latitude': [41.387015, 41.391947, 41.396992],
    'longitude': [2.170047, 2.164485, 2.165105]
})

def fetch_and_parse_pois(lat, lon, radius=1000, limit=50):
    """
    Fetches POIs from Foursquare around a given latitude and longitude.
    Parses the response to extract names, locations, and ratings of venues.
    """
    url = "https://api.foursquare.com/v3/places/search"
    headers = {"Authorization": API_KEY}
    params = {'ll': f"{lat},{lon}", 'radius': radius, 'limit': limit, 'sort': 'RATING'}
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:
        venues = response.json()['results']
        parsed_venues = []
        for venue in venues:
            venue_details = {
                'name': venue['name'],
                'latitude': venue['geocodes']['main']['latitude'],
                'longitude': venue['geocodes']['main']['longitude'],
                'rating': venue.get('rating', 'No rating')  # Use 'No rating' if rating is not available
            }
            parsed_venues.append(venue_details)
        return parsed_venues
    else:
        print(f"Failed to fetch POIs: {response.status_code}")
        return []

# Fetch and parse POIs for a sample station
example_pois = fetch_and_parse_pois(stations_df.iloc[0]['latitude'], stations_df.iloc[0]['longitude'])

# Notes:
# - Parses each POI to extract and return a simplified dictionary of details.
# - Handles cases where a POI does not have a rating.
# - Checks the status code of the response to handle failed requests.

Put your parsed results into a DataFrame

In [4]:
all_pois = []

for _, row in stations_df.iterrows():
    pois = fetch_and_parse_pois(row['latitude'], row['longitude'])
    all_pois.extend(pois)  # Combine POIs from all stations

pois_df = pd.DataFrame(all_pois)

# Notes:
# - Iterates through each bike station to fetch nearby POIs.
# - Combines all fetched POIs into a single list of dictionaries.
# - Converts this list into a DataFrame for easy handling and analysis.

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in Barcelona

In [13]:
import requests
import pandas as pd

# Yelp API key
api_key = 'Y5wce5j8fpCo6ox2sg-olww45PnV5x7rcW0iPilFzz8roSidViNHLPPCNEjbwWNyTkJb_vW-wdi-Zau1SJcOwtwAutdPF3YR8FE-D7Yi_GOPMrMIxRalrxVUZNYaZnYx'

# Base URL for the Yelp API
url = 'https://api.yelp.com/v3/businesses/search'

# Example coordinates for a location in Barcelona (Latitude, Longitude)
latitude = 41.3851
longitude = 2.1734

# Headers with API Key
headers = {
    'Authorization': f'Bearer {api_key}'
}

# Initialize a list to hold all the business data
all_businesses = []

# Pagination setup
offset = 0
limit = 50  # Maximum number of results per request as per Yelp's API
total = None  # Total will be set after the first request

# Loop to handle pagination
while total is None or offset < total:
    # Parameters for the query
    params = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': 1000,  # Search radius in meters
        'categories': 'bikesharestations',  # Searching for bike share stations
        'limit': limit,
        'offset': offset
    }

    # Sending GET request to Yelp API
    response = requests.get(url, headers=headers, params=params)

    # Check if the request was successful
    if response.status_code == 200:
        # Convert response to JSON format
        data = response.json()
        total = data['total']  # Update total count of businesses
        all_businesses.extend(data['businesses'])  # Extend the list with new businesses
        offset += limit  # Increase offset for next page
    else:
        print(f"Failed to fetch data: {response.status_code}")
        break

# Convert collected business data to DataFrame
stations_df = pd.DataFrame([{
    'Name': business['name'],
    'Rating': business.get('rating', 'No rating'),  # Safe access to rating
    'Location': business['location']['address1']
} for business in all_businesses])

# Save to CSV
file_path = "/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/stations_data.csv"
stations_df.to_csv(file_path, index=False)
print(f"Data successfully saved to {file_path}")


    

Failed to fetch data: 400
Data successfully saved to /Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/stations_data.csv


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [6]:
import requests
import pandas as pd

# Define the Yelp API key and setup headers and parameters
api_key = 'Y5wce5j8fpCo6ox2sg-olww45PnV5x7rcW0iPilFzz8roSidViNHLPPCNEjbwWNyTkJb_vW-wdi-Zau1SJcOwtwAutdPF3YR8FE-D7Yi_GOPMrMIxRalrxVUZNYaZnYx'
headers = {'Authorization': f'Bearer {api_key}'}
url = "https://api.yelp.com/v3/businesses/search"
params = {
    'term': 'restaurants',
    'location': 'Barcelona',
    'limit': 50  # Adjust the limit as needed, up to 50 for Yelp API
}

# Make the API request to Yelp
response = requests.get(url, headers=headers, params=params)
if response.status_code == 200:
    data = response.json()
    restaurants = [{
        'Name': business['name'],
        'Rating': business['rating'],
        'Location': ", ".join([business['location']['address1'], business['location']['city'], business['location']['country']])
    } for business in data['businesses']]
    # Convert the list of dictionaries into a DataFrame
    restaurants_df = pd.DataFrame(restaurants)
    print(restaurants_df)
    # Save to CSV
    restaurants_df.to_csv("/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/restaurants_df.csv")
else:
    print(f"Failed to fetch data: {response.status_code}")



                              Name  Rating  \
0              Cerveseria Catalana     4.4   
1                            Arume     4.6   
2            My Fucking Restaurant     4.4   
3                        La Pepita     4.6   
4               Churrería Laietana     4.8   
5                        Micu Maku     4.8   
6                           Arcano     4.5   
7                       Casa Lolea     4.6   
8                      Somorrostro     4.4   
9                    Ciutat Comtal     4.2   
10                            TAPS     4.9   
11                         Cera 23     4.6   
12                           Tapeo     4.6   
13  Restaurant La Tasqueta de Blai     4.5   
14                         Vinitus     4.6   
15                          Cañete     4.6   
16                       Disfrutar     4.8   
17                       Amarre 69     4.6   
18                    Billy Brunch     4.8   
19             El Asador de Aranda     4.7   
20                    La Catalista

Put your parsed results into a DataFrame

In [12]:
import requests
import pandas as pd

# Sending the request to Yelp to get restaurant data
api_key = 'Y5wce5j8fpCo6ox2sg-olww45PnV5x7rcW0iPilFzz8roSidViNHLPPCNEjbwWNyTkJb_vW-wdi-Zau1SJcOwtwAutdPF3YR8FE-D7Yi_GOPMrMIxRalrxVUZNYaZnYx'
headers = {'Authorization': f'Bearer {api_key}'}
url = "https://api.yelp.com/v3/businesses/search"
params = {
    'term': 'restaurants',
    'location': 'Barcelona',
    'limit': 50  # Adjust the limit as needed, up to 50 for Yelp API
}

# Sending the request to Yelp to get restaurant data
response = requests.get(url, headers=headers, params=params)
if response.status_code == 200:
    data = response.json()
    restaurants = [{
        'Name': business['name'],
        'Rating': business['rating'],
        'Location': ", ".join([business['location']['address1'], business['location']['city'], business['location']['country']])
    } for business in data['businesses']]
    # Converting into a DataFrame
    restaurants_df = pd.DataFrame(restaurants)
    # Save to CSV
    restaurants_df.to_csv("/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/restaurants_df.csv", index=False)
    print(f"Data successfully saved to /Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/restaurants_df.csv")
else:
    print(f"Failed to fetch data: {response.status_code}, Response: {response.text}")

Data successfully saved to /Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/restaurants_df.csv


# Comparing Results

For this project, Foursquare proved to be the more effective API, primarily due to its comprehensive geographic data and wide range of Points of Interest (POIs). This information was crucial for examining the spatial relationships between bike stations and nearby amenities, aligning closely with our objectives to analyze urban mobility patterns in Barcelona. While Yelp might have added depth with user reviews and ratings, access limitations prevented its use, making Foursquare the more practical and beneficial choice for my analysis.

Get the top 10 restaurants according to their rating

In [8]:
import requests

# Define function to fetch data from Yelp
def fetch_yelp_data(lat, lon, api_key):
    """Fetch top-rated restaurants from Yelp using coordinates."""
    url = "https://api.yelp.com/v3/businesses/search"
    headers = {"Authorization": f"Bearer {api_key}"}
    params = {
        "latitude": lat,
        "longitude": lon,
        "radius": 1000,
        "categories": "restaurants",
        "sort_by": "rating",
        "limit": 50
    }
    response = requests.get(url, headers=headers, params=params)
    return response.json()['businesses'] if response.status_code == 200 else []

# Define function to fetch data from Foursquare
def fetch_foursquare_data(lat, lon, api_key):
    """Fetch top-rated restaurants from Foursquare using coordinates."""
    url = "https://api.foursquare.com/v3/places/search"
    headers = {
        "Accept": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    params = {
        "ll": f"{lat},{lon}",
        "radius": 1000,
        "limit": 50,
        "categories": "13065"
    }
    response = requests.get(url, headers=headers, params=params)
    return [{'name': place['name'], 'rating': place.get('rating', 0)} for place in response.json()['results']] if response.status_code == 200 else []

# Yelp and Foursquare API keys
yelp_api_key = "Y5wce5j8fpCo6ox2sg-olww45PnV5x7rcW0iPilFzz8roSidViNHLPPCNEjbwWNyTkJb_vW-wdi-Zau1SJcOwtwAutdPF3YR8FE-D7Yi_GOPMrMIxRalrxVUZNYaZnYx"
foursquare_api_key = "fsq3hhg8hTQCfL/aPuz1PSLab+R48Pg6oojSQ2NqplkdW8A="

# Coordinates for Barcelona
latitude = 41.3851
longitude = 2.1734

# Fetch data from both APIs
yelp_results = fetch_yelp_data(latitude, longitude, yelp_api_key)
foursquare_results = fetch_foursquare_data(latitude, longitude, foursquare_api_key)

# Combine and filter results
combined_results = yelp_results + foursquare_results
rated_results = [place for place in combined_results if place.get('rating', 0) > 0]

# Sort by rating and select top 10
top_ten_places = sorted(rated_results, key=lambda x: x['rating'], reverse=True)[:10]

# Print the top 10 places
for index, place in enumerate(top_ten_places, 1):
    print(f"{index}. {place['name']} - Rating: {place['rating']}")



1. Flax & Kale - Rating: 5.0
2. Dalston Coffee - Rating: 5.0
3. Quixote - Rating: 5.0
4. Cal Robert - Rating: 5.0
5. Mastica - Rating: 5.0
6. La Catalista - Rating: 5.0
7. Tast-Ller [15 Loft] - Rating: 5.0
8. Gringa - Rating: 5.0
9. Ginette - Rating: 5.0
10. Espai Quera Llibres i platillos - Rating: 5.0
