In [1]:
import pandas as pd
import requests

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [10]:
# Foursquare API key; the request function in this cell sends it verbatim as the
# "Authorization" header.
# NOTE(review): 'hidden' looks like a redacted placeholder — supply a real key
# (ideally read from an environment variable rather than committed here) before running.
API_KEY = 'hidden'

def fetch_bike_rentals_from_foursquare(lat, lon, radius=1000, limit=50):
    """
    Search Foursquare Places for bike-rental venues around a coordinate.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius: Search radius sent to the API (the notebook uses 1000 m).
        limit: Maximum number of results requested.

    Returns:
        A list with one dict per venue, holding the keys 'rental_name',
        'rental_lat', 'rental_lon' and 'rental_address'. Coordinates are None
        when the venue carries no main geocode, and the address falls back to
        'No Address Provided'. When the request cannot be completed or the
        status code is not 200, a message is printed and an empty list is
        returned.
    """
    url = "https://api.foursquare.com/v3/places/search"
    headers = {
        "Accept": "application/json",
        "Authorization": API_KEY
    }
    params = {
        'll': f"{lat},{lon}",
        'radius': radius,
        'limit': limit,
        'categories': '19002'  # Foursquare category ID for bike rentals
    }

    try:
        # Without an explicit timeout a stalled connection would block the cell forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        # Network-level failures follow the same "report and return nothing" path as bad statuses.
        print(f"Failed to fetch data: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code} - {response.reason}")
        return []

    rentals = []
    for rental in response.json().get('results', []):
        # Tolerate venues that lack a geocode or location object instead of raising KeyError.
        main_geocode = (rental.get('geocodes') or {}).get('main') or {}
        location = rental.get('location') or {}
        rentals.append({
            'rental_name': rental.get('name'),
            'rental_lat': main_geocode.get('latitude'),
            'rental_lon': main_geocode.get('longitude'),
            'rental_address': location.get('address', 'No Address Provided')
        })
    return rentals

# Search centre: one Barcelona coordinate, queried with the default 1000m radius
latitude, longitude = 41.40902, 2.195415

# Ask Foursquare for the bike rentals around that point
bike_rentals = fetch_bike_rentals_from_foursquare(latitude, longitude)

# Build a DataFrame from the results and tag every row with the station label
rentals_df = pd.DataFrame(bike_rentals).assign(station='C/ BILBAO, 174')

# Write the table to CSV without the index column
csv_file_path = "/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/foursquare_bike_rentals.csv"
rentals_df.to_csv(csv_file_path, index=False)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [3]:
# Foursquare key used for the Authorization header of the POI requests
API_KEY = 'hidden'

# Three sample stations, written as one record per station
stations_df = pd.DataFrame([
    {'station_id': 1, 'latitude': 41.387015, 'longitude': 2.170047},
    {'station_id': 2, 'latitude': 41.391947, 'longitude': 2.164485},
    {'station_id': 3, 'latitude': 41.396992, 'longitude': 2.165105},
])


In [4]:

def fetch_and_parse_pois(lat, lon, radius=1000, limit=50):
    """
    Fetch POIs from Foursquare around a coordinate and flatten each one.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        radius: Search radius sent to the API (the notebook uses 1000 m).
        limit: Maximum number of venues requested.

    Returns:
        A list of dicts with the keys 'name', 'latitude', 'longitude' and
        'rating'. 'rating' is the string 'No rating' when the venue has none,
        and the coordinates are None when the venue has no main geocode.
        When the request cannot be completed or the status code is not 200,
        a message is printed and an empty list is returned.
    """
    url = "https://api.foursquare.com/v3/places/search"
    headers = {"Accept": "application/json", "Authorization": API_KEY}
    params = {
        'll': f"{lat},{lon}",
        'radius': radius,
        'limit': limit,
        'sort': 'RATING',
        # Per the Foursquare Place Search docs, only the core fields come back by
        # default; rating has to be requested explicitly, otherwise every venue
        # would end up with 'No rating'. Only the fields parsed below are requested.
        'fields': 'name,geocodes,rating',
    }

    try:
        # Without an explicit timeout a stalled connection would block the cell forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to fetch POIs: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch POIs: {response.status_code}")
        return []

    parsed_venues = []
    for venue in response.json().get('results', []):
        # Tolerate venues without a geocode object instead of raising KeyError.
        main_geocode = (venue.get('geocodes') or {}).get('main') or {}
        parsed_venues.append({
            'name': venue.get('name'),
            'latitude': main_geocode.get('latitude'),
            'longitude': main_geocode.get('longitude'),
            'rating': venue.get('rating', 'No rating')  # placeholder when no rating is available
        })
    return parsed_venues


In [5]:

# Try the fetch/parse helper on the first station in stations_df
first_station = stations_df.iloc[0]
example_pois = fetch_and_parse_pois(first_station['latitude'], first_station['longitude'])

# Notes:
# - fetch_and_parse_pois reduces each POI to a small dict (name, coordinates, rating).
# - A POI without a rating gets the placeholder 'No rating'.
# - A non-200 response prints a message and yields an empty list.

Put your parsed results into a DataFrame

In [6]:
# One flat list holding the POIs of every station, in station order
all_pois = [
    poi
    for _, station in stations_df.iterrows()
    for poi in fetch_and_parse_pois(station['latitude'], station['longitude'])
]

pois_df = pd.DataFrame(all_pois)

# Notes:
# - Each bike station in stations_df triggers one POI lookup.
# - The per-station results are concatenated into a single list of dictionaries.
# - That list is turned into a DataFrame for easier handling and analysis.

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in Barcelona

In [7]:
# Fetch Yelp data for bike rentals (with ratings) and save it to CSV
def fetch_and_save_yelp_data(
    categories="bikerentals",
    radius=1000,
    file_path="/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/yelp_bike_data.csv",
):
    """
    Query the Yelp business search around the Barcelona sample point and save the result.

    Called without arguments it performs the same search as before: bike
    rentals within 1000 m, sorted by rating, at most 50 results.

    NOTE(review): a later cell defines another function with this same name;
    once that cell runs, it replaces this definition.

    Args:
        categories: Comma-separated Yelp category aliases to search for.
        radius: Search radius sent to Yelp; pass None to leave it out of the query.
        file_path: Destination of the CSV file.

    Returns:
        None. On success the CSV (columns Name, Rating, Location, Latitude,
        Longitude) is written and a confirmation is printed; on any failure a
        message is printed and nothing is written.
    """
    import os  # local import so the cell does not depend on an edit to the import cell

    # Prefer a key from the environment; fall back to the notebook's placeholder value.
    api_key = os.environ.get("YELP_API_KEY", "hidden")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "application/json"
    }

    # Query parameters are passed as a dict so requests handles the URL encoding.
    url = "https://api.yelp.com/v3/businesses/search"
    params = {
        "location": "Barcelona",
        "latitude": 41.40902,
        "longitude": 2.195415,
        "categories": categories,
        "sort_by": "rating",
        "limit": 50,
    }
    if radius is not None:
        params["radius"] = radius

    try:
        # Without an explicit timeout a stalled connection would block the cell forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to fetch data: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code} - {response.text}")
        return

    records = []
    for business in response.json().get('businesses', []):
        # These objects may be missing or null; fall back to empty dicts instead of raising.
        location = business.get('location') or {}
        coordinates = business.get('coordinates') or {}
        records.append({
            'Name': business.get('name'),
            'Rating': business.get('rating'),
            'Location': location.get('address1'),
            'Latitude': coordinates.get('latitude'),
            'Longitude': coordinates.get('longitude')
        })

    # Fixing the columns keeps the CSV header intact even when nothing was found,
    # so the file can still be read back with pd.read_csv.
    df = pd.DataFrame(records, columns=['Name', 'Rating', 'Location', 'Latitude', 'Longitude'])

    df.to_csv(file_path, index=False)
    print(f"Data successfully saved to {file_path}")

# Run the Yelp bike-rental search defined in this cell; it writes the CSV on success
# and prints a status line either way
fetch_and_save_yelp_data()

Data successfully saved to /Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/yelp_bike_data.csv


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [8]:
# Fetch Yelp data for restaurants and bars (with ratings) and save it to CSV
def fetch_and_save_yelp_data(
    categories="restaurants,bars",
    radius=None,
    file_path="/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/yelp_business_data.csv",
):
    """
    Query the Yelp business search around the Barcelona sample point and save the result.

    Called without arguments it performs the same search as before:
    restaurants and bars, sorted by rating, at most 50 results, with no
    radius filter.

    NOTE(review): the section prompt asks for a 1000 m radius, but this query
    has never sent one; pass radius=1000 to match the other searches.
    NOTE(review): this definition reuses the name of the bike-rental function
    from the previous cell and replaces it once this cell runs.

    Args:
        categories: Comma-separated Yelp category aliases to search for.
        radius: Optional search radius sent to Yelp; None leaves it out of the query.
        file_path: Destination of the CSV file.

    Returns:
        None. On success the CSV (columns Name, Rating, Location, Latitude,
        Longitude) is written and a confirmation is printed; on any failure a
        message is printed and nothing is written.
    """
    import os  # local import so the cell does not depend on an edit to the import cell

    # Prefer a key from the environment; fall back to the notebook's placeholder value.
    api_key = os.environ.get("YELP_API_KEY", "hidden")
    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "application/json"
    }

    # Query parameters are passed as a dict so requests handles the URL encoding.
    url = "https://api.yelp.com/v3/businesses/search"
    params = {
        "location": "Barcelona",
        "latitude": 41.40902,
        "longitude": 2.195415,
        "categories": categories,
        "sort_by": "rating",
        "limit": 50,
    }
    if radius is not None:
        params["radius"] = radius

    try:
        # Without an explicit timeout a stalled connection would block the cell forever.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to fetch data: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch data: {response.status_code} - {response.text}")
        return

    records = []
    for business in response.json().get('businesses', []):
        # These objects may be missing or null; fall back to empty dicts instead of raising.
        location = business.get('location') or {}
        coordinates = business.get('coordinates') or {}
        records.append({
            'Name': business.get('name'),
            'Rating': business.get('rating'),
            'Location': location.get('address1'),
            'Latitude': coordinates.get('latitude'),
            'Longitude': coordinates.get('longitude')
        })

    # Fixing the columns keeps the CSV header intact even when nothing was found,
    # so the file can still be read back with pd.read_csv.
    df = pd.DataFrame(records, columns=['Name', 'Rating', 'Location', 'Latitude', 'Longitude'])

    df.to_csv(file_path, index=False)
    print(f"Data successfully saved to {file_path}")

# Run the restaurants/bars search defined in this cell (this definition replaces the
# earlier function of the same name); it writes the CSV on success and prints a status line
fetch_and_save_yelp_data()



Data successfully saved to /Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/yelp_business_data.csv


Put your parsed results into a DataFrame

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Foursquare: Provided a more extensive list of bike rental locations, making it useful for understanding geographic distribution and accessibility.

Yelp: Offered fewer locations but included detailed user ratings and reviews, which are valuable for assessing customer satisfaction and service quality.

Conclusion: Foursquare excels in quantity, making it ideal for broad analyses, while Yelp provides richer detail that aids in evaluating the quality of services.

Get the top 10 restaurants according to their rating

In [9]:
# Columns shown in both top-10 tables
display_columns = ['Name', 'Rating', 'Location']

# Yelp: read the saved business data and keep the 10 highest-rated rows
yelp_path = '/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/yelp_business_data.csv'
yelp_data = pd.read_csv(yelp_path)
top10_yelp = yelp_data.nlargest(n=10, columns='Rating')
print("Top 10 Yelp Restaurants:")
print(top10_yelp[display_columns])

# Foursquare: same steps on the saved restaurant data
foursquare_path = '/Users/jorgen/Documents/LHL/project/Statistical-Modeling-with-Python/data/restaurants_df.csv'
foursquare_data = pd.read_csv(foursquare_path)
top10_foursquare = foursquare_data.nlargest(n=10, columns='Rating')
print("\nTop 10 Foursquare Restaurants:")
print(top10_foursquare[display_columns])

# Note: the top 10 restaurants are fetched for Yelp and Foursquare independently so that any potential issues can be spotted before moving on to joining the data in part 3

Top 10 Yelp Restaurants:
               Name  Rating                        Location
0           Ginette     5.0       Carrer del Rec Comtal, 12
1        Pasticelía     5.0            Carrer de París, 165
2     Lilith & Sons     5.0      Carrer d'En Fontrodona, 23
3      Imperfecto36     5.0          Carrer de la Mercè, 36
4           Itacate     5.0    Carrer de Comte Borrell, 107
5        Stop & Mos     5.0  Carrer de los Castillejos, 158
6            Somnia     5.0            Calle Montserrat, 13
7        Obsessions     5.0    Carrer Nou de la Rambla, 149
8  El Bar del Basko     5.0           Carrer de Guitert, 19
9      La Catalista     5.0         Carrer dels Carders, 11

Top 10 Foursquare Restaurants:
                  Name  Rating  \
25        La Catalista     5.0   
13                TAPS     4.9   
27           Cremat 11     4.9   
31           Le Romane     4.9   
36   Sucursal Aceitera     4.9   
6   Churrería Laietana     4.8   
7            Micu Maku     4.8   
10        B