In [50]:
# Imports
import pandas as pd
import numpy as np
import requests
import os
import json

def api_request(url,headers=None,params=None):
    """
    Send an HTTP GET request to the specified URL with optional headers and parameters.

    Args:
        url (str): The URL to send the GET request to.
        headers (dict, optional): A dictionary of HTTP headers to include in the request.
            Default is None.
        params (dict, optional): A dictionary of query parameters to include in the request.
            Default is None.
    """
    
    request = requests.get(url, headers=headers)
    return request.json()

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [71]:
# Load bike data 
bikes_df = pd.read_csv('bikes_df.csv')

# Get the Foursquare API key from environment variables
FOUR_SQUARE_KEY = os.environ['FOUR_SQUARE_KEY']

# Define headers for Foursquare API requests
headers = {
    "accept": "application/json",
    "Authorization": FOUR_SQUARE_KEY
}

four_square_ids = []
# API call to get the list of foursquare ids
for num in range(len(bikes_df)):
    url = f"https://api.foursquare.com/v3/places/search?query=restaurant%2Cbar%2Cmuseum&ll={bikes_df['bike_station_lat'][num]}%2C{bikes_df['bike_station_long'][num]}&radius=1000&fields=fsq_id%2Cdistance"
    four_square_ids.append(api_request(url, headers=headers))

# Normalize and extract relevant data from the Foursquare API response
fsq_ids = pd.json_normalize(four_square_ids, record_path='results', meta=[['context', 'geo_bounds', 'circle', 'center', 'latitude'],
                                                                          ['context', 'geo_bounds', 'circle', 'center', 'longitude']])

fsq_ids_list = fsq_ids.values.tolist()


foursquare_locales = []

# Define a function to make Foursquare API requests for a range of items in fsq_ids_list
def api_call_range(start_end):
    """
    Make a series of Foursquare API requests for a range of items in the fsq_ids_list.

    Args:
        start_end (tuple): A tuple specifying the start and end indices of the range to process.
            The range is inclusive of the start index and exclusive of the end index.

    Returns:
        None

    This function iterates through a specified range of items in the fsq_ids_list, constructs
    Foursquare API URLs for each item, makes API requests, and appends the results to the
    foursquare_locales list.

    """
    start = start_end[0]
    end = start_end[1]
    for num in range(start, end):
        url = f"https://api.foursquare.com/v3/places/{fsq_ids_list[num][0]}?fields=rating%2Cname%2Clocation%2Cfsq_id%2Cpopularity%2Chours%2Cprice%2Cmenu%2Ctastes%2Cdistance%2Ccategories"
        foursquare_locales.append(api_request(url, headers=headers))
    

# Define a list of ranges for making API requests in chunks
range_list = [(0, 1001), (1001, 2001), (2001, len(fsq_ids_list))]

# Loop through the range list and make API requests for each range
for nums in range_list:
    api_call_range(nums)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [389]:
# Normalize the data obtained from Foursquare API responses and create a DataFrame
fsq_df = pd.json_normalize(foursquare_locales)

# Choose relevant columns from the DataFrame
fsq_df = fsq_df[['fsq_id', 'name', 'popularity', 'price', 'rating', 'hours.display', 'location.address']]
# Changing column names for easier reference
fsq_df.rename(columns={'hours.display':'hours',
                       'location.address':'address'}, inplace=True)
# Check for duplicate 'fsq_id' values in the DataFrame
fsq_df['fsq_id'].is_unique  
fsq_df['fsq_id'].value_counts() 

# Drop duplicate rows based on the 'fsq_id' column
fsq_df.drop_duplicates(subset=['fsq_id'], inplace=True)

# Verify that duplicate 'fsq_id' values have been removed
fsq_df['fsq_id'].value_counts()  

# Check the data types of the columns in the DataFrame
fsq_df.dtypes

# Replacing null values
# Replacing null values with the mean total of the column
fsq_df.loc[fsq_df['rating'].isnull(), 'rating'] = fsq_df['rating'].mean(axis=0)
fsq_df.loc[fsq_df['popularity'].isnull(), 'popularity'] = fsq_df['popularity'].mean(axis=0)

# Replacing null values with the min value of the column
fsq_df.loc[fsq_df['price'].isnull(), 'price'] = fsq_df['price'].min()

# Replacing null values with set value
fsq_df.loc[fsq_df['hours'].isnull(), 'hours'] = 'not available'

# checking again for any nulls
fsq_df.isnull().value_counts()
fsq_df[fsq_df['address'].isnull()]

# Replacing address column nulls with fixed value
fsq_df.loc[fsq_df['address'].isnull(), 'address'] = 'not available'
fsq_df.isnull().value_counts()

fsq_id  name   popularity  price  rating  hours  address
False   False  False       False  False   False  False      928
Name: count, dtype: int64

Put your parsed results into a DataFrame

In [391]:
# Merge the 'fsq_df' DataFrame with the 'fsq_ids' DataFrame based on the 'fsq_id' column
final_fsq_df = pd.merge(fsq_df, fsq_ids, on='fsq_id')

# Rename specific columns to clarify their meaning
final_fsq_df = final_fsq_df.rename(columns={'context.geo_bounds.circle.center.latitude': 'bike_station_lat',
                                             'context.geo_bounds.circle.center.longitude': 'bike_station_long'})

# Drop any duplicate rows to ensure data integrity
final_fsq_df = final_fsq_df.drop_duplicates()

# Save the final merged DataFrame to a CSV file for future use
final_fsq_df.to_csv('final_fsq_df.csv', index=False)

Unnamed: 0,fsq_id,name,popularity,price,rating,hours,address,distance,bike_station_lat,bike_station_long


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [239]:
# Perform Yelp API calls to retrieve information about locales near bike stations
YELP_KEY = os.environ['YELP_KEY']  
headers = {
        "accept": "application/json",
        "authorization": f'Bearer {YELP_KEY}'
yelp_locales = [] 

for num in range(len(bikes_df)):
    yelp_url = f"https://api.yelp.com/v3/businesses/search?latitude={bikes_df['bike_station_lat'][num]}&longitude={bikes_df['bike_station_long'][num]}&radius=1000&categories=bars%2Cbeaches&sort_by=best_match&limit=5"
    yelp_locales.append(api_request(yelp_url, headers=headers))


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [292]:
# Normalize Yelp locale data to extract relevant information and create DataFrames
yelp_df1 = pd.json_normalize(yelp_locales, record_path='businesses', meta=[['region','center','latitude'], ['region','center','longitude']])

# Select specific columns from the normalized DataFrame
yelp_df1 = yelp_df1[['id', 'region.center.latitude', 'region.center.longitude']]

# Normalize Yelp locale data again to extract additional information
yelp_df2 = pd.json_normalize(yelp_locales, record_path=['businesses', 'categories'], meta=[['results','id'],
                                                                                           ['results','name'],
                                                                                           ['results','review_count'],
                                                                                           ['results','location','address1'],
                                                                                           ['results','rating']])


In [293]:
# Rename columns
yelp_df2 = yelp_df2.rename(columns={'results.id':'id'})
#check for duplicates
yelp_df1.duplicated()
yelp_df1.value_counts()
#drop unneccessary columns
yelp_df2.drop(columns=['alias', 'title'], inplace=True)
#check for duplicates
yelp_df2.value_counts()
yelp_df2.drop_duplicates(inplace=True)

Put your parsed results into a DataFrame

In [373]:
# Merge the two Yelp DataFrames 'yelp_df1' and 'yelp_df2' based on the 'id' column
final_yelp_df = pd.merge(yelp_df1, yelp_df2, on='id')

# Select specific columns of interest from the merged DataFrame
final_yelp_df = final_yelp_df[['results.name', 
                               'results.location.address1',  
                               'results.rating', 
                               'results.review_count',
                               'region.center.latitude', 
                               'region.center.longitude']]

# Rename columns for clarity and consistency
final_yelp_df = final_yelp_df.rename(columns={'results.name': 'name', 
                                              'results.location.address1': 'address',
                                              'results.rating': 'rating',
                                              'results.review_count': 'review_count',
                                              'region.center.latitude': 'bike_station_lat',
                                              'region.center.longitude': 'bike_station_long'})

# Drop duplicate rows to ensure data integrity
final_yelp_df.value_counts()
final_yelp_df.drop_duplicates(inplace=True)

# Checking for null values
final_yelp_df.isnull().value_counts()
final_yelp_df[final_yelp_df['address'].isnull()]

# Replacing nulls in address column with fixed value
final_yelp_df.loc[final_yelp_df['address'].isnull(), 'address'] = 'not available'

# Confirming null values cleaned
final_yelp_df.isnull().value_counts()
final_yelp_df.to_csv('final_yelp_df.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Foursquare provided data for 892 locations, while Yelp returned information for only 369 locations. Foursquare employs a more precise 10-point rating system, whereas Yelp uses a 5-point system. Additionally, Yelp includes a count of the number of reviews for each location. In my opinion, Foursquare offered a more comprehensive view of both the quantity and quality of locales within the vicinity of each bike station. However, it's worth noting that both platforms offer additional metrics that I chose not to utilize for this specific project, as I deemed them unnecessary.

Get the top 10 restaurants according to their rating

In [374]:
def top_10_by_rating(dataframe, business_type):
    """
    Get the top 10 businesses of a specified type by their ratings from a DataFrame.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing business data.
        business_type (str): The type of business to filter by (e.g., 'restaurant', 'bar').

    Returns:
        pd.DataFrame: A DataFrame containing the top 10 businesses of the specified type,
        sorted by rating in descending order.

    This function filters the given DataFrame to include only businesses that match the specified
    'business_type', groups them by 'name' and 'rating', counts the occurrences, and then sorts
    the businesses by 'rating' in descending order to return the top 10 rated businesses.

    Example:
        top_10_restaurants = top_10_by_rating(dataframe, 'restaurant')
        top_10_bars = top_10_by_rating(dataframe, 'bar')
    """
    return dataframe[dataframe['name'].str.contains(business_type)].groupby(['name', 'rating'])\
           .count().sort_values(by='rating', ascending=False).head(10)

In [375]:
top_10_by_rating(final_fsq_df, 'Restaurant')

Unnamed: 0_level_0,Unnamed: 1_level_0,fsq_id,popularity,price,hours.display,location.address,distance,bike_station_lat,bike_station_long
name,rating,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
The Misfit Bar & Restaurant,8.9,1,1,1,1,1,1,1,1
Jitlada Thai Restaurant,8.9,3,3,3,3,3,3,3,3
The Dresden Restaurant,8.7,9,9,9,9,9,9,9,9
Bianca Restaurant,8.7,2,2,2,2,2,2,2,2
Casablanca Restaurant,8.7,4,4,4,4,4,4,4,4
Lares Restaurant,8.6,1,1,1,1,1,1,1,1
Thunderbolt Restaurant,8.6,4,4,4,4,4,4,4,4
Johnnie's Pastrami Restaurant,8.6,2,2,2,2,2,2,2,2
Versailles Cuban Restaurant,8.6,6,6,6,6,6,6,6,6
Irori Japanese Restaurant,8.5,3,3,3,3,3,3,3,3


In [376]:
top_10_by_rating(final_yelp_df, 'Restaurant')

Unnamed: 0_level_0,Unnamed: 1_level_0,address,review_count,bike_station_lat,bike_station_long
name,rating,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Rustic Kitchen Restaurant & Wine Bar,4.5,2,2,2,2
Seoul Salon Restaurant & Bar,4.5,2,2,2,2
Bamboo Restaurant,4.0,6,6,6,6
Barbrix Wine Shop & Restaurant,4.0,1,1,1,1
Blair's Restaurant,4.0,2,2,2,2
Il Moro Restaurant,4.0,1,1,1,1
Ivanhoe Restaurant & Bar,4.0,1,1,1,1
Joliet Restaurant and Bar,4.0,5,5,5,5
Noe Restaurant and Bar,4.0,4,4,4,4
The Exchange Restaurant,4.0,5,5,5,5


In [392]:
final_yelp_df[final_yelp_df['name'].str.contains('Kaos')]

Unnamed: 0,name,address,rating,review_count,bike_station_lat,bike_station_long
970,Kaos Bartending,,5.0,8,34.17765,-118.38231
971,Kaos Bartending,,5.0,8,34.16835,-118.3875
