# Statistics Modelling with Python, Project Two
### Part 2: Connecting to Foursquare and Yelp APIs

#### Import libraries

In [129]:
import requests
import json
import os
import foursquare
import pandas as pd
from tqdm import tqdm
import time
from time import sleep

# Foursquare
### Send a request to Foursquare with a small radius (250m) for all the bike stations in your city of choice.  
###### (I opted to lower this from 1000m to 250m, as otherwise the search areas of neighbouring CityBike stations would overlap)

#### Step 1: Foursquare API key set up for restaurants

In [155]:
# Foursquare credentials: the key is read from the FOURSQUARE_API_KEY environment
# variable so it never appears in the notebook (the value is None when it is unset)
api_key = os.getenv('FOURSQUARE_API_KEY')

# Foursquare Places search endpoint used by the query function defined below
endpoint = "https://api.foursquare.com/v3/places/search"

# Build and send the Foursquare restaurant search for a single coordinate
def restaurant_foursquare_query(latitude, longitude):
    """Search Foursquare for restaurants within a 250 m radius of a coordinate.

    Reads the module-level ``endpoint`` and ``api_key`` at call time.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: The server replied with a 4xx/5xx status.
        requests.exceptions.Timeout: No reply arrived within the timeout.
    """
    # NOTE(review): 'price' may not be a recognised search parameter, and rating/price
    # may only be returned when requested through a 'fields' parameter - confirm
    # against the Foursquare Places API reference.
    params = {
        'll': f'{latitude},{longitude}', 'radius':'250', 'query':'Restaurant', 'price': "1,2,3,4"
        }
    # The API key is sent as-is in the Authorization header; see
    # https://location.foursquare.com/developer/reference/authentication
    headers = {"Accept": "application/json",
        "Authorization": api_key
        }

    # A timeout (seconds) keeps one stalled connection from hanging the whole station loop
    response = requests.get(endpoint, params=params, headers=headers, timeout=10)

    # Surface HTTP errors (bad key, rate limit, ...) here rather than as a later KeyError
    response.raise_for_status()

    # Hand the decoded JSON payload back to the caller
    return response.json()



#### Step 2: Define functions to retrieve FourSquare results for restaurants

In [153]:
# Fetch the raw Foursquare restaurant search payload for one coordinate
def bike_radius(latitude, longitude):
    """Return the complete Foursquare restaurant response for a coordinate.

    The 'results' entry is looked up but not used, so the call raises when the
    payload has no such entry.
    """
    payload = restaurant_foursquare_query(latitude, longitude)
    # Looked up only for its failure side effect; the value itself is discarded
    _ = payload['results']
    return payload

# Flatten the Foursquare restaurant results around one station into a list of dictionaries
def bike_radius_info(latitude, longitude, location_name):
    """Collect the restaurants Foursquare returns near a bike station.

    NOTE: this notebook defines ``bike_radius_info`` in several cells; the most
    recently executed definition is the one the loops call.

    Args:
        latitude: Latitude of the station.
        longitude: Longitude of the station.
        location_name: Station name copied into every returned record.

    Returns:
        A list with one dict per result, holding the keys 'fsq_id',
        'location_name', 'name', 'rating', 'price' and 'poi_type'. 'rating' and
        'price' are '' when the result does not carry them.
    """
    json_content = restaurant_foursquare_query(latitude, longitude)
    poi_list = []
    for poi in json_content['results']:
        poi_list.append({
            'fsq_id': poi['fsq_id'],
            'location_name': location_name,
            'name': poi['name'],
            # .get() gives the same '' fallback as the former bare `except:` blocks
            # without also hiding unrelated errors
            'rating': poi.get('rating', ''),
            'price': poi.get('price', ''),
            'poi_type': [category['name'] for category in poi['categories']],
        })
    return poi_list

#### Step 3: Create a loop to parse through the CityBikes data and retrieve point of interest (POI) details on restaurants from Foursquare

In [1]:
# read the file containing the locations
# NOTE(review): absolute, machine-specific path - the cell only runs where this file exists
bike_df = pd.read_csv('/Users/brittanyharding/LHL-Projects/Statistical-Modelling-with-Python/data/CSV_files/London_CityBikes_Data.csv')

# Work on a random subset of 250 stations; random_state pins the draw so a rerun
# queries the same stations (the later cells reuse rand_df)
rand_df = bike_df.sample(n=250, random_state=42)

# Keep each station's name, latitude and longitude together. De-duplicating the three
# columns independently can produce arrays of different lengths and pair a station
# name with another station's coordinates.
stations = rand_df[['name', 'latitude', 'longitude']].drop_duplicates()
names = stations['name'].to_numpy()
latitudes = stations['latitude'].to_numpy()
longitudes = stations['longitude'].to_numpy()

# create an empty list to store the dictionaries
foursquare_restaurant_data_list = []

# loop through each station and collect the restaurants Foursquare reports around it
for station_name, station_lat, station_lon in tqdm(zip(names, latitudes, longitudes), total=len(stations)):
    station_pois = bike_radius_info(station_lat, station_lon, station_name)
    for poi in station_pois:
        # Append to the restaurant list created above (not the parks list, which
        # does not exist yet at this point of the notebook)
        foursquare_restaurant_data_list.append({
            'fsq_id': poi['fsq_id'],
            'location_name': poi['location_name'],
            'name': poi['name'],
            'poi_type': poi['poi_type'],
            'rating': poi['rating'],
            'price': poi['price']})
    time.sleep(1) # add 1 second delay between each iteration

#### Step 4:  Create a DataFrame with Foursquare restaurant results

In [134]:
# Build the DataFrame from the list the restaurant loop defines
# (`foursquare_restaurant_data_list`; the former `..._restaurants_data_list` name is never defined)
foursquare_restaurants_df = pd.DataFrame(foursquare_restaurant_data_list)

#### Step 5: Export DataFrame to CSV

In [135]:
# Write the Foursquare restaurant results to a CSV file (without the index column)
# so the data can be reviewed outside the notebook
foursquare_restaurants_df.to_csv(path_or_buf='foursquare_restaurants_df.csv', index=False)

#### Step 1: Foursquare API key set up for Parks

In [136]:
# Foursquare credentials: the key is read from the FOURSQUARE_API_KEY environment
# variable so it never appears in the notebook (the value is None when it is unset)
api_key = os.getenv('FOURSQUARE_API_KEY')

# Foursquare Places search endpoint used by the query function defined below
endpoint = "https://api.foursquare.com/v3/places/search"

# Build and send the Foursquare parks search for a single coordinate
def parks_foursquare_query(latitude, longitude):
    """Search Foursquare for parks within a 250 m radius of a coordinate.

    Reads the module-level ``endpoint`` and ``api_key`` at call time.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: The server replied with a 4xx/5xx status.
        requests.exceptions.Timeout: No reply arrived within the timeout.
    """
    # NOTE(review): 'venuePhotos' and 'price' may not be recognised search parameters,
    # and rating/price may only be returned when requested through a 'fields'
    # parameter - confirm against the Foursquare Places API reference.
    params = {
        'll': f'{latitude},{longitude}', 'radius':'250', 'query':'Parks', 'venuePhotos': '1', 'price': "1,2,3,4"
        }
    # The API key is sent as-is in the Authorization header; see
    # https://location.foursquare.com/developer/reference/authentication
    headers = {"Accept": "application/json",
        "Authorization": api_key
        }

    # A timeout (seconds) keeps one stalled connection from hanging the whole station loop
    response = requests.get(endpoint, params=params, headers=headers, timeout=10)

    # Surface HTTP errors (bad key, rate limit, ...) here rather than as a later KeyError
    response.raise_for_status()

    # Hand the decoded JSON payload back to the caller
    return response.json()



#### Step 2: Define functions to retrieve FourSquare results for Parks

In [137]:
# Fetch the raw Foursquare parks search payload for one coordinate
def bike_radius(latitude, longitude):
    """Return the complete Foursquare parks response for a coordinate.

    The 'results' entry is looked up but not used, so the call raises when the
    payload has no such entry.
    """
    payload = parks_foursquare_query(latitude, longitude)
    # Looked up only for its failure side effect; the value itself is discarded
    _ = payload['results']
    return payload

# Flatten the Foursquare parks results around one station into a list of dictionaries
def bike_radius_info(latitude, longitude, location_name):
    """Collect the parks Foursquare returns near a bike station.

    NOTE: this notebook defines ``bike_radius_info`` in several cells; the most
    recently executed definition is the one the loops call.

    Args:
        latitude: Latitude of the station.
        longitude: Longitude of the station.
        location_name: Station name copied into every returned record.

    Returns:
        A list with one dict per result, holding the keys 'fsq_id',
        'location_name', 'name', 'rating', 'price' and 'poi_type'. 'rating' and
        'price' are '' when the result does not carry them.
    """
    json_content = parks_foursquare_query(latitude, longitude)
    poi_list = []
    for poi in json_content['results']:
        poi_list.append({
            'fsq_id': poi['fsq_id'],
            'location_name': location_name,
            'name': poi['name'],
            # .get() gives the same '' fallback as the former bare `except:` blocks
            # without also hiding unrelated errors
            'rating': poi.get('rating', ''),
            'price': poi.get('price', ''),
            'poi_type': [category['name'] for category in poi['categories']],
        })
    return poi_list

#### Step 3: Create a loop to parse through the CityBikes data and retrieve point of interest (POI) details on parks from Foursquare

In [138]:
# read the file containing the locations
# NOTE(review): bike_df is reloaded here, but the loop below works from rand_df (the
# sample drawn in the restaurants cell) so the same stations are queried
bike_df = pd.read_csv('/Users/brittanyharding/LHL-Projects/Statistical-Modelling-with-Python/data/CSV_files/London_CityBikes_Data.csv')

# Keep each station's name, latitude and longitude together. De-duplicating the three
# columns independently can produce arrays of different lengths and pair a station
# name with another station's coordinates.
stations = rand_df[['name', 'latitude', 'longitude']].drop_duplicates()
names = stations['name'].to_numpy()
latitudes = stations['latitude'].to_numpy()
longitudes = stations['longitude'].to_numpy()


# create an empty list to store the dictionaries
foursquare_parks_data_list = []

# loop through each station and collect the parks Foursquare reports around it
for station_name, station_lat, station_lon in tqdm(zip(names, latitudes, longitudes), total=len(stations)):
    station_pois = bike_radius_info(station_lat, station_lon, station_name)
    for poi in station_pois:
        foursquare_parks_data_list.append({
            'fsq_id': poi['fsq_id'],
            'location_name': poi['location_name'],
            'name': poi['name'],
            'poi_type': poi['poi_type'],
            'rating': poi['rating'],
            'price': poi['price']})
    time.sleep(1) # add 1 second delay between each iteration

100%|██████████| 250/250 [05:03<00:00,  1.21s/it]


#### Step 4:  Create a DataFrame with Foursquare park results

In [139]:
# One row per park record collected by the loop above
foursquare_parks_df = pd.DataFrame(data=foursquare_parks_data_list)

#### Step 5: Export DataFrame to CSV

In [140]:
# Write the Foursquare park results to a CSV file (without the index column)
# so the data can be reviewed outside the notebook
foursquare_parks_df.to_csv(path_or_buf='foursquare_parks_df.csv', index=False)

# Yelp

### Send a request to Yelp with a small radius (250m, the same radius used for Foursquare) for all the bike stations in your city of choice. 

#### Step 1: Yelp API key set up for restaurants

In [141]:
# Yelp credentials: the key is read from the YELP_API_KEY environment variable
# so it never appears in the notebook (the value is None when it is unset).
# This rebinds the `api_key` name the Foursquare cells used earlier.
api_key = os.getenv('YELP_API_KEY')

# Yelp business search endpoint used by the query function defined below
endpoint = "https://api.yelp.com/v3/businesses/search"

# Build and send the Yelp restaurant search for a single coordinate
def restaurant_yelp_query(latitude, longitude):
    """Search Yelp for 'restaurant' within a 250 m radius of a coordinate.

    Reads the module-level ``endpoint`` and ``api_key`` at call time.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: The server replied with a 4xx/5xx status.
        requests.exceptions.Timeout: No reply arrived within the timeout.
    """
    params = {
        'latitude': latitude , 'longitude': longitude, 'radius': 250, 'term': 'restaurant'
        }
    # The key is sent as a Bearer token in the Authorization header
    headers = {'Authorization': f'Bearer {api_key}'}

    # A timeout (seconds) keeps one stalled connection from hanging the whole station loop
    response = requests.get(endpoint, params=params, headers=headers, timeout=10)

    # Surface HTTP errors (bad key, rate limit, ...) here rather than as a later KeyError
    response.raise_for_status()

    # Hand the decoded JSON payload back to the caller
    return response.json()

#### Step 2: Define functions to retrieve Yelp results for restaurants

In [142]:
# Fetch the raw Yelp restaurant search payload for one coordinate
def bike_radius(latitude, longitude):
    """Return the complete Yelp restaurant response for a coordinate.

    Raises when the payload has no 'businesses' entry.
    """
    json_content = restaurant_yelp_query(latitude, longitude)
    # Yelp payloads are read through 'businesses' (the key bike_radius_info iterates);
    # the former 'results' lookup was copied from the Foursquare cells and is not a
    # key this notebook ever reads from a Yelp response
    _ = json_content['businesses']
    return json_content

# Flatten the Yelp restaurant results around one station into a list of dictionaries
def bike_radius_info(latitude, longitude, name):
    """Collect the restaurants Yelp returns near a bike station.

    NOTE: this notebook defines ``bike_radius_info`` in several cells; the most
    recently executed definition is the one the loops call.

    Args:
        latitude: Latitude of the station.
        longitude: Longitude of the station.
        name: Station name, stored under 'location' in every returned record.

    Returns:
        A list with one dict per business, holding the keys 'id', 'location',
        'categories', 'rating', 'name' and 'price'. 'rating' and 'price' are ''
        when the business does not carry them.
    """
    json_content = restaurant_yelp_query(latitude, longitude)
    poi_list = []
    for poi in json_content['businesses']:
        poi_list.append({
            'id': poi['id'],
            'location': name,
            'categories': poi['categories'],
            # The former unguarded poi['rating'] raised before the guarded fallback
            # could run; .get() applies the intended '' default directly
            'rating': poi.get('rating', ''),
            'name': poi['name'],
            'price': poi.get('price', ''),
        })
    return poi_list

#### Step 3: Create a loop to parse through the CityBikes data and retrieve point of interest (POI) details on restaurants from Yelp

In [143]:
# read the file containing the locations
# NOTE(review): bike_df is reloaded here, but the loop below works from rand_df (the
# sample drawn in the Foursquare restaurants cell) so the same stations are queried
bike_df = pd.read_csv('/Users/brittanyharding/LHL-Projects/Statistical-Modelling-with-Python/data/CSV_files/London_CityBikes_Data.csv')

# Keep each station's name, latitude and longitude together. De-duplicating the three
# columns independently can produce arrays of different lengths and pair a station
# name with another station's coordinates.
stations = rand_df[['name', 'latitude', 'longitude']].drop_duplicates()
names = stations['name'].to_numpy()
latitudes = stations['latitude'].to_numpy()
longitudes = stations['longitude'].to_numpy()

# create an empty list to store the dictionaries
yelp_restaurant_data_list = []

# loop through each station and collect the restaurants Yelp reports around it
for station_name, station_lat, station_lon in tqdm(zip(names, latitudes, longitudes), total=len(stations)):
    businesses = bike_radius_info(station_lat, station_lon, station_name)
    # Distinct names for the list and its items; the former `for id in id`
    # also shadowed the builtin id()
    for business in businesses:
        yelp_restaurant_data_list.append({'id': business['id'], 'name': business['name'], 'location': business['location'], 'categories': business['categories'], 'rating': business['rating'],'price': business['price']})
        

100%|██████████| 250/250 [02:01<00:00,  2.06it/s]


#### Step 4:  Create a DataFrame with Yelp restaurant results

In [144]:
# One row per restaurant record collected by the loop above
yelp_restaurants_df = pd.DataFrame(data=yelp_restaurant_data_list)

#### Step 5: Export DataFrame to CSV

In [145]:
# Write the Yelp restaurant results to a CSV file (without the index column)
# so the data can be reviewed outside the notebook
yelp_restaurants_df.to_csv(path_or_buf='yelp_restaurants_df.csv', index=False)

## Yelp Parks

#### Step 1: Yelp API key set up for parks

In [146]:
# Yelp credentials: the key is read from the YELP_API_KEY environment variable
# so it never appears in the notebook (the value is None when it is unset)
api_key = os.getenv('YELP_API_KEY')

# Yelp business search endpoint used by the query function defined below
endpoint = "https://api.yelp.com/v3/businesses/search"

# Build and send the Yelp parks search for a single coordinate
def park_yelp_query(latitude, longitude):
    """Search Yelp for 'parks' within a 250 m radius of a coordinate.

    Reads the module-level ``endpoint`` and ``api_key`` at call time.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: The server replied with a 4xx/5xx status.
        requests.exceptions.Timeout: No reply arrived within the timeout.
    """
    params = {
        'latitude': latitude , 'longitude': longitude, 'radius': 250, 'term': 'parks'
        }
    # The key is sent as a Bearer token in the Authorization header
    headers = {'Authorization': f'Bearer {api_key}'}

    # A timeout (seconds) keeps one stalled connection from hanging the whole station loop
    response = requests.get(endpoint, params=params, headers=headers, timeout=10)

    # Surface HTTP errors (bad key, rate limit, ...) here rather than as a later KeyError
    response.raise_for_status()

    # Hand the decoded JSON payload back to the caller
    return response.json()

#### Step 2: Define functions to retrieve Yelp results for parks

In [147]:
# Fetch the raw Yelp parks search payload for one coordinate
def bike_radius(latitude, longitude):
    """Return the complete Yelp parks response for a coordinate.

    Raises when the payload has no 'businesses' entry.
    """
    json_content = park_yelp_query(latitude, longitude)
    # Yelp payloads are read through 'businesses' (the key bike_radius_info iterates);
    # the former 'results' lookup was copied from the Foursquare cells and is not a
    # key this notebook ever reads from a Yelp response
    _ = json_content['businesses']
    return json_content

# Flatten the Yelp parks results around one station into a list of dictionaries
def bike_radius_info(latitude, longitude, name):
    """Collect the parks Yelp returns near a bike station.

    NOTE: this notebook defines ``bike_radius_info`` in several cells; the most
    recently executed definition is the one the loops call.

    Args:
        latitude: Latitude of the station.
        longitude: Longitude of the station.
        name: Station name, stored under 'location' in every returned record.

    Returns:
        A list with one dict per business, holding the keys 'id', 'location',
        'categories', 'rating', 'name' and 'price'. 'rating' and 'price' are ''
        when the business does not carry them.
    """
    json_content = park_yelp_query(latitude, longitude)
    poi_list = []
    for poi in json_content['businesses']:
        poi_list.append({
            'id': poi['id'],
            'location': name,
            'categories': poi['categories'],
            # The former unguarded poi['rating'] raised before the guarded fallback
            # could run; .get() applies the intended '' default directly
            'rating': poi.get('rating', ''),
            'name': poi['name'],
            'price': poi.get('price', ''),
        })
    return poi_list

#### Step 3: Create a loop to parse through the CityBikes data and retrieve point of interest (POI) details on parks from Yelp

In [148]:
# read the file containing the locations
# NOTE(review): bike_df is reloaded here, but the loop below works from rand_df (the
# sample drawn in the Foursquare restaurants cell) so the same stations are queried
bike_df = pd.read_csv('/Users/brittanyharding/LHL-Projects/Statistical-Modelling-with-Python/data/CSV_files/London_CityBikes_Data.csv')

# Keep each station's name, latitude and longitude together. De-duplicating the three
# columns independently can produce arrays of different lengths and pair a station
# name with another station's coordinates.
stations = rand_df[['name', 'latitude', 'longitude']].drop_duplicates()
names = stations['name'].to_numpy()
latitudes = stations['latitude'].to_numpy()
longitudes = stations['longitude'].to_numpy()


# create an empty list to store the dictionaries
yelp_park_data_list = []

# loop through each station and collect the parks Yelp reports around it
for station_name, station_lat, station_lon in tqdm(zip(names, latitudes, longitudes), total=len(stations)):
    businesses = bike_radius_info(station_lat, station_lon, station_name)
    # Distinct names for the list and its items; the former `for id in id`
    # also shadowed the builtin id()
    for business in businesses:
        yelp_park_data_list.append({'id': business['id'], 'name': business['name'], 'location': business['location'], 'categories': business['categories'], 'rating': business['rating'],'price': business['price']})
        

100%|██████████| 250/250 [01:16<00:00,  3.26it/s]


#### Step 4:  Create a DataFrame with Yelp park results

In [149]:
# One row per park record collected by the loop above
yelp_parks_df = pd.DataFrame(data=yelp_park_data_list)

#### Step 5: Export DataFrame to CSV

In [150]:
# Write the Yelp park results to a CSV file (without the index column)
# so the data can be reviewed outside the notebook
yelp_parks_df.to_csv(path_or_buf='yelp_parks_df.csv', index=False)

# Comparing Results

#### Which API provided you with more complete data? Provide an explanation. 

###### The Yelp API provided more comprehensive data than the FourSquare API. Although FourSquare offered options to retrieve additional information, such as pricing and ratings, much of this data was not returned by the API for locations in London, England. This could be due to Yelp and FourSquare using different data sources to collect and aggregate their data. Additionally, Yelp may have access to more diverse and comprehensive sources, including user-generated reviews and ratings, while FourSquare may rely more on user check-ins and business listings.

###### The limitations of the FourSquare API, such as incomplete and unavailable data, affected my ability to draw conclusions about certain aspects of the restaurants and parks surrounding CityBike locations. This highlights the importance of considering the limitations of available data sources in any data analysis project, as without consistent data it is not possible to conduct a more in-depth analysis, provide additional insights, and uncover meaningful trends.

### Get the top 10 restaurants according to their rating

In [151]:
# Rank every Yelp restaurant from the highest rating to the lowest
sorted_restaurants = yelp_restaurants_df.sort_values(by='rating', ascending=False)

# Keep the first ten rows of that ranking
top_10_restaurants = sorted_restaurants.iloc[:10]

# Last expression of the cell, so the notebook renders the table
top_10_restaurants


Unnamed: 0,id,name,location,categories,rating,price
1716,lKfvqN-13wuLWjpeN7mDAw,Carmel by the Green,"200131 - Clarkson Street, Bethnal Green","[{'alias': 'cafes', 'title': 'Cafes'}, {'alias...",5.0,
2087,Uw6EjFp_T_B9W54pUSROdQ,Nando's,"001036 - Crawford Street, Marylebone","[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",5.0,£
2031,afHfidMfqQKH0KtcayoYUw,Tops Pizza,"200190 - Queen's Circus, Battersea Park","[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",5.0,£
2040,iily7HL1Ab1zKmCs0bRj6A,Kim's,"200016 - Castlehaven Road, Camden Town","[{'alias': 'vietnamese', 'title': 'Vietnamese'}]",5.0,£
2045,--_1qlRyjSRY1poCHjaLlA,Italian Alley,"200016 - Castlehaven Road, Camden Town","[{'alias': 'italian', 'title': 'Italian'}, {'a...",5.0,
2050,IAORFVwv9BIG5X_OUjBVDg,Bian Dang,"200016 - Castlehaven Road, Camden Town","[{'alias': 'streetvendors', 'title': 'Street V...",5.0,
455,OxVF09YyKL2A_4EAoOCFyw,Scarlet Rosita Food,"002637 - Bermondsey Street, Bermondsey","[{'alias': 'foodstands', 'title': 'Food Stands'}]",5.0,
2052,WVVWW0kFsFpux_SpZLf6Lw,Belly N Bao,"200016 - Castlehaven Road, Camden Town","[{'alias': 'taiwanese', 'title': 'Taiwanese'},...",5.0,
2054,_TpRjqSg_MSm8Kh2-DcKng,Near & Far Camden,"200016 - Castlehaven Road, Camden Town","[{'alias': 'cocktailbars', 'title': 'Cocktail ...",5.0,
2063,IC1BAwfP-8Vt5SXujGFtug,Roxie Steak & Wine Cafe,"200217 - Disraeli Road, Putney","[{'alias': 'steak', 'title': 'Steakhouses'}]",5.0,££
