# Statistical Modelling with Python, Project Two
### Part 2: Connecting to Foursquare and Yelp APIs

#### Import libraries

In [179]:
import requests
import json
import os
import foursquare
import pandas as pd
from tqdm import tqdm
import time
from time import sleep

# Foursquare

### Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

#### Step 1: Foursquare API key set up for restaurants

In [180]:
# Set up Foursquare API credentials (read from the environment so the key is never hardcoded)
api_key = os.environ.get('FOURSQUARE_API_KEY')

# Endpoint URL for the Foursquare place search request
endpoint = "https://api.foursquare.com/v3/places/search"


def restaurant_foursquare_query(latitude, longitude, timeout=10):
    """Search Foursquare for 'Restaurant' places within 1000 m of a coordinate.

    Args:
        latitude: Latitude of the search centre; interpolated into the ``ll`` parameter.
        longitude: Longitude of the search centre; interpolated into the ``ll`` parameter.
        timeout: Seconds to wait for the server before the request is abandoned.
            Without a timeout a stalled connection would block the calling loop forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status (for example a
            rejected API key or a rate limit), so the failure is reported at the request
            instead of surfacing later as a missing key in the payload.
    """
    params = {
        'll': f'{latitude},{longitude}', 'radius': '1000', 'query': 'Restaurant'
    }
    # Foursquare expects the API key in the Authorization header, see
    # https://location.foursquare.com/developer/reference/authentication
    headers = {
        "Accept": "application/json",
        "Authorization": api_key,
    }

    # Send the HTTP GET request and fail loudly on an error status
    response = requests.get(endpoint, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()

    # Hand the decoded JSON content back to the caller
    return response.json()



#### Step 2: Define functions to retrieve FourSquare results for restaurants

In [181]:
# Fetch the raw Foursquare restaurant search response for one coordinate
def bike_radius(latitude, longitude):
    """Return the full Foursquare restaurant-search JSON for a coordinate.

    The ``'results'`` key is looked up before returning, so a response that
    lacks it raises ``KeyError`` rather than being handed back to the caller.
    """
    response_json = restaurant_foursquare_query(latitude, longitude)
    response_json['results']  # presence check only; the value itself is not used
    return response_json

# Turn the Foursquare restaurant results around a station into a list of flat records
def bike_radius_info(latitude, longitude, location_name):
    """Return one record per Foursquare restaurant result near a coordinate.

    Each record holds the place's ``fsq_id``, the ``location_name`` passed in
    by the caller, and ``poi_type``: the list of category names of the place.
    """
    response_json = restaurant_foursquare_query(latitude, longitude)
    return [
        {
            'fsq_id': place['fsq_id'],
            'location_name': location_name,
            'poi_type': [cat['name'] for cat in place['categories']],
        }
        for place in response_json['results']
    ]

#### Step 3: Create a loop to parse through the CityBikes data and retrieve point of interest (POI) details on restaurants from Foursquare

In [182]:
# Load CityBike DataFrame
# NOTE(review): this is an absolute, machine-specific path; consider a path relative to the project.
bike_df = pd.read_csv('/Users/brittanyharding/LHL-Projects/Statistical-Modelling-with-Python/notebooks/London_CityBikes_Data.csv')

# Draw a reproducible sample of stations: random_state fixes which rows are picked on
# every re-run, and min() keeps the cell working if the file has fewer than 250 rows.
rand_df = bike_df.sample(n=min(250, len(bike_df)), random_state=42)

# De-duplicate whole (name, latitude, longitude) rows. Calling .unique() on each column
# separately can yield arrays of different lengths, which pairs a station name with the
# coordinates of a different station (or runs past the end of the shorter array).
stations = rand_df[['name', 'latitude', 'longitude']].drop_duplicates()
names = stations['name'].to_numpy()
latitudes = stations['latitude'].to_numpy()
longitudes = stations['longitude'].to_numpy()

# create an empty list to store the dictionaries
foursquare_restaurant_data_list = []

# loop through each station and collect the records returned by bike_radius_info
for station_name, lat, lon in tqdm(zip(names, latitudes, longitudes), total=len(stations)):
    # bike_radius_info already returns dicts with exactly the keys
    # 'fsq_id', 'location_name' and 'poi_type', so they are stored as-is
    foursquare_restaurant_data_list.extend(bike_radius_info(lat, lon, station_name))
    time.sleep(1)  # add 1 second delay between each iteration

100%|██████████| 250/250 [07:25<00:00,  1.78s/it]


#### Step 4:  Create a DataFrame with Foursquare restaurant results

In [185]:
# Assemble the collected Foursquare restaurant records into a single table
foursquare_restaurants_df = pd.DataFrame(data=foursquare_restaurant_data_list)

#### Step 5: Export DataFrame to CSV

In [186]:
# Write foursquare_restaurants_df to a CSV file (without the index column) for further review
foursquare_restaurants_df.to_csv(path_or_buf='foursquare_restaurants_df.csv', index=False)

#### Step 1: Foursquare API key set up for parks

In [190]:
# Set up Foursquare API credentials (read from the environment so the key is never hardcoded)
api_key = os.environ.get('FOURSQUARE_API_KEY')

# Endpoint URL for the Foursquare place search request
endpoint = "https://api.foursquare.com/v3/places/search"


def parks_foursquare_query(latitude, longitude, timeout=10):
    """Search Foursquare for parks within 1000 m of a coordinate.

    Args:
        latitude: Latitude of the search centre; interpolated into the ``ll`` parameter.
        longitude: Longitude of the search centre; interpolated into the ``ll`` parameter.
        timeout: Seconds to wait for the server before the request is abandoned.
            Without a timeout a stalled connection would block the calling loop forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status (for example a
            rejected API key or a rate limit), so the failure is reported at the request
            instead of surfacing later as a missing key in the payload.
    """
    # NOTE(review): the category label is sent as the free-text `query` value;
    # confirm whether a category filter was intended instead.
    params = {
        'll': f'{latitude},{longitude}', 'radius': '1000', 'query': 'Landmarks and Outdoors > Park'
    }
    # Foursquare expects the API key in the Authorization header, see
    # https://location.foursquare.com/developer/reference/authentication
    headers = {
        "Accept": "application/json",
        "Authorization": api_key,
    }

    # Send the HTTP GET request and fail loudly on an error status
    response = requests.get(endpoint, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()

    # Hand the decoded JSON content back to the caller
    return response.json()



#### Step 2: Define functions to retrieve FourSquare results for Parks

In [191]:
# Fetch the raw Foursquare park search response for one coordinate
def bike_radius(latitude, longitude):
    """Return the full Foursquare park-search JSON for a coordinate.

    The ``'results'`` key is looked up before returning, so a response that
    lacks it raises ``KeyError`` rather than being handed back to the caller.
    """
    response_json = parks_foursquare_query(latitude, longitude)
    response_json['results']  # presence check only; the value itself is not used
    return response_json

# Turn the Foursquare park results around a station into a list of flat records
def bike_radius_info(latitude, longitude, location_name):
    """Return one record per Foursquare park result near a coordinate.

    Each record holds the place's ``fsq_id``, the ``location_name`` passed in
    by the caller, and ``poi_type``: the list of category names of the place.
    """
    response_json = parks_foursquare_query(latitude, longitude)
    return [
        {
            'fsq_id': place['fsq_id'],
            'location_name': location_name,
            'poi_type': [cat['name'] for cat in place['categories']],
        }
        for place in response_json['results']
    ]

#### Step 3: Create a loop to parse through the CityBikes data and retrieve point of interest (POI) details on parks from Foursquare

In [192]:
# read the file containing the locations
# NOTE(review): this is an absolute, machine-specific path; consider a path relative to the project.
bike_df = pd.read_csv('/Users/brittanyharding/LHL-Projects/Statistical-Modelling-with-Python/notebooks/London_CityBikes_Data.csv')

# Draw a reproducible sample of stations: random_state fixes which rows are picked on
# every re-run, and min() keeps the cell working if the file has fewer than 250 rows.
rand_df = bike_df.sample(n=min(250, len(bike_df)), random_state=42)

# De-duplicate whole (name, latitude, longitude) rows. Calling .unique() on each column
# separately can yield arrays of different lengths, which pairs a station name with the
# coordinates of a different station (or runs past the end of the shorter array).
stations = rand_df[['name', 'latitude', 'longitude']].drop_duplicates()
names = stations['name'].to_numpy()
latitudes = stations['latitude'].to_numpy()
longitudes = stations['longitude'].to_numpy()

# create an empty list to store the dictionaries
foursquare_parks_data_list = []

# loop through each station and collect the records returned by bike_radius_info
for station_name, lat, lon in tqdm(zip(names, latitudes, longitudes), total=len(stations)):
    # bike_radius_info already returns dicts with exactly the keys
    # 'fsq_id', 'location_name' and 'poi_type', so they are stored as-is
    foursquare_parks_data_list.extend(bike_radius_info(lat, lon, station_name))
    time.sleep(1)  # add 1 second delay between each iteration

 62%|██████▏   | 155/250 [03:29<02:08,  1.35s/it]


KeyboardInterrupt: 

#### Step 4:  Create a DataFrame with Foursquare park results

In [None]:
# Assemble the collected Foursquare park records into a single table
foursquare_parks_df = pd.DataFrame(data=foursquare_parks_data_list)

#### Step 5: Export DataFrame to CSV

In [None]:
# Write foursquare_parks_df to a CSV file (without the index column) for further review
foursquare_parks_df.to_csv(path_or_buf='foursquare_parks_df.csv', index=False)

# Yelp

### Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

#### Step 1: Yelp API key set up for restaurants

In [143]:
# Set up Yelp API credentials (read from the environment so the key is never hardcoded)
api_key = os.environ.get('YELP_API_KEY')

# Endpoint URL for the Yelp business search request
endpoint = "https://api.yelp.com/v3/businesses/search"


def restaurant_yelp_query(latitude, longitude, timeout=10):
    """Search Yelp for 'restaurant' businesses within 1000 m of a coordinate.

    Args:
        latitude: Latitude of the search centre, sent as the ``latitude`` parameter.
        longitude: Longitude of the search centre, sent as the ``longitude`` parameter.
        timeout: Seconds to wait for the server before the request is abandoned.
            Without a timeout a stalled connection would block the calling loop forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status (for example a
            rejected API key or a rate limit), so the failure is reported at the request
            instead of surfacing later as a missing key in the payload.
    """
    params = {
        'latitude': latitude, 'longitude': longitude, 'radius': 1000, 'term': 'restaurant'
    }
    # The API key is sent as a bearer token in the Authorization header
    headers = {'Authorization': f'Bearer {api_key}'}

    # Send the HTTP GET request and fail loudly on an error status
    response = requests.get(endpoint, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()

    # Hand the decoded JSON content back to the caller
    return response.json()

#### Step 2: Define functions to retrieve Yelp results for restaurants

In [144]:
# Retrieve the Yelp query results for a given latitude and longitude
def bike_radius(latitude, longitude):
    """Return the full Yelp restaurant-search JSON for a coordinate.

    No ``'results'`` key is looked up here: that key belongs to the Foursquare
    responses handled elsewhere in this notebook, whereas the Yelp matches are
    read from ``'businesses'`` (see ``bike_radius_info``). Indexing ``'results'``
    on a Yelp payload raised ``KeyError`` before anything could be returned.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.

    Returns:
        The decoded JSON response of ``restaurant_yelp_query``, unchanged.
    """
    return restaurant_yelp_query(latitude, longitude)

# Extract the relevant information from the Yelp query result as a list of dictionaries
def bike_radius_info(latitude, longitude, name):
    """Return one record per Yelp restaurant found near a coordinate.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        name: Label stored under ``'location'`` in every record (the caller
            passes the bike station name).

    Returns:
        A list of dicts with the keys ``id``, ``location``, ``categories``,
        ``rating``, ``name`` and ``price``. ``rating`` and ``price`` are ``''``
        when the business entry does not carry them.

    Raises:
        KeyError: If the response has no ``'businesses'`` key, or a business
            lacks ``id``, ``categories`` or ``name``.
    """
    json_content = restaurant_yelp_query(latitude, longitude)
    poi_list = []
    for poi in json_content['businesses']:
        poi_list.append({
            'id': poi['id'],
            'location': name,
            'categories': poi['categories'],
            # .get() supplies the '' fallback; an unguarded poi['rating'] lookup
            # would raise before any fallback could apply
            'rating': poi.get('rating', ''),
            'name': poi['name'],
            'price': poi.get('price', ''),
        })
    return poi_list

#### Step 3: Create a loop to parse through the CityBikes data and retrieve point of interest (POI) details on restaurants from Yelp

In [145]:
# read the file containing the locations
# NOTE(review): this is an absolute, machine-specific path; consider a path relative to the project.
bike_df = pd.read_csv('/Users/brittanyharding/LHL-Projects/Statistical-Modelling-with-Python/notebooks/London_CityBikes_Data.csv')

# `rand_df` is not created in this cell: it is the station sample drawn in the
# Foursquare cells earlier in the notebook, which must have been run first.

# De-duplicate whole (name, latitude, longitude) rows. Calling .unique() on each column
# separately can yield arrays of different lengths, which pairs a station name with the
# coordinates of a different station (or runs past the end of the shorter array).
stations = rand_df[['name', 'latitude', 'longitude']].drop_duplicates()
names = stations['name'].to_numpy()
latitudes = stations['latitude'].to_numpy()
longitudes = stations['longitude'].to_numpy()

# create an empty list to store the dictionaries
yelp_restaurant_data_list = []

# loop through each station and call the bike_radius_info function
for station_name, lat, lon in tqdm(zip(names, latitudes, longitudes), total=len(stations)):
    # `business` replaces the former loop variable `id`, which shadowed the builtin
    # and reused the name of the list being iterated
    for business in bike_radius_info(lat, lon, station_name):
        # keys are listed explicitly to fix the column order of the resulting table
        yelp_restaurant_data_list.append({
            'id': business['id'],
            'name': business['name'],
            'location': business['location'],
            'categories': business['categories'],
            'rating': business['rating'],
            'price': business['price'],
        })
        

100%|██████████| 250/250 [02:51<00:00,  1.45it/s]


#### Step 4:  Create a DataFrame with Yelp restaurant results

In [146]:
# Assemble the collected Yelp restaurant records into a single table
yelp_restaurants_df = pd.DataFrame(data=yelp_restaurant_data_list)

#### Step 5: Export DataFrame to CSV

In [175]:
# Write yelp_restaurants_df to a CSV file (without the index column) for further review
yelp_restaurants_df.to_csv(path_or_buf='yelp_restaurants_df.csv', index=False)

## Yelp Parks

#### Step 1: Yelp API key set up for parks

In [187]:
# Set up Yelp API credentials (read from the environment so the key is never hardcoded)
api_key = os.environ.get('YELP_API_KEY')

# Endpoint URL for the Yelp business search request
endpoint = "https://api.yelp.com/v3/businesses/search"


def park_yelp_query(latitude, longitude, timeout=10):
    """Search Yelp for 'parks' businesses within 1000 m of a coordinate.

    Args:
        latitude: Latitude of the search centre, sent as the ``latitude`` parameter.
        longitude: Longitude of the search centre, sent as the ``longitude`` parameter.
        timeout: Seconds to wait for the server before the request is abandoned.
            Without a timeout a stalled connection would block the calling loop forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status (for example a
            rejected API key or a rate limit), so the failure is reported at the request
            instead of surfacing later as a missing key in the payload.
    """
    params = {
        'latitude': latitude, 'longitude': longitude, 'radius': 1000, 'term': 'parks'
    }
    # The API key is sent as a bearer token in the Authorization header
    headers = {'Authorization': f'Bearer {api_key}'}

    # Send the HTTP GET request and fail loudly on an error status
    response = requests.get(endpoint, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()

    # Hand the decoded JSON content back to the caller
    return response.json()

#### Step 2: Define functions to retrieve Yelp results for parks

In [188]:
# Retrieve the Yelp query results for a given latitude and longitude
def bike_radius(latitude, longitude):
    """Return the full Yelp park-search JSON for a coordinate.

    No ``'results'`` key is looked up here: that key belongs to the Foursquare
    responses handled elsewhere in this notebook, whereas the Yelp matches are
    read from ``'businesses'`` (see ``bike_radius_info``). Indexing ``'results'``
    on a Yelp payload raised ``KeyError`` before anything could be returned.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.

    Returns:
        The decoded JSON response of ``park_yelp_query``, unchanged.
    """
    return park_yelp_query(latitude, longitude)

# Extract the relevant information from the Yelp query result as a list of dictionaries
def bike_radius_info(latitude, longitude, name):
    """Return one record per Yelp park found near a coordinate.

    Args:
        latitude: Latitude of the search centre.
        longitude: Longitude of the search centre.
        name: Label stored under ``'location'`` in every record (the caller
            passes the bike station name).

    Returns:
        A list of dicts with the keys ``id``, ``location``, ``categories``,
        ``rating``, ``name`` and ``price``. ``rating`` and ``price`` are ``''``
        when the business entry does not carry them.

    Raises:
        KeyError: If the response has no ``'businesses'`` key, or a business
            lacks ``id``, ``categories`` or ``name``.
    """
    json_content = park_yelp_query(latitude, longitude)
    poi_list = []
    for poi in json_content['businesses']:
        poi_list.append({
            'id': poi['id'],
            'location': name,
            'categories': poi['categories'],
            # .get() supplies the '' fallback; an unguarded poi['rating'] lookup
            # would raise before any fallback could apply
            'rating': poi.get('rating', ''),
            'name': poi['name'],
            'price': poi.get('price', ''),
        })
    return poi_list

#### Step 3: Create a loop to parse through the CityBikes data and retrieve point of interest (POI) details on parks from Yelp

In [189]:
# read the file containing the locations
# NOTE(review): this is an absolute, machine-specific path; consider a path relative to the project.
bike_df = pd.read_csv('/Users/brittanyharding/LHL-Projects/Statistical-Modelling-with-Python/notebooks/London_CityBikes_Data.csv')

# `rand_df` is not created in this cell: it is the station sample drawn in the
# Foursquare cells earlier in the notebook, which must have been run first.

# De-duplicate whole (name, latitude, longitude) rows. Calling .unique() on each column
# separately can yield arrays of different lengths, which pairs a station name with the
# coordinates of a different station (or runs past the end of the shorter array).
stations = rand_df[['name', 'latitude', 'longitude']].drop_duplicates()
names = stations['name'].to_numpy()
latitudes = stations['latitude'].to_numpy()
longitudes = stations['longitude'].to_numpy()

# create an empty list to store the dictionaries
yelp_park_data_list = []

# loop through each station and call the bike_radius_info function
for station_name, lat, lon in tqdm(zip(names, latitudes, longitudes), total=len(stations)):
    # `business` replaces the former loop variable `id`, which shadowed the builtin
    # and reused the name of the list being iterated
    for business in bike_radius_info(lat, lon, station_name):
        # keys are listed explicitly to fix the column order of the resulting table
        yelp_park_data_list.append({
            'id': business['id'],
            'name': business['name'],
            'location': business['location'],
            'categories': business['categories'],
            'rating': business['rating'],
            'price': business['price'],
        })
        

100%|██████████| 250/250 [01:23<00:00,  2.99it/s]


#### Step 4:  Create a DataFrame with Yelp park results

In [150]:
# Assemble the collected Yelp park records into a single table
yelp_parks_df = pd.DataFrame(data=yelp_park_data_list)

#### Step 5: Export DataFrame to CSV

In [173]:
# Write yelp_parks_df to a CSV file (without the index column) for further review
yelp_parks_df.to_csv(path_or_buf='yelp_parks_df.csv', index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating