# Effictient Yelp API Calls (CORE)

**Marco Jimenez 6/3/2022**

In [69]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [70]:
# Loading API Credentials
with open('/Users/LP-Ca/.secret/yelp_api.json') as f:
    creds = json.load(f)
creds.keys()

dict_keys(['client id', 'API key'])

In [71]:
# Instantiating YelpAPI Variable
yelp_api = YelpAPI(creds['API key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1fffc261d08>

In [72]:
# Defining variables for search conditions
LOCATION = 'Westerly, RI,02804'
TERM = 'Pizza'

**Creating results-in-progress JSON file**

In [73]:
JSON_FILE = f"Data/results_in_progress_Pizza.json"
JSON_FILE

'Data/results_in_progress_Pizza.json'

In [74]:
# Checking if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
# For loop in case it does not exist
if file_exists == False:
    # Getting folder name
    folder = os.path.dirname(JSON_FILE)
    # If JSON_FILE is included in folder:
    if len(folder)>0:
        # Create folder
        os.makedirs(folder, exist_ok=True)
    print(f"[i] {JSON_FILE} not found. Saving empty list to file")       
        
    # Saving first page of results
    with open(JSON_FILE,'w') as f:
        json.dump([],f)  
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_progress_Pizza.json already exists.


In [75]:
# Loading previous results and using len of results for offset
with open(JSON_FILE,'r') as f:
    prev_results = json.load(f)
    
## set offset based on previous results
n_results = len(prev_results)
print(f'- {n_results} previous results found.')

- 1000 previous results found.


**Figuring out required number of pages**

In [76]:
# Performing API call and displayin dict keys
results = yelp_api.search_query(location=LOCATION,
                               term=TERM,
                               offset=n_results)
results.keys()

YelpAPIError: VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.

In [None]:
# Getting total number of results
total_results = results['total']
total_results

In [None]:
# Getting number of results per page
results_per_page = len(results['businesses'])
results_per_page

In [None]:
# Getting the number of pages by subtracting n_offset from the total and dividing the remainder by the number of results per page 
n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

**Adding results to .json file**

In [None]:
prev_results.extend(results['businesses'])
with open(JSON_FILE, 'w') as f:
     json.dump(prev_results,f)

**For loop to call each page**

In [None]:
pip install tqdm

In [None]:
for i in tqdm_notebook( range(1,n_pages+1)):
    time.sleep(.2)
    # Reading in results in progress file and checking the length
    with open(JSON_FILE, 'r') as f:
        prev_results = json.load(f)
    # Saving number of results to use as offset
    n_results = len(prev_results)
    # Using n_results as the offset value 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    # Appending new results and saving to file
    prev_results.extend(results['businesses'])
    
# Displaying previous_results
    with open(JSON_FILE,'w') as f:
        json.dump(prev_results,f)

In [None]:
# Loading 'results in progress' json file into a dataframe
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

In [None]:
# Saving final dataframe to a .csv