# Effictient Yelp API Calls (CORE)

**Marco Jimenez 6/3/2022**

In [77]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [78]:
# Loading API Credentials
with open('/Users/LP-Ca/.secret/yelp_api.json') as f:
    creds = json.load(f)
creds.keys()

dict_keys(['client id', 'API key'])

In [79]:
# Instantiating YelpAPI Variable
yelp_api = YelpAPI(creds['API key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1fffc1365c8>

In [80]:
# Defining variables for search conditions
LOCATION = 'Westerly, RI,02804'
TERM = 'Pizza'

**Creating results-in-progress JSON file**

In [81]:
JSON_FILE = f"Data/results_in_progress_Pizza.json"
JSON_FILE

'Data/results_in_progress_Pizza.json'

In [82]:
# Checking if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
# For loop in case it does not exist
if file_exists == False:
    # Getting folder name
    folder = os.path.dirname(JSON_FILE)
    # If JSON_FILE is included in folder:
    if len(folder)>0:
        # Create folder
        os.makedirs(folder, exist_ok=True)
    print(f"[i] {JSON_FILE} not found. Saving empty list to file")       
        
    # Saving first page of results
    with open(JSON_FILE,'w') as f:
        json.dump([],f)  
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_progress_Pizza.json not found. Saving empty list to file


In [83]:
# Loading previous results and using len of results for offset
with open(JSON_FILE,'r') as f:
    prev_results = json.load(f)
    
## set offset based on previous results
n_results = len(prev_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


**Figuring out required number of pages**

In [84]:
# Performing API call and displayin dict keys
results = yelp_api.search_query(location=LOCATION,
                               term=TERM,
                               offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [85]:
# Getting total number of results
total_results = results['total']
total_results

118

In [86]:
# Getting number of results per page
results_per_page = len(results['businesses'])
results_per_page

20

In [87]:
# Getting the number of pages by subtracting n_offset from the total and dividing the remainder by the number of results per page 
n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

6

**Adding results to .json file**

In [88]:
prev_results.extend(results['businesses'])
with open(JSON_FILE, 'w') as f:
     json.dump(prev_results,f)

**For loop to call each page**

In [89]:
pip install tqdm

Note: you may need to restart the kernel to use updated packages.


In [90]:
for i in tqdm_notebook( range(1,n_pages+1)):
    time.sleep(.2)
    # Reading in results in progress file and checking the length
    with open(JSON_FILE, 'r') as f:
        prev_results = json.load(f)
    # Saving number of results to use as offset
    n_results = len(prev_results)
    # Using n_results as the offset value 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    # Appending new results and saving to file
    prev_results.extend(results['businesses'])
    
# Displaying previous_results
    with open(JSON_FILE,'w') as f:
        json.dump(prev_results,f)

  0%|          | 0/6 [00:00<?, ?it/s]

In [91]:
# Loading 'results in progress' json file into a dataframe
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,7oq4EY0WEO840xfLS6c9iQ,main-street-pizza-ashaway-2,Main Street Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/Y7v9sw...,False,https://www.yelp.com/biz/main-street-pizza-ash...,29,"[{'alias': 'pizza', 'title': 'Pizza'}]",3.5,"{'latitude': 41.427066, 'longitude': -71.784767}",[],$,"{'address1': '229 Main', 'address2': None, 'ad...",14013776860,(401) 377-6860,1784.570861
1,VagHPaOSlmrUCowH7lzWmw,vittorias-ny-pizza-westerly,Vittoria's NY Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/y0mGE2...,False,https://www.yelp.com/biz/vittorias-ny-pizza-we...,67,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 41.351879, 'longitude': -71.7709944}",[delivery],$$,"{'address1': '224 Post Rd', 'address2': 'Ste 3...",14013221901,(401) 322-1901,8460.528206
2,EDSQSFjQrrD56led7AvTpQ,mr-pizza-westerly,Mr. Pizza,https://s3-media1.fl.yelpcdn.com/bphoto/_-FNJL...,False,https://www.yelp.com/biz/mr-pizza-westerly?adj...,18,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 41.36617, 'longitude': -71.830403...",[delivery],,"{'address1': '49 Beach St', 'address2': None, ...",14015967698,(401) 596-7698,8839.788579
3,Zyfo5HuK1i1YZCv3U-GvwQ,the-pizza-lady-pawcatuck,The Pizza Lady,https://s3-media1.fl.yelpcdn.com/bphoto/16ZFRx...,False,https://www.yelp.com/biz/the-pizza-lady-pawcat...,37,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 41.379569, 'longitude': -71.836204}",[delivery],$,"{'address1': '50 Liberty St', 'address2': None...",18605991113,(860) 599-1113,8098.993882
4,zgNte7hf2UZsXpTBIoJkJQ,casa-della-luce-westerly,Casa Della Luce,https://s3-media1.fl.yelpcdn.com/bphoto/oAIn8p...,False,https://www.yelp.com/biz/casa-della-luce-weste...,143,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.0,"{'latitude': 41.360386, 'longitude': -71.811803}",[delivery],$$,"{'address1': '105 Franklin St', 'address2': ''...",14016374575,(401) 637-4575,8510.303267


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
113,4iPrkrivosySWnUao6mv-g,high-rollers-luxury-lanes-and-sports-lounge-ma...,High Rollers Luxury Lanes & Sports Lounge,https://s3-media2.fl.yelpcdn.com/bphoto/iENGJd...,False,https://www.yelp.com/biz/high-rollers-luxury-l...,133,"[{'alias': 'bowling', 'title': 'Bowling'}, {'a...",3.5,"{'latitude': 41.473964, 'longitude': -71.960782}",[],$$,"{'address1': '350 Trolley Line Blvd', 'address...",18603122695.0,(860) 312-2695,17235.746302
114,KAYnFEZLlgdPlS15SGJIMw,steak-loft-restaurant-mystic,Steak Loft Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/90SW9l...,False,https://www.yelp.com/biz/steak-loft-restaurant...,345,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",3.0,"{'latitude': 41.3731719941441, 'longitude': -7...",[delivery],$$,"{'address1': '27 Coogan Blvd 24', 'address2': ...",18605362661.0,(860) 536-2661,17420.300422
115,MVvT5HTMBa3RpIxQnJcepA,rainmaker-buffet-mashantucket-4,Rainmaker Buffet,https://s3-media2.fl.yelpcdn.com/bphoto/l4Xz1o...,False,https://www.yelp.com/biz/rainmaker-buffet-mash...,60,"[{'alias': 'buffets', 'title': 'Buffets'}]",2.5,"{'latitude': 41.47338533, 'longitude': -71.959...",[],$$,"{'address1': '350 Trolley Line Blvd', 'address...",,,17153.122078
116,zeb7QrrsbgcY-F0rmGybyw,veranda-cafe-mashantucket,Veranda Cafe,https://s3-media3.fl.yelpcdn.com/bphoto/NvxEbv...,False,https://www.yelp.com/biz/veranda-cafe-mashantu...,84,"[{'alias': 'breakfast_brunch', 'title': 'Break...",2.5,"{'latitude': 41.4739246, 'longitude': -71.9612...",[delivery],$$,"{'address1': '350 Trolley Line Blvd', 'address...",18003699663.0,(800) 369-9663,17268.675038
117,8BNwIfYSksfO1ljCkaie5A,shrine-restaurant-ledyard,Shrine Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/m0jgig...,False,https://www.yelp.com/biz/shrine-restaurant-led...,71,"[{'alias': 'lounges', 'title': 'Lounges'}, {'a...",2.5,"{'latitude': 41.47354, 'longitude': -71.9573}",[],$$$,"{'address1': '39 Norwich Westerly Rd', 'addres...",18603128888.0,(860) 312-8888,17148.038383


In [93]:
# Checking for duplicates
final_df.duplicated(subset='id').sum()

0

In [94]:
# Saving final Dataframe to a .csv file
final_df.to_csv('Data/final_results_Pizza.csv.gz', compression='gzip', index=False)