# Efficient Yelp API Calls

In [1]:
#import necessary library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
#additional imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
#Load in your API credentials using the JSON module
#display what keys are in the dictionary
with open ('/Users/chas/.secret/yelp_api.json') as f:
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [3]:
#create an instance with the key
yelp_api = YelpAPI(login['api-key'], timeout_s = 5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x7fdd38608290>

In [4]:
#set our API call parametrs and filename before first call
LOCATION = 'Manhattan, NY, 10010'
TERM = 'Pizza'

In [5]:
#specify JSON_FILE filename
#include the searchterms in the filename
JSON_FILE = f"Data/results_in_progress_pizza.json"
JSON_FILE

'Data/results_in_progress_pizza.json'

In [6]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:
    
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    ## If JSON_FILE included a folder:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder,exist_ok=True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open(JSON_FILE,'w') as f:
        json.dump([],f)  
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_progress_pizza.json not found. Saving empty list to file.


In [7]:
#load previous results and use len of results for offset
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

#set offset based on previous results
n_results = len(previous_results)
print(f'-{n_results} previous results found.')

-0 previous results found.


In [8]:
#use our yelp_api variable's search query method to perform
#our API call
results = yelp_api.search_query(location=LOCATION, term=TERM,
                               offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [9]:
#how many results in total?
total_results = results['total']
total_results

1700

In [10]:
#how many did we get details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [11]:
#use math.ceil to round up for the total number of page results
n_pages = math.ceil((results['total']-n_results)/results_per_page)
n_pages

85

In [12]:
#join new results with old list with extend and save to file
previous_results.extend(results['businesses'])
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

In [13]:
#set up progress bar in loop
for i in tqdm_notebook(range(n_pages)):
    time.sleep(.2) #adds 200ms pause

  0%|          | 0/85 [00:00<?, ?it/s]

In [14]:
#for loop to call each page
for i in tqdm_notebook(range(1, n_pages+1)):
    try:
        time.sleep(.2)
        #read results in. progress file and check length
        with open (JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        #sae number of results to use as offset
        n_results = len(previous_results)
        #use n_results as offset
        results = yelp_api.search_query(location = LOCATION,
                                       term = TERM,
                                       offset = n_results+1)
        #append new results and save to file
        previous_results.extend(results['businesses'])
        #disply previous results
        with open (JSON_FILE, 'w') as f:
            json.dump(previous_results, f)
    except Exception as e: 
        print('[!] ERROR: ', e)
        break
print('Loop completed.')

  0%|          | 0/85 [00:00<?, ?it/s]

[!] ERROR:  VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
Loop completed.


In [15]:
#load final results
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,vVVmHacu2YAxKfpIjhfd1g,norma-new-york-5,Norma,https://s3-media3.fl.yelpcdn.com/bphoto/qJKt5l...,False,https://www.yelp.com/biz/norma-new-york-5?adju...,516,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 40.743553, 'longitude': -73.980026}","[pickup, delivery]",$$,"{'address1': '438 3rd Ave', 'address2': None, ...",12128890600,(212) 889-0600,575.817909
1,kIm0oeOaVfC9-2flLPVklA,patrizias-pizza-and-pasta-new-york,Patrizia's Pizza and Pasta,https://s3-media1.fl.yelpcdn.com/bphoto/HG4eRB...,False,https://www.yelp.com/biz/patrizias-pizza-and-p...,695,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.0,"{'latitude': 40.7398415, 'longitude': -73.9792...","[pickup, delivery]",$$,"{'address1': '462-466 2nd Ave', 'address2': ''...",12125979999,(212) 597-9999,331.851292
2,zj8Lq1T8KIC5zwFief15jg,prince-street-pizza-new-york-2,Prince Street Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/ZAukOy...,False,https://www.yelp.com/biz/prince-street-pizza-n...,4286,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 40.72308755605564, 'longitude': -...","[pickup, delivery]",$,"{'address1': '27 Prince St', 'address2': None,...",12129664100,(212) 966-4100,2014.210598
3,0UdC0VYX6aAvLiOcpR4MVQ,joes-pizza-new-york-7,Joe's Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/BTCcvl...,False,https://www.yelp.com/biz/joes-pizza-new-york-7...,977,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.73331, 'longitude': -73.98763}","[pickup, delivery]",$,"{'address1': '150 E 14th St', 'address2': '', ...",12123889474,(212) 388-9474,748.773398
4,J38XGzriEkMsDfCMt6et_g,vezzo-nycthincrust-pizza-new-york-2,VEZZO NYCThinCrust Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/UaxaZP...,False,https://www.yelp.com/biz/vezzo-nycthincrust-pi...,1330,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 40.744435447210506, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '178 Lexington Ave', 'address2': ...",12128398300,(212) 839-8300,629.840528


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
975,fjGTnLTvt64N8sk0_MZcGA,8th-street-winecellar-new-york-2,8th Street Winecellar,https://s3-media4.fl.yelpcdn.com/bphoto/IsRfNy...,False,https://www.yelp.com/biz/8th-street-winecellar...,362,"[{'alias': 'wine_bars', 'title': 'Wine Bars'},...",4.0,"{'latitude': 40.7327981, 'longitude': -73.9978...",[delivery],$$,"{'address1': '28 W 8th St', 'address2': '', 'a...",12122609463,(212) 260-9463,1435.9476
976,auQNB6NU6gkL6BrePqyZpQ,one-twenty-two-new-york,One Twenty-Two,https://s3-media3.fl.yelpcdn.com/bphoto/BpnECJ...,False,https://www.yelp.com/biz/one-twenty-two-new-yo...,2,"[{'alias': 'pizza', 'title': 'Pizza'}]",3.5,"{'latitude': 40.72707, 'longitude': -73.985288}",[],$$,"{'address1': '122 1st Ave', 'address2': '', 'a...",12125332809,(212) 533-2809,1328.572642
977,ehUuSk5gPTCQmwS_ubgKRA,upstate-craft-beer-and-oyster-bar-new-york-4,Upstate Craft Beer & Oyster Bar,https://s3-media2.fl.yelpcdn.com/bphoto/V3pVDP...,False,https://www.yelp.com/biz/upstate-craft-beer-an...,1964,"[{'alias': 'seafood', 'title': 'Seafood'}]",4.5,"{'latitude': 40.726373, 'longitude': -73.986557}","[restaurant_reservation, delivery]",$$,"{'address1': '95 1st Ave', 'address2': None, '...",16467915400,(646) 791-5400,1425.08302
978,w6rW21MYzwysEbCTosfD2w,holyland-market-new-york,Holyland Market,https://s3-media2.fl.yelpcdn.com/bphoto/lPqA0h...,False,https://www.yelp.com/biz/holyland-market-new-y...,48,"[{'alias': 'importedfood', 'title': 'Imported ...",4.5,"{'latitude': 40.72689, 'longitude': -73.98385}","[pickup, delivery]",$$,"{'address1': '122 St Marks Pl', 'address2': ''...",12124774440,(212) 477-4440,1334.075947
979,kPOPOTorOJeYkQNB9AgjkA,cafe-900-new-york,Cafe 900,,False,https://www.yelp.com/biz/cafe-900-new-york?adj...,1,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",5.0,"{'latitude': 40.75424, 'longitude': -73.96567}",[],,"{'address1': '900 1st Ave', 'address2': '', 'a...",12128290721,(212) 829-0721,2236.335234


In [16]:
#check for duplicates
final_df.duplicated(subset='id').sum()

1

In [17]:
final_df[final_df.duplicated(subset='id')]

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
642,Ri67Ia7k0bBmgTCLvlbRBQ,accent-restaurant-and-bar-new-york,Accent Restaurant & Bar,https://s3-media3.fl.yelpcdn.com/bphoto/yeMC-5...,False,https://www.yelp.com/biz/accent-restaurant-and...,66,"[{'alias': 'mideastern', 'title': 'Middle East...",4.5,"{'latitude': 40.74303, 'longitude': -73.97986}","[pickup, delivery]",,"{'address1': '429 3rd Ave', 'address2': '', 'a...",12126794929,(212) 679-4929,524.80998


In [19]:
#drop the duplicates
final_df = final_df.drop_duplicates(subset='id')

In [20]:
#make sure duplicates were dropped
final_df.duplicated(subset='id').sum()

0

In [21]:
#save the final results to a compressed csv
final_df.to_csv('Data/Final_results_pizza.csv.gz', 
               compression = 'gzip', index=False)