# Core - Efficient Yelp API Calls

In [2]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [4]:
with open ('/Users/chris/.secret/yelp_api.json') as f:
        login = json.load(f)
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [5]:
# setting API call params
LOCATION = 'Dallas, TX'
TERM = 'Tacos'

In [8]:
# spec json_file 
JSON_FILE = "Data/TX_tacos_results.json"
JSON_FILE

'Data/TX_tacos_results.json'

In [10]:
# checking if json file exists
file_exist = os.path.isfile(JSON_FILE)

if file_exist == False:
    folder = os.path.dirname(JSON_FILE)
    
    if len(folder)>0:
        os.makedirs(folder,exist_ok=True)
    
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')
    
    with open(JSON_FILE,'w') as f:
        json.dump([],f)
        
else:
    print(f'[i] {JSON_FILE} already exists.')

[i] Data/TX_tacos_results.json already exists.


In [12]:
# loading previous results, using len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)

n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [13]:
# determining how many pages needed
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [14]:
# grabbing total results
total_results = results['total']
total_results

4300

In [15]:
# determing how many details obtained
results_per_page = len(results['businesses'])
results_per_page

20

In [16]:
# rounding total pages of results
n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

215

In [17]:
# joining new results with old list
previous_results.extend(results['businesses'])
with open(JSON_FILE,'w') as f:
    json.dump(previous_results,f)

In [18]:
# for loop to call each page
for i in tqdm_notebook( range(1,n_pages+1)):
    with open(JSON_FILE,'r') as f:
        previous_results = json.load(f)
    
    n_results = len(previous_results)
    
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)
    
    previous_results.extend(results['businesses'])
    
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)
        
    time.sleep(.2)

  0%|          | 0/215 [00:00<?, ?it/s]

YelpAPIError: VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.

In [19]:
# removing file and confirming programmatic way
os.remove(JSON_FILE)
os.path.isfile(JSON_FILE)

False

In [20]:
# creating function to accept json_file
def create_json_file(JSON_FILE, delete_if_exists=False):
    
    file_exists = os.path.isfile(JSON_FILE)
    
    # IF file exists
    if file_exists == True:
        
        if delete_if_exists == True:
            print(f'[!] {JSON_FILE} already exists. Deleting previous file..')
            # deleting & confirming file removal
            os.remove(JSON_FILE)
            
            create_json_file(JSON_FILE, delete_if_exists=False)
        else:
            print(f'[i] {JSON_FILE} already exists.')
            
    # if file does NOT exist        
    else:
        
        print(f'[i] {JSON_FILE} not found. Saving empty list to new file.')
        
        folder = os.path.dirname(JSON_FILE)
        
        if len(folder)>0:
            os.makedirs(folder, exist_ok=True)
            
        with open(JSON_FILE,'w') as f:
            json.dump([],f)

In [21]:
# creating new empty json file
create_json_file(JSON_FILE, delete_if_exists=True)

with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)

n_results = len(previous_results)
print(f'- {n_results} previous results found.')

results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)

total_results = results['total']

results_per_page = len(results['businesses'])

n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

[i] Data/TX_tacos_results.json not found. Saving empty list to new file.
- 0 previous results found.


215

In [22]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    with open(JSON_FILE,'r') as f:
        previous_results=json.load(f)
    
    n_results = len(previous_results)
    
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
        
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                   offset=n_results)
    
    previous_results.extend(results['businesses'])
    
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)
        
    time.sleep(.2)

  0%|          | 0/215 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [23]:
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,end4ugjsE0EWtGWswkprOA,mami-coco-dallas-2,Mami Coco,https://s3-media4.fl.yelpcdn.com/bphoto/Ru6JAt...,False,https://www.yelp.com/biz/mami-coco-dallas-2?ad...,295,"[{'alias': 'tacos', 'title': 'Tacos'}]",5.0,"{'latitude': 32.8003812, 'longitude': -96.777516}","[delivery, pickup]",$,"{'address1': '4500 Bryan St', 'address2': 'Ste...",14699962834,(469) 996-2834,4807.24147
1,14CB7s3RctxhLAvvI4sQMQ,el-come-taco-dallas,El Come Taco,https://s3-media4.fl.yelpcdn.com/bphoto/tc_jv9...,False,https://www.yelp.com/biz/el-come-taco-dallas?a...,567,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 32.8123115, 'longitude': -96.7835...","[delivery, pickup]",$,"{'address1': '2513 N Fitzhugh Ave', 'address2'...",12148213738,(214) 821-3738,3355.929875
2,6c7hh5pjyGZxaJiyGZBG5g,tacos-la-banqueta-dallas-4,Tacos La Banqueta,https://s3-media4.fl.yelpcdn.com/bphoto/3IIw5H...,False,https://www.yelp.com/biz/tacos-la-banqueta-dal...,537,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 32.79954, 'longitude': -96.7771099}",[delivery],$,"{'address1': '1305 N Carroll Ave', 'address2':...",12148231260,(214) 823-1260,4910.418498
3,Y6JRG1rjgplB8I_YHWjpkA,el-tacaso-dallas-5,El Tacaso,https://s3-media2.fl.yelpcdn.com/bphoto/ezdi1-...,False,https://www.yelp.com/biz/el-tacaso-dallas-5?ad...,282,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 32.85853, 'longitude': -96.874457...",[delivery],$,"{'address1': '2739 W NW Hwy', 'address2': '', ...",12143660176,(214) 366-0176,7237.278571
4,GbKN-edIpqhNXH_iItk04w,velvet-taco-dallas-16,Velvet Taco,https://s3-media3.fl.yelpcdn.com/bphoto/qsehTm...,False,https://www.yelp.com/biz/velvet-taco-dallas-16...,2953,"[{'alias': 'newamerican', 'title': 'American (...",4.5,"{'latitude': 32.8216823671486, 'longitude': -9...","[delivery, pickup]",$,"{'address1': '3012 N Henderson Ave', 'address2...",12148238358,(214) 823-8358,2394.176839


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,mhehMNiw_XglMpcLr1G7pQ,chante-mexican-grill-and-cantina-lewisville,Chante Mexican Grill & Cantina,https://s3-media2.fl.yelpcdn.com/bphoto/YtRNLL...,False,https://www.yelp.com/biz/chante-mexican-grill-...,244,"[{'alias': 'mexican', 'title': 'Mexican'}, {'a...",4.5,"{'latitude': 33.006418, 'longitude': -97.013369}","[delivery, pickup]",$$,"{'address1': '951 W Round Grove Rd 200', 'addr...",14692938098.0,(469) 293-8098,27221.320478
996,VDX3T2TuiGdqZ_aV4syh5g,taco-grande-dallas,Taco Grande,,False,https://www.yelp.com/biz/taco-grande-dallas?ad...,1,"[{'alias': 'mexican', 'title': 'Mexican'}]",5.0,"{'latitude': 32.8807851, 'longitude': -96.8790...",[],$,"{'address1': '2962 Walnut Hill Ln', 'address2'...",,,8659.055107
997,d3qYWYZZ55unPi8Ka46mjw,placidos-cocina-dallas-2,Placidos Cocina,https://s3-media2.fl.yelpcdn.com/bphoto/XJypag...,False,https://www.yelp.com/biz/placidos-cocina-dalla...,9,"[{'alias': 'tex-mex', 'title': 'Tex-Mex'}, {'a...",3.5,"{'latitude': 32.7637319890399, 'longitude': -9...",[delivery],,"{'address1': '6939 Scyene Rd', 'address2': '',...",12149298829.0,(214) 929-8829,12513.066435
998,t7_wOHU3wLx7oMBgDtgicQ,taqueria-tepito-dallas-richardson,Taqueria Tepito - Dallas,https://s3-media2.fl.yelpcdn.com/bphoto/jEMrJL...,False,https://www.yelp.com/biz/taqueria-tepito-dalla...,1,"[{'alias': 'mexican', 'title': 'Mexican'}]",5.0,"{'latitude': 33.0014865, 'longitude': -96.6830...","[delivery, pickup]",,"{'address1': '3601 N Jupiter Rd', 'address2': ...",14696260047.0,(469) 626-0047,21130.257569
999,7ijKB11MZR4_bhsyJCfCGg,koryo-kalbi-korean-bbq-dallas,Koryo Kalbi Korean BBQ,https://s3-media2.fl.yelpcdn.com/bphoto/EMrEEb...,False,https://www.yelp.com/biz/koryo-kalbi-korean-bb...,897,"[{'alias': 'korean', 'title': 'Korean'}]",4.0,"{'latitude': 32.895487, 'longitude': -96.893252}",[pickup],$$,"{'address1': '2560 Royal Ln', 'address2': 'Ste...",12142727486.0,(214) 272-7486,10694.508271


In [24]:
# checking duplicates
final_df.duplicated(subset='id').sum()

0