# Efficient Yelp API Calls (Core)

In [4]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from tqdm.notebook import tqdm_notebook

In [5]:
!pip install yelpapi

Collecting yelpapi
  Downloading yelpapi-2.5.0-py3-none-any.whl (7.4 kB)
Installing collected packages: yelpapi
Successfully installed yelpapi-2.5.0


In [6]:
from yelpapi import YelpAPI

### Load Credentials and Create Yelp API Object

In [7]:
# Load API Credentials
with open ('C:/Users/badbr/.secret/yelp_api.json', 'r') as f:
    login = json.load(f)

In [9]:
# Instantiate YelpAPI Variable
Yelp = YelpAPI(login['api-key'], timeout_s = 5.0)

### Define Search Terms and File Paths

In [11]:
# set our API call parameters and filename before the first call
location = 'Orlando, FL 34786'
term = 'steak'

In [12]:
location.split(',')[0]

'Orlando'

In [15]:
## Specify fodler for saving data
FOLDER = 'Data/'

os.makedirs(FOLDER, exist_ok = True)

# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"

In [16]:
JSON_FILE

'Data/Orlando-steak.json'

### Check if Json File exists and Create it if it doesn't

In [24]:
## Check if JSON_FILE exists
file_exist = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exist == False:  
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON_FILE included a folder:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder, exist_ok = True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f'[i]{JSON_FILE} not found. Saving empty list to file.')
    
    
    ## save the first page of results
    with open (JSON_FILE, 'w') as f:
        json.dump([], f)
## If it exists, inform user
else:
    print(f'[i] {JSON_FILE} already exists.')

[i] Data/Orlando-steak.json already exists.


### Make the first API call to get the first page of data

In [26]:
# use our yelp_api variable's search_query method to perform our API call
results = Yelp.search_query(term = term, location = location)

In [27]:
type(results)

dict

In [28]:
len(results)

3

In [29]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [30]:
results['total']

1900

In [31]:
results['region']

{'center': {'longitude': -81.55563354492188, 'latitude': 28.479517002460295}}

In [32]:
results['businesses']

[{'id': '9rIAl_UPCS3ODRMdSyaPgA',
  'alias': 'steak-on-fire-orlando',
  'name': 'Steak on Fire',
  'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/vzahwTxp5vh2LRwMNFwOQw/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/steak-on-fire-orlando?adjust_creative=0sqSARBZBNeJkSSMstb4jg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=0sqSARBZBNeJkSSMstb4jg',
  'review_count': 358,
  'categories': [{'alias': 'steak', 'title': 'Steakhouses'},
   {'alias': 'brazilian', 'title': 'Brazilian'},
   {'alias': 'newamerican', 'title': 'American (New)'}],
  'rating': 4.5,
  'coordinates': {'latitude': 28.4506829, 'longitude': -81.485305},
  'transactions': ['pickup', 'delivery'],
  'price': '$$',
  'location': {'address1': '7541 Sand Lake Rd',
   'address2': 'Ste A',
   'address3': '',
   'city': 'Orlando',
   'zip_code': '32819',
   'country': 'US',
   'state': 'FL',
   'display_address': ['7541 Sand Lake Rd', 'Ste A', 'Orlando, FL 32819']},
  'phone': '+14074

In [34]:
## How many results total?
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,9rIAl_UPCS3ODRMdSyaPgA,steak-on-fire-orlando,Steak on Fire,https://s3-media3.fl.yelpcdn.com/bphoto/vzahwT...,False,https://www.yelp.com/biz/steak-on-fire-orlando...,358,"[{'alias': 'steak', 'title': 'Steakhouses'}, {...",4.5,"{'latitude': 28.4506829, 'longitude': -81.485305}","[pickup, delivery]",$$,"{'address1': '7541 Sand Lake Rd', 'address2': ...",14074402323,(407) 440-2323,7585.656804
1,yS73rFfQFV2ggXQSlhv3Uw,matthews-steakhouse-winter-garden-2,Matthew's Steakhouse,https://s3-media3.fl.yelpcdn.com/bphoto/jkRiGG...,False,https://www.yelp.com/biz/matthews-steakhouse-w...,133,"[{'alias': 'steak', 'title': 'Steakhouses'}]",4.0,"{'latitude': 28.5645698339131, 'longitude': -8...",[delivery],$$$,"{'address1': '360 W Plant St', 'address2': '',...",14075207511,(407) 520-7511,10044.05779
2,wit_AlzCJqyq8IIytFxORg,the-wharf-at-sunset-walk-kissimmee-2,The Wharf at Sunset Walk,https://s3-media2.fl.yelpcdn.com/bphoto/6i1p7c...,False,https://www.yelp.com/biz/the-wharf-at-sunset-w...,418,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.5,"{'latitude': 28.343873101245265, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '3274 Margaritaville Blvd', 'addr...",14079547290,(407) 954-7290,15769.757232
3,uV0N049zQmA_PUnbIK3oyg,eddie-vs-prime-seafood-orlando-2,Eddie V's Prime Seafood,https://s3-media4.fl.yelpcdn.com/bphoto/8F2qYH...,False,https://www.yelp.com/biz/eddie-vs-prime-seafoo...,1048,"[{'alias': 'seafood', 'title': 'Seafood'}, {'a...",4.5,"{'latitude': 28.44976, 'longitude': -81.48521}",[delivery],$$$,"{'address1': '7488 W Sand Lake Rd', 'address2'...",14073553011,(407) 355-3011,7794.68848
4,aBdD__fQY23MKOIPmekrXw,the-whiskey-orlando,The Whiskey,https://s3-media3.fl.yelpcdn.com/bphoto/gCMLA8...,False,https://www.yelp.com/biz/the-whiskey-orlando?a...,849,"[{'alias': 'tradamerican', 'title': 'American ...",4.5,"{'latitude': 28.450702, 'longitude': -81.486127}",[delivery],$$,"{'address1': '7563 W Sand Lake Rd', 'address2'...",14079306517,(407) 930-6517,7512.004788
5,_N8r_L8osRlusLJLfF3MPA,br77-brazilian-steakhouse-celebration,BR77 Brazilian Steakhouse,https://s3-media4.fl.yelpcdn.com/bphoto/rLcGIO...,False,https://www.yelp.com/biz/br77-brazilian-steakh...,48,"[{'alias': 'steak', 'title': 'Steakhouses'}, {...",4.0,"{'latitude': 28.344453143028165, 'longitude': ...",[],,"{'address1': '3228 Margaritaville Blvd', 'addr...",13219004377,(321) 900-4377,15901.047756
6,s2fM5fhc_QMow26UWugMog,bull-and-bear-orlando-2,Bull and Bear,https://s3-media4.fl.yelpcdn.com/bphoto/WKgtcV...,False,https://www.yelp.com/biz/bull-and-bear-orlando...,446,"[{'alias': 'steak', 'title': 'Steakhouses'}, {...",4.5,"{'latitude': 28.352868, 'longitude': -81.533747}",[],$$$$,"{'address1': '14200 Bonnet Creek Resort Ln', '...",14075975500,(407) 597-5500,14244.446547
7,ZmC6t-RbvQ_HaNuZ3kighA,delmonicos-italian-steak-house-orlando,Delmonico's Italian Steak House,https://s3-media3.fl.yelpcdn.com/bphoto/-EKhF9...,False,https://www.yelp.com/biz/delmonicos-italian-st...,1239,"[{'alias': 'steak', 'title': 'Steakhouses'}, {...",4.0,"{'latitude': 28.420272, 'longitude': -81.460726}",[delivery],$$,"{'address1': '6115 Westwood Blvd', 'address2':...",14072262662,(407) 226-2662,11379.523523
8,fwrYgOJCja_a6GozuPmOew,boteco-do-manolo-windermere,Boteco do Manolo,https://s3-media2.fl.yelpcdn.com/bphoto/Zg6yQv...,False,https://www.yelp.com/biz/boteco-do-manolo-wind...,5,"[{'alias': 'brazilian', 'title': 'Brazilian'},...",4.5,"{'latitude': 28.44898180468064, 'longitude': -...",[],,"{'address1': '11620 Lakeside Village Ln', 'add...",14076148310,(407) 614-8310,3456.146666
9,Vlwa2HgckFlUmAqTN1HEfQ,teak-neighborhood-grill-orlando,Teak Neighborhood Grill,https://s3-media4.fl.yelpcdn.com/bphoto/HRjCIw...,False,https://www.yelp.com/biz/teak-neighborhood-gri...,969,"[{'alias': 'burgers', 'title': 'Burgers'}, {'a...",4.0,"{'latitude': 28.50895, 'longitude': -81.47101}",[delivery],$$,"{'address1': '6400 Times Sq Ave', 'address2': ...",14073135111,(407) 313-5111,8921.531236


- Where is the actual data we want to save?

In [37]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

- Calculate how many pages of results needed to cover the total_results

In [38]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total'])/ results_per_page)
n_pages

95

In [48]:
for i in tqdm_notebook(range(1,n_pages+1)):
    ## The block of code we want to TRY to run
    try:
        
        time.sleep(.2)
        
        ## Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        
        ## save number of results for to use as offset
        n_results = len(previous_results)
        
        
        ## use n_results as the OFFSET 
        results = Yelp.search_query(location = location, term = term,
                                   offset = n_results+1)

        ## append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results, f)

            
    ## What to do if we get an error/exception.
    except Exception as e:
        print(' [!] ERROR', e)


  0%|          | 0/95 [00:00<?, ?it/s]

 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.
 [!] ERROR VALIDATION_ERROR: Too many resul

## Open the Final JSON File with Pandas

In [40]:
df = pd.read_json(JSON_FILE)

In [41]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/Orlando-steak.csv.gz'

In [42]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression = 'gzip', index = False)

## Bonus: compare filesize with os module's `os.path.getsize`

In [43]:
size_json = os.path.getsize(JSON_FILE)
size_csv_gz = os.path.getsize(JSON_FILE.replace('.json','.csv.gz'))

print(f'JSON FILE: {size_json:,} Bytes')
print(f'CSV.GZ FILE: {size_csv_gz:,} Bytes')

print(f'the csv.gz is {size_json/size_csv_gz} times smaller!')

JSON FILE: 2 Bytes
CSV.GZ FILE: 46 Bytes
the csv.gz is 0.043478260869565216 times smaller!
