In [1]:
import pandas as pd
import numpy as np

import os, json, math, time

# to make yelpapi calls
from yelpapi import YelpAPI

# progress bar from tqdm_notebook
from tqdm.notebook import tqdm_notebook


In [2]:
!pip install yelpapi
!pip install tqdm



# JSON with Python

In [4]:
# Load API Credentials
# Resolve the key file relative to the current user's home directory instead
# of hardcoding one machine's absolute path.
credentials_path = os.path.expanduser('~/.secret/yelp_api.json')
with open(credentials_path, 'r') as f:
    login = json.load(f)
# Display only the key names so the secret values never land in the output
login.keys()



dict_keys(['Client-ID', 'api-key'])

# Yelp API Packages

In [6]:
# Login
# Build the API client from the 'api-key' entry of the loaded credentials.
# timeout_s is presumably the per-request timeout in seconds -- confirm in the yelpapi docs.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
# Echo the client object to confirm it was created
yelp_api



<yelpapi.yelpapi.YelpAPI at 0x1270f5af0>

In [7]:
# Print the docstring of search_query to see which parameters it requires
help(yelp_api.search_query)



Help on method search_query in module yelpapi.yelpapi:

search_query(**kwargs) method of yelpapi.yelpapi.YelpAPI instance
    Query the Yelp Search API.
    
    documentation: https://www.yelp.com/developers/documentation/v3/business_search
    
    required parameters:
        * one of either:
            * location - text specifying a location to search for
            * latitude and longitude



In [14]:
# set our API call parameters
# Both constants are passed to search_query (as `location` and `term`) below
LOCATION = 'Perris,CA'
TERM = 'Burgers'


In [15]:
# Specifying JSON_FILE filename (can include a folder)
# include the search terms in the filename
# This file holds the results collected so far and is rewritten after every page
JSON_FILE = "Data/results_in_progress_perris_burgers.json"
JSON_FILE



'Data/results_in_progress_perris_burgers.json'

In [19]:
## Make sure the results-in-progress file exists before anything reads it
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    # Nothing to create; keep whatever was collected earlier
    print(f"[i] {JSON_FILE} already exists.")
else:
    # Create the parent folder first when JSON_FILE includes one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Tell the user, then start the file off as an empty JSON list
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)


[i] Data/results_in_progress_perris_burgers.json already exists.


In [20]:
## Read back whatever has been saved so far
with open(JSON_FILE) as f:
    previous_results = json.load(f)

## The number of saved results doubles as the offset for the next request
n_results = len(previous_results)
print(f'- {n_results} previous results found.')



- 0 previous results found.


In [21]:
# First request: search for TERM in LOCATION, starting right after the
# results that are already saved (offset = number of saved results)
results = yelp_api.search_query(
    location=LOCATION,
    term=TERM,
    offset=n_results,
)
# Top-level keys of the response
results.keys()



dict_keys(['businesses', 'total', 'region'])

In [22]:
## How many results total?
# 'total' is the count reported by the response for the whole search,
# not the number of businesses returned in this one response
total_results = results['total']
total_results



340

In [23]:
## How many did we get the details for?
# Number of businesses in this single response, i.e. the page size used below
results_per_page = len(results['businesses'])
results_per_page



20

In [24]:
# Use math.ceil to round up for the total number of pages of results.
# Only the results that are not saved yet (total - n_results) still need pages.
# When the request returned no businesses (e.g. a previous run already saved
# everything) there is nothing left to fetch; handling that case explicitly
# also avoids a ZeroDivisionError from the empty page size.
if results_per_page > 0:
    n_pages = math.ceil((results['total'] - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

17

In [25]:
# Add the page just fetched to the in-memory list (in place), then overwrite
# the results-in-progress file with the combined list
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    f.write(json.dumps(previous_results))


In [26]:
# Set up a progress bar in our for loop.
# Dry run only: no API calls are made here, the loop just sleeps once per
# page so the progress bar can be previewed
for i in tqdm_notebook(range(n_pages)):
    # adds 200 ms pause
    time.sleep(.2)


  0%|          | 0/17 [00:00<?, ?it/s]

In [27]:
# Fetch the remaining pages one request at a time, saving after every page so
# an interrupted run can pick up where it stopped.
for i in tqdm_notebook(range(1, n_pages + 1)):

    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## number of saved results, used as the OFFSET of the next request
    n_results = len(previous_results)

    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means there is nothing more to collect: stop instead of
    ## repeating requests that cannot add anything
    new_businesses = results['businesses']
    if not new_businesses:
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    ## The first page is saved before this loop starts, so the last iteration
    ## would otherwise request a page past the end; stop once everything the
    ## response reports as available has been saved
    if len(previous_results) >= results['total']:
        break

    # add a 200ms pause between requests
    time.sleep(.2)

  0%|          | 0/17 [00:00<?, ?it/s]

In [29]:
# load final results
# Read the results-in-progress file into a DataFrame
final_df = pd.read_json(JSON_FILE)
# Preview the first and last rows
display(final_df.head(), final_df.tail())


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,0RPeIss97n3DcvDkRiWDew,la-gare-café-perris-3,La Gare Café,https://s3-media2.fl.yelpcdn.com/bphoto/WnA4Y4...,False,https://www.yelp.com/biz/la-gare-caf%C3%A9-per...,435,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 33.785825201397195, 'longitude': ...",[delivery],$,"{'address1': '24 S D St', 'address2': '', 'add...",19517224528,(951) 722-4528,1015.862022
1,yeFr8HVNS_vqiUCeqxO0Wg,family-basket-perris,Family Basket,https://s3-media3.fl.yelpcdn.com/bphoto/rju8-g...,False,https://www.yelp.com/biz/family-basket-perris?...,85,"[{'alias': 'burgers', 'title': 'Burgers'}]",3.5,"{'latitude': 33.77957, 'longitude': -117.24491}",[delivery],$,"{'address1': '670 W 7th St', 'address2': '', '...",19519435050,(951) 943-5050,2645.503539
2,YjQMmFMNKLSsiPekmQ03TQ,the-bombshelter-perris-3,The Bombshelter,https://s3-media4.fl.yelpcdn.com/bphoto/vHKjIk...,False,https://www.yelp.com/biz/the-bombshelter-perri...,84,"[{'alias': 'sportsbars', 'title': 'Sports Bars...",4.0,"{'latitude': 33.76211525746504, 'longitude': -...",[delivery],$,"{'address1': '2091 Goetz Rd', 'address2': '', ...",19519434863,(951) 943-4863,3351.608634
3,niPP2tmMHfWhB5_wKVr7ow,tastee-freez-perris-2,Tastee Freez,https://s3-media2.fl.yelpcdn.com/bphoto/GsgAHv...,False,https://www.yelp.com/biz/tastee-freez-perris-2...,52,"[{'alias': 'tradamerican', 'title': 'American ...",4.0,"{'latitude': 33.782748, 'longitude': -117.226861}",[delivery],$,"{'address1': '168 E 4th St', 'address2': '', '...",19516575018,(951) 657-5018,1198.828133
4,R-GUJVKJmRZYc4XNoeVODg,gus-jr-perris,Gus Jr,https://s3-media3.fl.yelpcdn.com/bphoto/zj0zUF...,False,https://www.yelp.com/biz/gus-jr-perris?adjust_...,158,"[{'alias': 'breakfast_brunch', 'title': 'Break...",2.5,"{'latitude': 33.7820583429796, 'longitude': -1...","[pickup, delivery]",$,"{'address1': '497 E 4th St', 'address2': '', '...",19516573728,(951) 657-3728,1136.328607


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
335,-AeAGbYMDJT9LHDn7vaRZg,kfc-lake-elsinore-3,KFC,https://s3-media2.fl.yelpcdn.com/bphoto/dHchnd...,False,https://www.yelp.com/biz/kfc-lake-elsinore-3?a...,110,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",2.5,"{'latitude': 33.664066, 'longitude': -117.298995}","[pickup, delivery]",$,"{'address1': '321 Summerhill Dr', 'address2': ...",19512454060,(951) 245-4060,15988.184403
336,Yu_ynVWHfA5NThYHbEwQyA,del-taco-riverside-3,Del Taco,https://s3-media3.fl.yelpcdn.com/bphoto/5MtPbd...,False,https://www.yelp.com/biz/del-taco-riverside-3?...,69,"[{'alias': 'mexican', 'title': 'Mexican'}]",2.0,"{'latitude': 33.952983, 'longitude': -117.38829}","[pickup, delivery]",$,"{'address1': '6333 Riverside Ave', 'address2':...",19516826333,(951) 682-6333,23649.995465
337,HbISHXpHe6dgH0gAGkImgw,del-taco-riverside-2,Del Taco,https://s3-media1.fl.yelpcdn.com/bphoto/Wrh3tm...,False,https://www.yelp.com/biz/del-taco-riverside-2?...,75,"[{'alias': 'mexican', 'title': 'Mexican'}]",3.5,"{'latitude': 33.945747, 'longitude': -117.417571}","[pickup, delivery]",$,"{'address1': '5290 Arlington Ave', 'address2':...",19516875397,(951) 687-5397,24936.246917
338,yvOH4DAbIJVq4BPso8qJYg,del-taco-riverside-9,Del Taco,https://s3-media2.fl.yelpcdn.com/bphoto/3wkN5J...,False,https://www.yelp.com/biz/del-taco-riverside-9?...,38,"[{'alias': 'mexican', 'title': 'Mexican'}]",2.5,"{'latitude': 33.94688, 'longitude': -117.38676}","[pickup, delivery]",$,"{'address1': '3487 Arlington Ave', 'address2':...",19513691127,(951) 369-1127,23050.387799
339,H35Uj92N9aKeuolQopPvgQ,del-taco-lake-elsinore-2,Del Taco,https://s3-media2.fl.yelpcdn.com/bphoto/qRcRk1...,False,https://www.yelp.com/biz/del-taco-lake-elsinor...,99,"[{'alias': 'mexican', 'title': 'Mexican'}]",2.0,"{'latitude': 33.65682, 'longitude': -117.29777}","[pickup, delivery]",$,"{'address1': '31904 Mission Trl', 'address2': ...",19516745771,(951) 674-5771,16653.683258


In [32]:
# check for duplicate ID's
# Counts rows whose 'id' already appeared in an earlier row (0 = all ids unique)
final_df.duplicated(subset='id').sum()


0

In [33]:
# Write the final results to a gzip-compressed CSV, leaving out the index column
final_df.to_csv(
    'Data/final_results_perris_burgers.csv.gz',
    compression='gzip',
    index=False,
)



## First Way

In [8]:
# Single search request for burgers around Perris, CA (no offset given)
results = yelp_api.search_query(
    location='Perris, CA',
    term='Burgers',
)
# Check what kind of object came back and what it contains
print(type(results))
results.keys()



<class 'dict'>


dict_keys(['businesses', 'total', 'region'])

In [9]:
## Search totals
# Count reported by the response for the whole search
results['total']


340

In [10]:
# Results to DF
# One row per entry in the response's 'businesses' list
burgers = pd.DataFrame(results['businesses'])
burgers.head(2)


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,0RPeIss97n3DcvDkRiWDew,la-gare-café-perris-3,La Gare Café,https://s3-media2.fl.yelpcdn.com/bphoto/WnA4Y4...,False,https://www.yelp.com/biz/la-gare-caf%C3%A9-per...,435,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 33.785825201397195, 'longitude': ...",[delivery],$,"{'address1': '24 S D St', 'address2': '', 'add...",19517224528,(951) 722-4528,1015.862022
1,yeFr8HVNS_vqiUCeqxO0Wg,family-basket-perris,Family Basket,https://s3-media3.fl.yelpcdn.com/bphoto/rju8-g...,False,https://www.yelp.com/biz/family-basket-perris?...,85,"[{'alias': 'burgers', 'title': 'Burgers'}]",3.5,"{'latitude': 33.77957, 'longitude': -117.24491}",[delivery],$,"{'address1': '670 W 7th St', 'address2': '', '...",19519435050,(951) 943-5050,2645.503539


In [11]:
## how many businesses in our results
# Businesses returned by this one request (compare with results['total'])
len(results['businesses'])



20

Pagination and "offset"

Obtain ONLY the first two pages of results

In [12]:
# Same search as before, but starting at offset 20: the first request
# returned 20 businesses, so this asks for the ones that follow them
results_20 = yelp_api.search_query(
    location='Perris, CA',
    term='Burgers',
    offset=20,
)



In [13]:
# Second page of results as a DataFrame, one row per business
burgers20 = pd.DataFrame(results_20['businesses'])
burgers20.head(2)



Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,JTuxFcWYzIjvLQe7qjSUZQ,jacks-bar-b-q-lake-elsinore-2,Jack's Bar-B-Q,https://s3-media2.fl.yelpcdn.com/bphoto/621cH3...,False,https://www.yelp.com/biz/jacks-bar-b-q-lake-el...,1263,"[{'alias': 'bbq', 'title': 'Barbeque'}, {'alia...",4.0,"{'latitude': 33.6738157425907, 'longitude': -1...",[delivery],$$,"{'address1': '1604 W Lakeshore Dr', 'address2'...",19512456500,(951) 245-6500,17466.63237
1,na8kJ2ecDBYghMIsXWDO_A,flat-top-bar-and-grill-riverside-2,Flat Top Bar and Grill,https://s3-media3.fl.yelpcdn.com/bphoto/uw6Klv...,False,https://www.yelp.com/biz/flat-top-bar-and-gril...,317,"[{'alias': 'pubs', 'title': 'Pubs'}, {'alias':...",4.0,"{'latitude': 33.88548, 'longitude': -117.34994}","[delivery, pickup]",$$,"{'address1': '17960 Van Buren Blvd', 'address2...",19517800114,(951) 780-0114,15826.499025


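Save the businesses as a records-oriented JSON file by passing a file path to `df.to_json(path, orient='records')`; without a path, `to_json` only returns the JSON string and nothing is written.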

In [None]:
# Save the first two pages as a records-oriented JSON file.
# The two pages are combined right here because `all_burgers` is only created
# in a later cell, and a file path is passed because to_json() without one
# just returns the JSON string without writing anything.
os.makedirs('Data', exist_ok=True)
pd.concat([burgers, burgers20], ignore_index=True).to_json(
    'Data/first_two_pages_perris_burgers.json', orient='records')

Concatenate the results into one data frame

In [None]:
## Stack page one and page two into a single frame; ignore_index renumbers
## the rows so the index does not repeat
all_burgers = pd.concat([burgers, burgers20], ignore_index=True)
display(all_burgers.head(3), all_burgers.tail(3))



In [None]:
# To save data
# Results go to their own file under Data/ -- never to the credentials file:
# opening that file with 'w' truncates it and erases the API key.
# DataFrame.to_json is used because json.dump cannot serialize a DataFrame.
os.makedirs('Data', exist_ok=True)
all_burgers.to_json('Data/first_two_pages_perris_burgers.json', orient='records')