In [3]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook


In [6]:
# Load API Credentials
with open('/Users/alisonwilliams/Documents/.secret/yelp_api.json') as f:   #use your path here!
    login = json.load(f)
# Instantiate YelpAPI Variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [15]:
# set our API call parameters 
LOCATION = 'Seattle'
TERM = 'Sushi'

In [16]:
# Specifying JSON_FILE filename (can include a folder)
# include the search terms in the filename
JSON_FILE_sushi = "Data/results_in_progress_wa_sushi.json"
JSON_FILE_sushi

'Data/results_in_progress_wa_sushi.json'

In [17]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE_sushi)
## If it does not exist: 
if file_exists == False:
    
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE_sushi)
    ## If JSON_FILE included a folder:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder,exist_ok=True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f'[i] {JSON_FILE_sushi} not found. Saving empty list to file.')
    
    
    # save an empty list
    with open(JSON_FILE_sushi,'w') as f:
        json.dump([],f)  
# If it exists, inform user
else:
    print(f"[i] {JSON_FILE_sushi} already exists.")

[i] Data/results_in_progress_wa_sushi.json already exists.


In [18]:
## Load previous results and use len of results for offset
with open(JSON_FILE_sushi,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [19]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [20]:
## How many results total?
total_results = results['total']
total_results

884

In [21]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page


20

In [22]:
# Import additional packages for controlling our loop
import time, math
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

45

In [24]:
# join new results with old list with extend and save to file
previous_results.extend(results['businesses'])  
with open(JSON_FILE_sushi,'w') as f:
     json.dump(previous_results,f)

In [25]:
from tqdm.notebook import tqdm_notebook
import time
for i in tqdm_notebook(range(n_pages)):
    # adds 200 ms pause
    time.sleep(.2) 

  0%|          | 0/45 [00:00<?, ?it/s]

In [29]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(JSON_FILE_sushi, 'r') as f:
        previous_results = json.load(f)
    ## save number of results for to use as offset
    n_results = len(previous_results)
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    ## append new results and save to file
    previous_results.extend(results['businesses'])
    
    with open(JSON_FILE_sushi,'w') as f:
        json.dump(previous_results,f)
    
    # add a 200ms pause
    time.sleep(.2)

  0%|          | 0/45 [00:00<?, ?it/s]

In [30]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(JSON_FILE_sushi, 'r') as f:
        previous_results = json.load(f)
    ## save number of results for to use as offset
    n_results = len(previous_results)
    
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
    
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    
    
    ## append new results and save to file
    previous_results.extend(results['businesses'])
    
    # display(previous_results)
    with open(JSON_FILE_sushi,'w') as f:
        json.dump(previous_results,f)
    
    time.sleep(.2)

  0%|          | 0/45 [00:00<?, ?it/s]

In [31]:
# load final results
final_df = pd.read_json(JSON_FILE_sushi)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,YSQiqH7RIWORk_Qp-A4SOg,shiros-seattle,Shiro's,https://s3-media1.fl.yelpcdn.com/bphoto/82qOHj...,False,https://www.yelp.com/biz/shiros-seattle?adjust...,2482,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 47.6147782162181, 'longitude': -1...","[delivery, pickup]",$$$$,"{'address1': '2401 2nd Ave', 'address2': '', '...",12064439844,(206) 443-9844,1298.562261
1,3hxncVCRPfmKLOyiXKXbDw,sushi-kashiba-seattle,Sushi Kashiba,https://s3-media3.fl.yelpcdn.com/bphoto/vJlqSj...,False,https://www.yelp.com/biz/sushi-kashiba-seattle...,1308,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 47.6099251, 'longitude': -122.341...",[delivery],$$$$,"{'address1': '86 Pine St', 'address2': 'Ste 1'...",12064418844,(206) 441-8844,992.223566
2,vs5dhED5Yf8kqPp06OD2QQ,momiji-seattle,Momiji,https://s3-media3.fl.yelpcdn.com/bphoto/ahKa5Z...,False,https://www.yelp.com/biz/momiji-seattle?adjust...,1708,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 47.61482, 'longitude': -122.31657}",[delivery],$$,"{'address1': '1522 12th Ave', 'address2': '', ...",12064574068,(206) 457-4068,1015.019475
3,L8RRAd-JZ0Bd4MER0yyX-g,japonessa-sushi-cocina-seattle,Japonessa Sushi Cocina,https://s3-media3.fl.yelpcdn.com/bphoto/vucCrk...,False,https://www.yelp.com/biz/japonessa-sushi-cocin...,5130,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 47.6079793649921, 'longitude': -1...",[delivery],$$,"{'address1': '1400 1st Ave', 'address2': '', '...",12069717979,(206) 971-7979,976.440057
4,dkZiHbwggWsB3QSQhy2New,rondo-seattle-2,Rondo,https://s3-media2.fl.yelpcdn.com/bphoto/8IgsAS...,False,https://www.yelp.com/biz/rondo-seattle-2?adjus...,393,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 47.62082, 'longitude': -122.32066}","[delivery, pickup]",$$,"{'address1': '224 Broadway E', 'address2': '',...",12065882051,(206) 588-2051,998.325547


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
879,xDmeu1Am5zz2oE3Y-ZHHmA,the-cheesecake-factory-lynnwood-6,The Cheesecake Factory,https://s3-media2.fl.yelpcdn.com/bphoto/faMPG8...,False,https://www.yelp.com/biz/the-cheesecake-factor...,591,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",2.5,"{'latitude': 47.8276689, 'longitude': -122.274...",[restaurant_reservation],$$,"{'address1': '3000 184th St SW', 'address2': '...",14254121376,(425) 412-1376,24089.64805
880,doxuwEk585gQHk_9My8tPQ,safeway-lynnwood-5,Safeway,https://s3-media2.fl.yelpcdn.com/bphoto/a3AglC...,False,https://www.yelp.com/biz/safeway-lynnwood-5?ad...,10,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 47.8811291, 'longitude': -122.280...",[],,"{'address1': '12811 Beverly Park Rd', 'address...",14253473060,(425) 347-3060,29895.206532
881,rGdo-yHqxFZLqZ_tvBEIVg,fred-meyer-lynnwood-3,Fred Meyer,https://s3-media3.fl.yelpcdn.com/bphoto/HKPXB1...,False,https://www.yelp.com/biz/fred-meyer-lynnwood-3...,105,"[{'alias': 'grocery', 'title': 'Grocery'}]",3.0,"{'latitude': 47.8222615152993, 'longitude': -1...",[],$$,"{'address1': '4615 196th St Sw', 'address2': '...",14256700200,(425) 670-0200,23266.733682
882,Fkwrnw8NZcWAb2mbz8aXhQ,safeway-kirkland-3,Safeway,https://s3-media4.fl.yelpcdn.com/bphoto/dQeTMb...,False,https://www.yelp.com/biz/safeway-kirkland-3?ad...,64,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 47.6788233, 'longitude': -122.173...",[],$$,"{'address1': '12519 NE 85th St', 'address2': '...",14258228821,(425) 822-8821,13749.915853
883,7Cm7jGoBKlx29ZzSHWa8MQ,fred-meyer-kent-3,Fred Meyer,https://s3-media1.fl.yelpcdn.com/bphoto/AVh2sh...,False,https://www.yelp.com/biz/fred-meyer-kent-3?adj...,82,"[{'alias': 'grocery', 'title': 'Grocery'}, {'a...",2.5,"{'latitude': 47.385831, 'longitude': -122.205397}",[],$$,"{'address1': '10201 SE 240th St', 'address2': ...",12538595500,(253) 859-5500,27075.745591


In [32]:
# check for duplicate results
final_df.duplicated().sum()

TypeError: unhashable type: 'list'

In [33]:
# check for duplicate ID's 
final_df.duplicated(subset='id').sum()

22

In [34]:
## Drop duplicate ids and confirm there are no more duplicates
final_df = final_df.drop_duplicates(subset='id')
final_df.duplicated(subset='id').sum()

0

In [35]:
# save the final results to a compressed csv
final_df.to_csv('Data/final_results_seattle_sushi.csv.gz', compression='gzip',index=False)