In [1]:
pip install tqdm


Note: you may need to restart the kernel to use updated packages.


In [2]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook



In [3]:
# Read the Yelp credentials from a JSON file stored outside the project folder
# NOTE(review): this is an absolute, user-specific path -- consider making it configurable
credentials_path = '/Users/OWNER/.secret/yelp_api.json'
with open(credentials_path) as cred_file:
    login = json.load(cred_file)
# Show only the key names so the secret values are never rendered in the output
login.keys()



dict_keys(['client-id', 'api-key'])

In [4]:
# Create the API client with the key loaded from the credentials file.
# YelpAPI is already imported in the imports cell at the top of the notebook,
# so it is not imported a second time here.
# timeout_s is passed through to the client as the request timeout in seconds.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api


<yelpapi.yelpapi.YelpAPI at 0x2e181182f80>

In [5]:
# Search parameters passed to every Yelp search request in this notebook:
# LOCATION is sent as the `location` argument and TERM as the `term` argument.
LOCATION = 'Cleveland, OH'
TERM = 'Thai'


In [6]:
# Path of the JSON file where results are accumulated while paging through the
# API (it can include a folder).
# The filename is hard-coded to mirror the search parameters (CLE / Thai), so it
# has to be updated by hand whenever LOCATION or TERM change.
JSON_FILE = "Data/results_in_progress_CLE_Thai.json"
JSON_FILE



'Data/results_in_progress_CLE_Thai.json'

In [7]:
def create_json_file(JSON_FILE,  delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it as an empty JSON list when needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the json file. It may include folders; missing folders are
        created before the file is written.
    delete_if_exists : bool, default False
        If truthy, an existing file is deleted and replaced with a new file
        holding an empty list. If falsy, an existing file is left untouched.

    Returns
    -------
    None
        The function only prints status messages and touches the file system.
    """
    if os.path.isfile(JSON_FILE):
        ## Keep the existing file unless the caller asked for a fresh start
        if not delete_if_exists:
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        ## delete the file, then fall through to re-create it below
        os.remove(JSON_FILE)

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS (only when JSON_FILE included a folder)
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)



In [8]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# An empty page would make the division below raise ZeroDivisionError,
# so in that case there is nothing left to page through.
if results_per_page:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages



[!] Data/results_in_progress_CLE_Thai.json already exists. Deleting previous file...
[i] Data/results_in_progress_CLE_Thai.json not found. Saving empty list to new file.
- 0 previous results found.


11

In [9]:
## Re-read the results saved so far; their count becomes the offset of the next request
with open(JSON_FILE, 'r') as results_file:
    previous_results = json.load(results_file)

n_results = len(previous_results)
print(f'- {n_results} previous results found.')



- 0 previous results found.


In [10]:
# Run the search through our yelp_api client, starting at the offset
# derived from the results that are already saved
search_params = {'location': LOCATION, 'term': TERM, 'offset': n_results}
results = yelp_api.search_query(**search_params)
results.keys()



dict_keys(['businesses', 'total', 'region'])

In [11]:
## How many results total? Taken from the 'total' field of the API response.
total_results = results['total']
total_results



217

In [12]:
## How many did we get the details for?
## This is the number of businesses in the response, used below as the page size.
results_per_page = len(results['businesses'])
results_per_page



20

In [13]:
# Use math.ceil to round up for the total number of pages of results.
# An empty page (results_per_page == 0) would make the division raise
# ZeroDivisionError, so in that case there are no pages to fetch.
if results_per_page:
    n_pages = math.ceil((results['total'] - n_results) / results_per_page)
else:
    n_pages = 0
n_pages



11

In [14]:
# Append this page of businesses to the running list, then write the whole list back to disk.
# The list is modified in place, so running this cell again appends the same page a second time.
previous_results += results['businesses']
with open(JSON_FILE, 'w') as out_file:
    json.dump(previous_results, out_file)


In [17]:
# Page through the remaining results. Progress is persisted to JSON_FILE after
# every request, so the number of saved results doubles as the next offset.
for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as offset
    n_results = len(previous_results)

    ## n_pages also counts the page that was already saved before this loop,
    ## so stop as soon as everything has been retrieved instead of sending
    ## one more request at offset == total.
    if n_results >= total_results:
        break

    ## The guard counts results (offset + page size), not API calls
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 results. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means there is nothing more to collect
    new_businesses = results['businesses']
    if not new_businesses:
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

    ## short pause between consecutive requests
    time.sleep(.2)



  0%|          | 0/11 [00:00<?, ?it/s]

In [18]:
# Load the accumulated results file into a DataFrame and preview its first and last rows
final_df = pd.read_json(JSON_FILE)
display(final_df.head(), final_df.tail())


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,SGZAsdQAp0SjtMDjXWBPYw,banana-blossom-thai-cuisine-cleveland,Banana Blossom Thai Cuisine,https://s3-media4.fl.yelpcdn.com/bphoto/kdMhe3...,False,https://www.yelp.com/biz/banana-blossom-thai-c...,293,"[{'alias': 'thai', 'title': 'Thai'}]",4.5,"{'latitude': 41.48889, 'longitude': -81.70902}",[delivery],$$,"{'address1': '2800 Clinton Ave', 'address2': '...",12166965529,(216) 696-5529,3509.349779
1,228mfqUuGbnCs1kiupJXZw,map-of-thailand-cleveland,Map Of Thailand,https://s3-media2.fl.yelpcdn.com/bphoto/oL_zsp...,False,https://www.yelp.com/biz/map-of-thailand-cleve...,293,"[{'alias': 'thai', 'title': 'Thai'}]",4.5,"{'latitude': 41.5095, 'longitude': -81.66247}",[delivery],$$,"{'address1': '3710 Payne Ave', 'address2': 'St...",12163612220,(216) 361-2220,4082.313627
2,goZV-PlwLJOB8s5O3GruxA,bangkok-thai-cuisine-cleveland-2,Bangkok Thai Cuisine,https://s3-media4.fl.yelpcdn.com/bphoto/QgkuwP...,False,https://www.yelp.com/biz/bangkok-thai-cuisine-...,446,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 41.520317, 'longitude': -81.486442}","[delivery, pickup]",$$,"{'address1': '5359 Mayfield Rd', 'address2': '...",14406841982,(440) 684-1982,16335.738497
3,2JeciNHrJE_tI-o2tUCu5w,ty-fun-thai-bistro-cleveland,Ty Fun Thai Bistro,https://s3-media1.fl.yelpcdn.com/bphoto/GWlrV3...,False,https://www.yelp.com/biz/ty-fun-thai-bistro-cl...,218,"[{'alias': 'thai', 'title': 'Thai'}]",4.0,"{'latitude': 41.4794692993164, 'longitude': -8...","[pickup, delivery]",$$,"{'address1': '815 Jefferson Ave', 'address2': ...",12166641000,(216) 664-1000,1193.976211
4,LsCPb_oYzeB_LkDlDp5Udw,lotus-thai-house-parma,Lotus Thai House,https://s3-media4.fl.yelpcdn.com/bphoto/tzL_b8...,False,https://www.yelp.com/biz/lotus-thai-house-parm...,94,"[{'alias': 'thai', 'title': 'Thai'}]",4.5,"{'latitude': 41.40358, 'longitude': -81.68989}","[pickup, delivery]",,"{'address1': '5869 Broadview Rd', 'address2': ...",12163719575,(216) 371-9575,7899.057477


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
212,mEx2NJBepbnVSjZG6MqwJA,panera-bread-westlake-2,Panera Bread,https://s3-media1.fl.yelpcdn.com/bphoto/4TBLB4...,False,https://www.yelp.com/biz/panera-bread-westlake...,53,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",2.0,"{'latitude': 41.46829, 'longitude': -81.90929}","[pickup, delivery]",$$,"{'address1': '26137 Detroit Road', 'address2':...",14408994944,(440) 899-4944,19746.927637
213,RYYzq9MFwXhrGSPeWtUwEA,zoup-mentor,Zoup!,https://s3-media1.fl.yelpcdn.com/bphoto/kVnPAi...,False,https://www.yelp.com/biz/zoup-mentor?adjust_cr...,35,"[{'alias': 'salad', 'title': 'Salad'}, {'alias...",3.0,"{'latitude': 41.6530789, 'longitude': -81.3799...","[pickup, delivery]",$,{'address1': '7327 Mentor Ave Points East Shop...,14407014110,(440) 701-4110,31482.139419
214,Fkckopw1GL4cQzh9Buv6DA,panera-bread-north-olmsted,Panera Bread,https://s3-media2.fl.yelpcdn.com/bphoto/aPxgZ-...,False,https://www.yelp.com/biz/panera-bread-north-ol...,34,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",2.5,"{'latitude': 41.41836, 'longitude': -81.91169}","[pickup, delivery]",$,"{'address1': '26086 Brookpark Road', 'address2...",14408019023,(440) 801-9023,20862.824369
215,UrVTbcIAitKm2Z8J-uNREQ,panera-bread-brunswick,Panera Bread,https://s3-media1.fl.yelpcdn.com/bphoto/d7SMN3...,False,https://www.yelp.com/biz/panera-bread-brunswic...,36,"[{'alias': 'salad', 'title': 'Salad'}, {'alias...",2.0,"{'latitude': 41.2373089229639, 'longitude': -8...","[pickup, delivery]",$,"{'address1': '1405 Parschen Blvd', 'address2':...",13302204408,(330) 220-4408,28753.134727
216,ZkgijJ3jiVb1H9K26UhGdA,panera-bread-strongsville,Panera Bread,https://s3-media1.fl.yelpcdn.com/bphoto/papNkw...,False,https://www.yelp.com/biz/panera-bread-strongsv...,38,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",2.5,"{'latitude': 41.31479103099124, 'longitude': -...",[delivery],$,"{'address1': '17800 Royalton Rd', 'address2': ...",14408460377,(440) 846-0377,21743.027824


In [19]:
## Keep only the first row for each business id, then count any ids that still repeat
final_df = final_df.drop_duplicates(subset=['id'])
final_df['id'].duplicated().sum()


0

In [20]:
# Count the rows whose id already appeared earlier in the frame
final_df['id'].duplicated().sum()



0

In [21]:
# Save the final results to a gzip-compressed csv.
# index=False keeps the DataFrame's row index out of the file.
final_df.to_csv('Data/final_results_CLE_Thai.csv.gz', compression='gzip',index=False)

