In [81]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [82]:
# Read the Yelp credentials from the local (untracked) config file
with open('Data/config.json') as f:
    login = json.load(f)

# Build the Yelp client from the stored key; timeout_s is passed as 5.0
yelp_api = YelpAPI(
    login['api-key'],
    timeout_s=5.0,
)

In [83]:
# Search parameters shared by every API call in this notebook
LOCATION = "NY,NY"  # where to search
TERM = "Thai"       # what to search for

In [84]:
# File that accumulates results while paging (a folder prefix is allowed).
# The name encodes the search terms so different searches do not collide.
JSON_FILE = 'Data/results_in_progress_ny_thai.json'
JSON_FILE

'Data/results_in_progress_ny_thai.json'

In [85]:
## Is the in-progress results file already on disk?
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    # Nothing to create: just report it
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## Make the parent folder first when the path includes one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Tell the user, then seed the file with an empty JSON list
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Data/results_in_progress_ny_thai.json not found. Saving empty list to file.


In [86]:
## Read back whatever has been saved so far
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

## The number of saved businesses doubles as the offset of the next request
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [87]:
# Query Yelp through the client's search_query method, skipping the
# businesses that are already saved (offset=n_results)
results = yelp_api.search_query(
    location=LOCATION,
    term=TERM,
    offset=n_results,
)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [88]:
## Overall number of matches, as reported under the response's 'total' key
total_results = results["total"]
total_results

2900

In [89]:
## Number of businesses whose details came back in this single response
results_per_page = len(results["businesses"])
results_per_page

20

In [90]:
# Pages still needed to cover the results not yet saved; math.ceil rounds up
# so a final partial page is counted. If the response held no businesses
# there is nothing to page through, and dividing by zero must be avoided.
if results_per_page > 0:
    n_pages = math.ceil((results['total'] - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

145

In [91]:
# Append the new page to the saved list (in place), then rewrite the file
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

In [92]:
# Progress-bar preview: one short pause per page, no API calls are made here
for i in tqdm_notebook(range(n_pages)):
    time.sleep(0.2)  # 200 ms pause

  0%|          | 0/145 [00:00<?, ?it/s]

In [93]:
def create_json_file(JSON_FILE,  delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it with an empty list if needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. It may include a folder, which is created
        when it does not exist yet.
    delete_if_exists : bool, default False
        If True, an existing file is deleted and replaced by a new file
        holding an empty list. If False, an existing file is left untouched.

    Returns
    -------
    None
        The function only prints status messages and touches the file system.
    """
    if os.path.isfile(JSON_FILE):
        ## Keep the existing file unless the caller asked for a reset
        if not delete_if_exists:
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        # fall through: the file is gone now, so it is re-created below

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS (dirname is '' when no folder was given)
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [94]:
## Create a new empty json file (replacing the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)

## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)

## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])

# Use math.ceil to round up for the total number of pages of results.
# An empty page means there is nothing to page through; guarding here
# avoids a ZeroDivisionError for searches that return no businesses.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

[!] Data/results_in_progress_ny_thai.json already exists. Deleting previous file...
[i] Data/results_in_progress_ny_thai.json not found. Saving empty list to new file.
- 0 previous results found.


145

In [95]:
for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in results in progress file and check the length
    ## (re-reading on every pass lets an interrupted run resume from the file)
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as the offset
    n_results = len(previous_results)

    ## Stop before requesting past 1000 results. NOTE(review): the cap is on
    ## the number of results (offset + page size), not on API calls as the
    ## message says; confirm against the Yelp search docs before rewording.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page adds nothing, so the offset would never advance and the
    ## remaining iterations would repeat the same request: stop here instead.
    new_businesses = results['businesses']
    if len(new_businesses) == 0:
        print('No more results returned. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

    ## short pause between requests
    time.sleep(.2)

  0%|          | 0/145 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [96]:
# Read the accumulated results into a DataFrame and preview both ends
final_df = pd.read_json(JSON_FILE)
display(
    final_df.head(),
    final_df.tail(),
)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,jjJc_CrkB2HodEinB6cWww,lovemama-new-york,LoveMama,https://s3-media1.fl.yelpcdn.com/bphoto/bLlFKT...,False,https://www.yelp.com/biz/lovemama-new-york?adj...,6576,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 40.730408722512074, 'longitude': ...","[delivery, restaurant_reservation, pickup]",$$,"{'address1': '174 2nd Ave', 'address2': '', 'a...",12122545370,(212) 254-5370,2859.902795
1,-XYp6w50XbZfS90YddS5ew,soothr-new-york-2,Soothr,https://s3-media2.fl.yelpcdn.com/bphoto/HxjVE7...,False,https://www.yelp.com/biz/soothr-new-york-2?adj...,1157,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 40.732259, 'longitude': -73.987363}","[delivery, pickup]",$$,"{'address1': '204 E 13th St', 'address2': '', ...",12128449789,(212) 844-9789,3043.263183
2,B3_K2kUVbYOU0VaLcj_LTw,thai-villa-new-york-2,Thai Villa,https://s3-media2.fl.yelpcdn.com/bphoto/PYopFn...,False,https://www.yelp.com/biz/thai-villa-new-york-2...,4845,"[{'alias': 'thai', 'title': 'Thai'}]",4.5,"{'latitude': 40.73902, 'longitude': -73.99065}","[delivery, pickup]",$$,"{'address1': '5 E 19th St', 'address2': 'G Flo...",12128029999,(212) 802-9999,3744.5704
3,0IFDnYf3bhqxJR6hVrG7Gw,top-thai-vintage-new-york-3,Top Thai Vintage,https://s3-media3.fl.yelpcdn.com/bphoto/-ZoEVV...,False,https://www.yelp.com/biz/top-thai-vintage-new-...,1131,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 40.729907419973344, 'longitude': ...","[delivery, pickup, restaurant_reservation]",$$$,"{'address1': '55 Carmine St', 'address2': None...",16466092272,(646) 609-2272,2845.705425
4,egDEaHpDumYHzRUZ8JBU-w,pranakhon-thai-restaurant-new-york-2,Pranakhon Thai Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/XB_CUH...,False,https://www.yelp.com/biz/pranakhon-thai-restau...,311,"[{'alias': 'thai', 'title': 'Thai'}, {'alias':...",4.5,"{'latitude': 40.73369, 'longitude': -73.99316}","[delivery, pickup]",,"{'address1': '88 University Pl', 'address2': N...",12127866789,(212) 786-6789,3144.403563


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,YbENoUWAXj_UdGK6UXajfw,hudson-blue-bar-and-view-weehawken,Hudson Blue Bar and View,https://s3-media2.fl.yelpcdn.com/bphoto/RJJAJQ...,False,https://www.yelp.com/biz/hudson-blue-bar-and-v...,23,"[{'alias': 'sportsbars', 'title': 'Sports Bars...",4.0,"{'latitude': 40.776739823349494, 'longitude': ...",[delivery],$$,"{'address1': '4800 Ave Port Imperial Blvd', 'a...",12013483200,(201) 348-3200,8048.974132
996,bMhqh_k_QZJMnVstIJzxRw,thái-son-elmhurst,Thái Son,https://s3-media2.fl.yelpcdn.com/bphoto/8I0jtd...,False,https://www.yelp.com/biz/th%C3%A1i-son-elmhurs...,547,"[{'alias': 'vietnamese', 'title': 'Vietnamese'}]",3.5,"{'latitude': 40.74637, 'longitude': -73.89161}",[delivery],$$,"{'address1': '40-10 74th St', 'address2': '', ...",17184766805,(718) 476-6805,9773.038269
997,aGmsJ6_NWw5vnLS43MZLpA,spice-new-york-4,Spice,https://s3-media2.fl.yelpcdn.com/bphoto/EiIMjZ...,False,https://www.yelp.com/biz/spice-new-york-4?adju...,55,"[{'alias': 'chinese', 'title': 'Chinese'}]",2.5,"{'latitude': 40.7900599, 'longitude': -73.97351}","[delivery, pickup]",$,"{'address1': '610 Amsterdam Ave', 'address2': ...",12128750050,(212) 875-0050,9566.504148
998,pe0kLJDb7DEWSKCbOS8D2g,otaiko-hibachi-and-sushi-lounge-bayonne,Otaiko Hibachi & Sushi Lounge,https://s3-media4.fl.yelpcdn.com/bphoto/79friX...,False,https://www.yelp.com/biz/otaiko-hibachi-and-su...,246,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 40.66791160879, 'longitude': -74....","[delivery, pickup]",$$,"{'address1': '125 Lefante Way', 'address2': ''...",12013393399,(201) 339-3399,10052.241383
999,_XLLSXPIjgoZQtwU3RCcQA,hunan-delight-brooklyn-2,Hunan Delight,https://s3-media3.fl.yelpcdn.com/bphoto/aS-LlY...,False,https://www.yelp.com/biz/hunan-delight-brookly...,346,"[{'alias': 'chinese', 'title': 'Chinese'}]",4.0,"{'latitude': 40.6755282563532, 'longitude': -7...","[delivery, pickup]",$$,"{'address1': '752 Union St', 'address2': None,...",17187891400,(718) 789-1400,3596.767124


In [97]:
# Count rows whose business ID repeats an earlier row's ID
final_df['id'].duplicated().sum()

0

No duplicates to delete in file.

In [98]:
# Write the final results as a gzip-compressed CSV, leaving out the index
final_df.to_csv(
    'Data/final_results_ny_thai.csv.gz',
    compression='gzip',
    index=False,
)