In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Load API Credentials
# The credentials file lives in a ".secret" folder inside the current user's
# home directory, so the notebook does not depend on one specific user name.
# Adjust CREDENTIALS_PATH if your key file is stored somewhere else.
CREDENTIALS_PATH = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(CREDENTIALS_PATH) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable
# The JSON file must contain the key under the name 'api-key'.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [3]:
# Search parameters passed to every Yelp search request in this notebook
LOCATION = "Austin, TX,78701"   # where to search
TERM = "Ramen"                  # what to search for

In [5]:
# File where results are accumulated page by page.
# The name is derived from TERM so that results for different search terms
# are never mixed into the same in-progress file.
JSON_FILE = f"Data/results_in_progress_{TERM}.json"
JSON_FILE

'Data/results_in_progress_Ramen.json'

In [6]:
## Is there already a results-in-progress file on disk?
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    ## Nothing to create: just inform the user
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## CREATE ANY NEEDED FOLDERS
    # dirname is an empty string when JSON_FILE has no folder component
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")

    ## start the file off as an empty JSON list; later cells append to it
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Data/results_in_progress_Ramen.json not found. Saving empty list to file.


In [7]:
## Load whatever has been saved to the in-progress file so far
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

## The number of saved results is used as the offset of the next request
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [8]:
# First API call: ask yelp_api.search_query for the page of results that
# starts right after the ones already saved (offset = n_results).
results = yelp_api.search_query(
    location=LOCATION,
    term=TERM,
    offset=n_results,
)
# Show which top-level fields the response contains
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [9]:
# Read the 'total' field of the search response; the same value is used when
# the number of pages to request is computed.
total_results = results['total']
total_results

77

In [10]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [11]:
# Use math.ceil to round up for the number of pages needed to cover every
# result that has not been saved yet.
# Guards: an empty first page (results_per_page == 0) would otherwise raise a
# ZeroDivisionError, and a total smaller than what is already saved would
# produce a negative page count.
remaining_results = max(results['total'] - n_results, 0)
n_pages = math.ceil(remaining_results / results_per_page) if results_per_page else 0
n_pages


4

In [12]:
## Add the businesses from the first request to the saved list (in place) ...
previous_results += results['businesses']

## ... and write the combined list back to the in-progress file
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

In [13]:
import time
from tqdm.notebook import tqdm_notebook

# Dry run of the progress bar: one tick per page, with no API calls made
for i in tqdm_notebook(range(n_pages)):
    # 200 ms pause per iteration
    time.sleep(0.2)

  0%|          | 0/4 [00:00<?, ?it/s]

In [14]:
## Fetch the remaining pages. The file is re-read and re-written on every
## iteration so that an interrupted run can resume from what was saved.
for i in tqdm_notebook(range(1, n_pages + 1)):
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## number of saved results is used as the OFFSET of the next request
    n_results = len(previous_results)

    ## n_pages also counts the page that was saved before this loop started,
    ## so without this check the last iteration would request an offset past
    ## the end of the results. Stop once everything reported has been saved.
    if n_results >= results['total']:
        break

    ## 200 ms pause between requests
    time.sleep(.2)
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## an empty page means there is nothing more to fetch
    new_businesses = results['businesses']
    if not new_businesses:
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

  0%|          | 0/4 [00:00<?, ?it/s]

In [15]:
# Load the accumulated JSON results into a DataFrame
final_df = pd.read_json(JSON_FILE)

# Preview the first and last rows to sanity-check the download
first_rows = final_df.head()
last_rows = final_df.tail()
display(first_rows, last_rows)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,uwFa-pTQL1Rv4nULoIraNw,ramen-tatsu-ya-austin-2,Ramen Tatsu-Ya,https://s3-media4.fl.yelpcdn.com/bphoto/rNrHEy...,False,https://www.yelp.com/biz/ramen-tatsu-ya-austin...,1879,"[{'alias': 'ramen', 'title': 'Ramen'}]",4.0,"{'latitude': 30.253899793553, 'longitude': -97...",[],$$,"{'address1': '1234 S Lamar Blvd', 'address2': ...",15128935561,(512) 893-5561,2455.800396
1,7ZU9DwctpN_tkC7EOd3C-Q,ramen-tatsu-ya-austin-6,Ramen Tatsu-Ya,https://s3-media2.fl.yelpcdn.com/bphoto/5QE3yx...,False,https://www.yelp.com/biz/ramen-tatsu-ya-austin...,585,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.0,"{'latitude': 30.26326, 'longitude': -97.72624}",[],$$,"{'address1': '1600 E 6th St', 'address2': None...",15128935561,(512) 893-5561,1647.061405
2,yqL1P6wcSZYQAgpO3TfhBQ,kemuri-tatsu-ya-austin-2,Kemuri Tatsu-ya,https://s3-media3.fl.yelpcdn.com/bphoto/YloCTt...,False,https://www.yelp.com/biz/kemuri-tatsu-ya-austi...,784,"[{'alias': 'izakaya', 'title': 'Izakaya'}, {'a...",4.0,"{'latitude': 30.25386, 'longitude': -97.7129}",[delivery],$$,"{'address1': '2713 E 2nd St', 'address2': '', ...",15128032224,(512) 803-2224,3231.061743
3,HKi7fd2bTzZfVB_Dz9vGng,lucky-robot-austin,Lucky Robot,https://s3-media4.fl.yelpcdn.com/bphoto/-8j6xG...,False,https://www.yelp.com/biz/lucky-robot-austin?ad...,1205,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 30.250851476015615, 'longitude': ...","[delivery, pickup]",$$,"{'address1': '1303 S Congress Ave', 'address2'...",15124448081,(512) 444-8081,1915.560385
4,4h-O6wPWswMbVowT_lYn9g,tiki-tatsu-ya-austin-2,Tiki Tatsu-Ya,https://s3-media3.fl.yelpcdn.com/bphoto/2kGRBo...,False,https://www.yelp.com/biz/tiki-tatsu-ya-austin-...,161,"[{'alias': 'polynesian', 'title': 'Polynesian'...",4.0,"{'latitude': 30.25382, 'longitude': -97.76331}",[delivery],,"{'address1': '1300 S Lamar Blvd', 'address2': ...",15127723700,(512) 772-3700,2473.601679


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
72,E1CEKrHUzjSBHfr11me8Yg,7-eleven-austin-19,7-Eleven,https://s3-media4.fl.yelpcdn.com/bphoto/PwKkEU...,False,https://www.yelp.com/biz/7-eleven-austin-19?ad...,7,"[{'alias': 'convenience', 'title': 'Convenienc...",3.0,"{'latitude': 30.244661, 'longitude': -97.7296488}","[delivery, pickup]",$,"{'address1': '1705 S Lakeshore Blvd', 'address...",15124417595,(512) 441-7595,2807.881326
73,WlPy1pelng2-VnpmywiQmQ,7-eleven-austin-22,7-Eleven,https://s3-media1.fl.yelpcdn.com/bphoto/qdv0bs...,False,https://www.yelp.com/biz/7-eleven-austin-22?ad...,16,"[{'alias': 'convenience', 'title': 'Convenienc...",1.5,"{'latitude': 30.2259992, 'longitude': -97.7703...","[delivery, pickup]",$,"{'address1': '601 W Ben White Blvd', 'address2...",15124442828,(512) 444-2828,5291.223148
74,djQEHCTOJsdD3a33Aa9lbA,7-eleven-austin-7,7-Eleven,https://s3-media3.fl.yelpcdn.com/bphoto/pWiM6o...,False,https://www.yelp.com/biz/7-eleven-austin-7?adj...,15,"[{'alias': 'convenience', 'title': 'Convenienc...",2.5,"{'latitude': 30.2439924, 'longitude': -97.7822...","[delivery, pickup]",$,"{'address1': '2820 S Lamar', 'address2': '', '...",15124473119,(512) 447-3119,4583.698666
75,aUCvtmP_nJQwzzxYZHRmtw,7-eleven-austin-57,7-Eleven,https://s3-media2.fl.yelpcdn.com/bphoto/2OMj1n...,False,https://www.yelp.com/biz/7-eleven-austin-57?ad...,6,"[{'alias': 'convenience', 'title': 'Convenienc...",2.5,"{'latitude': 30.25795590261023, 'longitude': -...","[delivery, pickup]",$,"{'address1': '863 Airport Blv', 'address2': ''...",15123850821,(512) 385-0821,4834.34155
76,E0Y_WKC62Zv7pRuZCVo87w,7-eleven-austin-9,7-Eleven,https://s3-media3.fl.yelpcdn.com/bphoto/xdlYPQ...,False,https://www.yelp.com/biz/7-eleven-austin-9?adj...,15,"[{'alias': 'convenience', 'title': 'Convenienc...",2.0,"{'latitude': 30.2320290133261, 'longitude': -9...","[delivery, pickup]",$,"{'address1': '1747 East Oltorf', 'address2': N...",15124417508,(512) 441-7508,3947.510455


In [16]:
# Count rows whose 'id' repeats an earlier row (0 means every id is unique)
final_df['id'].duplicated().sum()

0

In [18]:
# save the final results to a compressed csv
# The file name is built from TERM so the export always matches the search
# term that produced it; index=False leaves the row index out of the file.
final_df.to_csv(f'Data/final_{TERM}.csv.gz', compression='gzip', index=False)