# import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from yelpapi import YelpAPI
import os, json, math, time
from tqdm.notebook import tqdm_notebook


# Credentials and accessing the API

In [2]:
# Load API credentials.
# The file location can be overridden with the YELP_API_CREDENTIALS
# environment variable so the notebook is not tied to one user's home
# directory; when the variable is unset the original path is used.
credentials_path = os.environ.get('YELP_API_CREDENTIALS',
                                  '/Users/ERNESTO/.secret/yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)

# Instantiate YelpAPI variable (the key is read from the 'api-key' entry)
yelp_api = YelpAPI(login['api-key'], timeout_s = 5.0)

# Define search

In [3]:
# Search parameters: where to search and what to search for
LOCATION, TERM = 'Portland, OR', 'sushi'

# Create a results-in-progress JSON file, but only if it doesn't exist

In [4]:
# Specify the JSON_FILE filename.
# It is built from TERM and LOCATION so that changing the search
# automatically targets a different results file instead of silently
# reusing (or overwriting) the file of a previous search.
term_slug = TERM.replace(' ', '_')
location_slug = LOCATION.replace(',', '').replace(' ', '_')
JSON_FILE = f"Data/results_in_progress_{term_slug}_{location_slug}.json"
JSON_FILE

'Data/results_in_progress_sushi_Portland_OR.json'

# Check if our JSON_FILE already exists

This will prevent us from accidentally overwriting an existing file.

If it doesn't exist:

Create any folders needed for the file path.

Save an empty list as JSON_FILE.

So let's make a create_json_file function that accepts the JSON_FILE filename as its first argument and a second argument called delete_if_exists, set to False by default.

This way, it will not automatically delete previous search results. We will have to explicitly say delete_if_exists = True to do so.

In [5]:
def create_json_file(JSON_FILE, delete_if_exists = False):
    """Make sure a JSON file holding an empty list exists at ``JSON_FILE``.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. Any missing parent folders are created.
    delete_if_exists : bool, default False
        When truthy and the file already exists, the file is removed and
        replaced by a new one containing an empty list. When falsy, an
        existing file is left untouched.

    Returns
    -------
    None
        The function only prints status messages and writes to disk.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            # keep the previous results exactly as they are
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[i]{JSON_FILE} already exists. Deleting previous file.")
        os.remove(JSON_FILE)
        # fall through to the creation step below (no recursion needed)

    # inform user and save empty list
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    # create any needed folders; dirname is '' when JSON_FILE has no folder part
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok = True)

    # save an empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
            

Now that we have our new function, we can use it with delete_if_exists=True to delete our previous results and start over

In [6]:
## Start over with a fresh, empty JSON file (a previous file is deleted first)
create_json_file(JSON_FILE, delete_if_exists=True)

## Read the stored results back from disk
with open(JSON_FILE) as results_fh:
    previous_results = json.load(results_fh)

## The number of results already saved is used as the search offset
n_results = len(previous_results)
print(f'- {n_results} previous results found.')



[i] Data/results_in_progress_sushi_Portland_OR.json not found. Saving empty list to new file.
- 0 previous results found.


In [7]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
print('total_results =', total_results)
## How many did we get the details for?
results_per_page = len(results['businesses'])
print ('results_per_page =', results_per_page)

# Searches that page past 1000 results are expected to fail with a
# YelpAPIError, so never plan to fetch more than that.
# NOTE(review): confirm the exact cap against the current Yelp API docs.
MAX_API_RESULTS = 1000
retrievable_results = min(total_results, MAX_API_RESULTS)

# Use math.ceil to round up for the total number of pages of results.
# An empty first page would otherwise cause a division by zero.
if results_per_page > 0:
    n_pages = math.ceil(max(retrievable_results - n_results, 0) / results_per_page)
else:
    n_pages = 0
print('n_pages =', n_pages)

for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as offset
    n_results = len(previous_results)

    ## stop before requesting a page that would reach past the API cap
    if n_results + results_per_page > MAX_API_RESULTS:
        print(f'[!] Stopping at {n_results} results: API cap of {MAX_API_RESULTS} reached.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## an empty page means there is nothing left to fetch
    if not results['businesses']:
        break

    ## append new results and save to file
    previous_results.extend(results['businesses'])

    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

    # add a 200ms pause
    time.sleep(.2)
    


total_results = 584
results_per_page = 20
n_pages = 30


  0%|          | 0/30 [00:00<?, ?it/s]

# After the loop has finished convert .json to dataframe

In [8]:
# Load every saved search result from the JSON file into a DataFrame
final_df = pd.read_json(path_or_buf=JSON_FILE)
final_df

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,PovmZV8Bolu2BizxN2d9Zg,bluefin-tuna-and-sushi-portland,Bluefin Tuna & Sushi,https://s3-media4.fl.yelpcdn.com/bphoto/_GslUm...,False,https://www.yelp.com/biz/bluefin-tuna-and-sush...,196,"[{'alias': 'sushi', 'title': 'Sushi Bars'}]",4.5,"{'latitude': 45.535234, 'longitude': -122.651657}","[pickup, delivery]",$$,"{'address1': '1337 NE Broadway St', 'address2'...",+15032816804,(503) 281-6804,2161.425790
1,90nu9SmFfQaXsoWoZGB0mQ,murata-restaurant-portland,Murata Restaurant,https://s3-media2.fl.yelpcdn.com/bphoto/TiwioR...,False,https://www.yelp.com/biz/murata-restaurant-por...,359,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 45.511592, 'longitude': -122.678604}","[pickup, delivery]",$$,"{'address1': '200 SW Market St', 'address2': '...",+15032270080,(503) 227-0080,1962.210740
2,TEjeHO-5NVqXoGJeg-sj5A,kashiwagi-portland,Kashiwagi,https://s3-media1.fl.yelpcdn.com/bphoto/SE0z38...,False,https://www.yelp.com/biz/kashiwagi-portland?ad...,79,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 45.50500365152785, 'longitude': -...",[delivery],$$,"{'address1': '2425 SE 26th Ave', 'address2': '...",+15032333946,(503) 233-3946,1635.976639
3,gYHqxhVPjU45EiIivYbfUw,yama-sushi-and-sake-bar-portland-3,Yama Sushi & Sake Bar,https://s3-media2.fl.yelpcdn.com/bphoto/SSs3Fe...,False,https://www.yelp.com/biz/yama-sushi-and-sake-b...,940,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 45.52968970972, 'longitude': -122...","[delivery, pickup]",$$,"{'address1': '926 NW 10th Ave', 'address2': ''...",+15038415463,(503) 841-5463,2604.698967
4,eSFa2XHBeIwYi3Oq-dSbzw,koya-sushi-beaverton,Koya Sushi,https://s3-media4.fl.yelpcdn.com/bphoto/R5gwLs...,False,https://www.yelp.com/biz/koya-sushi-beaverton?...,125,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",5.0,"{'latitude': 45.48627, 'longitude': -122.80578}","[pickup, delivery]",,"{'address1': '12570 SW 1st St', 'address2': No...",+15035672650,(503) 567-2650,12271.791288
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
579,HQs9i3YbfD7yHAeGZ6VZKQ,fred-meyer-hillsboro-4,Fred Meyer,https://s3-media2.fl.yelpcdn.com/bphoto/VcMF2x...,False,https://www.yelp.com/biz/fred-meyer-hillsboro-...,37,"[{'alias': 'electronics', 'title': 'Electronic...",2.0,"{'latitude': 45.55095, 'longitude': -122.90336}",[],,"{'address1': '22075 NE Imbrie Dr', 'address2':...",+15037471100,(503) 747-1100,19829.871179
580,wqJbd2b1kXlePzRKJtPQIw,chucks-produce-and-street-market-vancouver,Chuck's Produce & Street Market,https://s3-media3.fl.yelpcdn.com/bphoto/HrFfrH...,False,https://www.yelp.com/biz/chucks-produce-and-st...,274,"[{'alias': 'grocery', 'title': 'Grocery'}, {'a...",3.5,"{'latitude': 45.618329932612, 'longitude': -12...",[delivery],$$,"{'address1': '13215 SE Mill Plain Blvd', 'addr...",+13605972700,(360) 597-2700,14583.405158
581,PdjdEYS_bVDDpP53ySIHcQ,fred-meyer-tigard-3,Fred Meyer,https://s3-media4.fl.yelpcdn.com/bphoto/r3alQ-...,False,https://www.yelp.com/biz/fred-meyer-tigard-3?a...,91,"[{'alias': 'deptstores', 'title': 'Department ...",2.5,"{'latitude': 45.44117, 'longitude': -122.752164}",[],$$,"{'address1': '11565 Sw Pacific Hwy', 'address2...",+15032937053,(503) 293-7053,11281.805444
582,RvfpNLtswxPRBBNApCTCIA,fred-meyer-fuel-center-hillsboro,Fred Meyer Fuel Center,https://s3-media2.fl.yelpcdn.com/bphoto/zsJLeb...,False,https://www.yelp.com/biz/fred-meyer-fuel-cente...,91,"[{'alias': 'servicestations', 'title': 'Gas St...",3.0,"{'latitude': 45.5498713299954, 'longitude': -1...",[],$$,"{'address1': '22075 NW Imbrie Dr', 'address2':...",+15037471100,(503) 747-1100,19829.631520


In [9]:
# count rows whose business id already appeared in an earlier row
final_df['id'].duplicated().sum()



0

In [10]:
# save the final results to a compressed csv
# The output name is derived from JSON_FILE (same folder and base name,
# '.csv.gz' extension) so it always matches the search that produced the data.
csv_file = os.path.splitext(JSON_FILE)[0] + '.csv.gz'
final_df.to_csv(csv_file, compression='gzip', index=False)

