# Imports

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Load the Yelp API credentials from a JSON file kept outside the project.
# The location is built from the current user's home directory instead of a
# machine-specific absolute path, so the notebook runs for anyone who keeps
# the file at ~/.secret/yelp_api.json.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
# Display only the key names, never the secret values
login.keys()

dict_keys(['client-id', 'api-key'])

In [3]:
# Build the Yelp API client from the loaded 'api-key' credential,
# passing timeout_s=5.0 to the client constructor
yelp_api = YelpAPI(
    login['api-key'],
    timeout_s=5.0,
)

In [4]:
# Search parameters passed as `term` and `location` to every search_query call
TERM = 'Sushi'
LOCATION = 'Daly City,CA'

In [5]:
# Specify the JSON_FILE filename (it may include a folder).
# The search terms are included in the filename so results from different
# searches are kept in separate files.
JSON_FILE = "Data/results_in_progress_CA_sushi.json"
JSON_FILE

'Data/results_in_progress_CA_sushi.json'

In [6]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure JSON_FILE exists, creating it with an empty JSON list if needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. It may be a bare filename or include folders;
        any missing folders are created.
    delete_if_exists : bool, default False
        If True and the file already exists, delete it and start over with a
        new file holding an empty list. If False, an existing file is left
        untouched.

    Returns
    -------
    None
        The function only prints status messages and touches the filesystem.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            # Keep the existing file (and any results already saved in it)
            print(f"[i] {JSON_FILE} already exists.")
            return
        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)

    # Reaching this point means the file is absent (never existed or just deleted)
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    # Create the parent folder(s) only when the path actually includes one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Always write the empty list, including for a bare filename with no
    # folder component (previously the write was skipped in that case)
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [7]:
# Start from a fresh results file: delete the previous one if it exists,
# then create a new file holding an empty list
create_json_file(JSON_FILE, delete_if_exists=True)

# Read back what is stored so far; its length becomes the request offset
with open(JSON_FILE, 'r') as results_file:
    previous_results = json.load(results_file)
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

# First API call through yelp_api.search_query, starting at that offset
results = yelp_api.search_query(
    location=LOCATION,
    term=TERM,
    offset=n_results,
)

[!] Data/results_in_progress_CA_sushi.json already exists. Deleting previous file...
[i] Data/results_in_progress_CA_sushi.json not found. Saving empty list to new file.
- 0 previous results found.


In [8]:
## How many results total?
# Read the count stored under the 'total' key of the search response
total_results = results['total']
total_results

271

In [9]:
## How many did we get the details for?
# Number of entries in the 'businesses' list returned by this single call;
# used below as the page size
results_per_page = len(results['businesses'])
results_per_page

20

In [10]:
# Pages still to fetch: results not yet saved divided by the page size,
# rounded up so a partial last page is counted
n_pages = math.ceil(
    (results['total'] - n_results) / results_per_page
)
n_pages

14

In [11]:
# Fetch the results page by page, saving to JSON_FILE after every request
for page in tqdm_notebook(range(1, n_pages + 1)):

    # Reload everything saved so far; its length is the offset of the next page
    with open(JSON_FILE, 'r') as saved:
        previous_results = json.load(saved)
    n_results = len(previous_results)

    # Stop before requesting a page that would take the stored count past 1000
    if n_results + results_per_page > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    # Request the next page, using the number of saved results as the offset
    results = yelp_api.search_query(
        location=LOCATION,
        term=TERM,
        offset=n_results,
    )

    # Append this page's businesses and rewrite the whole file
    previous_results.extend(results['businesses'])
    with open(JSON_FILE, 'w') as saved:
        json.dump(previous_results, saved)

    # Short pause between consecutive requests
    time.sleep(.2)

  0%|          | 0/14 [00:00<?, ?it/s]

In [12]:
# Load the final results from JSON_FILE into a DataFrame
final_df = pd.read_json(JSON_FILE)
# Show the first and last rows as a quick check of what was loaded
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,4cXj-H3qt-fE4Ly8giRP-A,izumi-revolving-sushi-daly-city-2,Izumi Revolving Sushi,https://s3-media2.fl.yelpcdn.com/bphoto/caj-Qe...,False,https://www.yelp.com/biz/izumi-revolving-sushi...,262,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",3.5,"{'latitude': 37.671829010252814, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '94 Serramonte Ctr', 'address2': ...",16509855555,(650) 985-5555,1800.386078
1,KQzMMqzpxhVtdgSpJPeXyg,tom-san-ramen-daly-city,Tom San Ramen,https://s3-media4.fl.yelpcdn.com/bphoto/yfZO47...,False,https://www.yelp.com/biz/tom-san-ramen-daly-ci...,305,"[{'alias': 'ramen', 'title': 'Ramen'}, {'alias...",4.0,"{'latitude': 37.697134, 'longitude': -122.48377}","[pickup, delivery]",$$,"{'address1': '177 Southgate Ave', 'address2': ...",16509934204,(650) 993-4204,3423.026168
2,wSsuLcdgXat_g69PswYWxw,ichika-sushi-house-brisbane,Ichika Sushi House,https://s3-media3.fl.yelpcdn.com/bphoto/AFE4nV...,False,https://www.yelp.com/biz/ichika-sushi-house-br...,723,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 37.68559, 'longitude': -122.39976}","[pickup, delivery]",$$,"{'address1': '118 Old County Rd', 'address2': ...",14158255790,(415) 825-5790,4750.381852
3,0K6O21FH30efT_nDB_ALIg,tomo-sushi-and-teriyaki-daly-city,Tomo Sushi & Teriyaki,https://s3-media3.fl.yelpcdn.com/bphoto/buQhI5...,False,https://www.yelp.com/biz/tomo-sushi-and-teriya...,606,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",3.5,"{'latitude': 37.7020804132731, 'longitude': -1...","[pickup, delivery]",$$,"{'address1': '1901 Junipero Serra Blvd', 'addr...",16509911045,(650) 991-1045,3055.38201
4,m-X2DibsCMVNrkgiN2lo6g,kamu-sushi-south-san-francisco,Kamu Sushi,https://s3-media1.fl.yelpcdn.com/bphoto/3M8dsJ...,False,https://www.yelp.com/biz/kamu-sushi-south-san-...,652,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.0,"{'latitude': 37.6548865, 'longitude': -122.408...","[delivery, restaurant_reservation]",$$,"{'address1': '112 Grand Ave', 'address2': '', ...",16509522888,(650) 952-2888,4720.70819


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
266,C9YDe3DdURTN01ZaTefWWw,safeway-san-francisco-21,Safeway,https://s3-media4.fl.yelpcdn.com/bphoto/srKv5b...,False,https://www.yelp.com/biz/safeway-san-francisco...,266,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.0,"{'latitude': 37.743262, 'longitude': -122.4739...",[],$$,"{'address1': '730 Taraval St', 'address2': '',...",14156654136,(415) 665-4136,7470.586859
267,NDwLp5NXPU7bvnxdsVYntw,safeway-san-francisco-17,Safeway,https://s3-media4.fl.yelpcdn.com/bphoto/WkLsHi...,False,https://www.yelp.com/biz/safeway-san-francisco...,291,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.5,"{'latitude': 37.7435706998295, 'longitude': -1...",[],$$,"{'address1': '5290 Diamond Heights Blvd', 'add...",14158247744,(415) 824-7744,7352.131724
268,P-PhLMdBTZSPC4Z_IS3i0w,serranos-pizza-san-francisco-2,Serrano's Pizza,https://s3-media1.fl.yelpcdn.com/bphoto/VA1fD_...,False,https://www.yelp.com/biz/serranos-pizza-san-fr...,662,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",3.5,"{'latitude': 37.75714, 'longitude': -122.42051}","[pickup, delivery]",$,"{'address1': '3274 21st St', 'address2': '', '...",14156951615,(415) 695-1615,9203.601452
269,ADFGHvCR1e5MvF-BD-6qdA,the-plant-cafe-organic-san-francisco-9,The Plant Cafe Organic,https://s3-media1.fl.yelpcdn.com/bphoto/qF_NFB...,False,https://www.yelp.com/biz/the-plant-cafe-organi...,348,"[{'alias': 'vegetarian', 'title': 'Vegetarian'...",3.0,"{'latitude': 37.6172703, 'longitude': -122.381...",[],$$,"{'address1': '173 Airport Access Rd', 'address...",16508219290,(650) 821-9290,9263.749899
270,8nh53RlytiFpqtA3eTh7gw,dominos-pizza-san-francisco,Domino's Pizza,https://s3-media1.fl.yelpcdn.com/bphoto/V4MWdf...,False,https://www.yelp.com/biz/dominos-pizza-san-fra...,299,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",2.0,"{'latitude': 37.7534963038738, 'longitude': -1...","[pickup, delivery]",$,"{'address1': '3116 Noriega St', 'address2': No...",14156818100,(415) 681-8100,9214.852991


In [13]:
# Count rows whose 'id' repeats an earlier row's 'id' (0 means no duplicates)
final_df.duplicated(subset='id').sum()

0

In [14]:
# Write the final results to a gzip-compressed CSV, leaving out the index
final_df.to_csv(
    'Data/final_results_CA_sushi.csv.gz',
    compression='gzip',
    index=False,
)