In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
#Load Credentials
# Build the credentials path from the current user's home directory rather than
# hard-coding one machine's absolute path, so the notebook runs for any user
# who keeps the file at ~/.secret/yelp_api.json.
CREDENTIALS_FILE = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(CREDENTIALS_FILE) as f:
    login = json.load(f)

#Instantiate YelpAPI
# The key is read from the 'api-key' entry of the loaded JSON.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [5]:
# Path of the in-progress results file that the search cells read and append to
JSON_FILE = "Data/results_in_prog_SF_pizza.json"
JSON_FILE

'Data/results_in_prog_SF_pizza.json'

In [8]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Make sure a JSON results file exists at ``JSON_FILE``.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file to create.
    delete_if_exists : bool, default False
        If the file already exists: when True, delete it and start over with
        a fresh file holding an empty list; when False, leave it untouched.

    Returns
    -------
    None

    Notes
    -----
    Whenever this function writes a file (because none existed, or because the
    old one was deleted), the file contains an empty JSON list and any missing
    parent folder is created first.
    """
    #Check if exists
    file_exists = os.path.isfile(JSON_FILE)

    if file_exists:
        if not delete_if_exists:
            #Keep the existing file exactly as it is
            print(f"[!]{JSON_FILE} already exists.")
            return

        print(f"[!]{JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        #Fall through so a fresh empty file replaces the deleted one; without
        #this, later cells that open the file for reading would fail.
    else:
        print(f"[!]{JSON_FILE} not found. Saving empty list to new file.")

    #Create folder if needed
    folder = os.path.dirname(JSON_FILE)
    if len(folder) > 0:
        os.makedirs(folder, exist_ok=True)

    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

In [9]:
# Ensure the results file exists; an existing file is kept rather than deleted
create_json_file(JSON_FILE, delete_if_exists=False)

[!]Data/results_in_prog_SF_pizza.json not found. Saving empty list to new file.


In [10]:
# Search criteria sent to the Yelp query
term = "pizza"
location = "Sioux Falls, SD"

# Load whatever has already been saved to the results file
with open(JSON_FILE, 'r') as saved_file:
    previous_results = json.load(saved_file)

# The number of saved results is used as the offset of the next request
n_results = len(previous_results)

# Run the search starting at that offset and keep the response
results = yelp_api.search_query(term=term, location=location, offset=n_results)

In [11]:
#Check total results from our search
# Read the 'total' entry of the search response and display it.
# NOTE(review): presumably this is the overall number of matching businesses,
# not just the size of this page - confirm against the Yelp API docs.
total_results = results['total']
total_results

137

In [13]:
#confirm results per page
# Count the businesses contained in this single response; the cells below use
# this count as the page size when working out how many requests to make.
results_per_page = len(results['businesses'])
results_per_page

20

In [14]:
#Determine the number of pages we have in our results
# Remaining results (total minus what is already saved) divided by the page
# size, rounded up so a final partial page is still fetched.
# results_per_page is 0 when the search returned no businesses (presumably the
# case when the saved file already holds every result and the offset is at the
# end). Dividing by it would raise ZeroDivisionError, so report zero pages.
if results_per_page > 0:
    n_pages = math.ceil((results['total']-n_results)/results_per_page)
else:
    n_pages = 0
n_pages

7

In [15]:
#Create a for-loop to run through our pages and save/append them to the json file

for page_num in tqdm_notebook(range(1,n_pages+1)):

    # Re-read the file on every pass so the offset always matches what is
    # actually saved on disk.
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)

    n_results = len(previous_results)

    # Stop before the saved count plus one more page would go past 1000.
    # NOTE(review): this guard is on the number of results, not on the number
    # of API calls, despite the wording of the message below.
    if (n_results + results_per_page) > 1000:
        print("Exceeded 1000 API calls. Stopping loop")
        break

    results = yelp_api.search_query(location=location, term=term, offset=n_results)

    # An empty page means there is nothing more to collect. Without this check
    # the loop would keep requesting the same offset on every remaining pass
    # without ever adding anything to the file.
    new_businesses = results['businesses']
    if not new_businesses:
        break

    previous_results.extend(new_businesses)

    # Save after every page so progress survives an interruption.
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results,f)

    # Short pause between requests.
    time.sleep(.2)

  0%|          | 0/7 [00:00<?, ?it/s]

In [16]:
# Load the saved results into a dataframe and preview both ends of it

final_df = pd.read_json(JSON_FILE)
first_rows = final_df.head(2)
last_rows = final_df.tail(2)
display(first_rows, last_rows)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,eZzz0bu7FnMtvWAS6RL7RQ,pizza-di-paolo-sioux-falls,Pizza di Paolo,https://s3-media4.fl.yelpcdn.com/bphoto/bKlcr9...,False,https://www.yelp.com/biz/pizza-di-paolo-sioux-...,26,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 43.5236937890844, 'longitude': -9...",[],"{'address1': '2300 S Minnesota Ave', 'address2...",16052713935,(605) 271-3935,1121.940229,
1,RqzB5GHdyKbMiF5FPLZ8ug,pizza-cheeks-sioux-falls,Pizza Cheeks,https://s3-media3.fl.yelpcdn.com/bphoto/ESRDU8...,False,https://www.yelp.com/biz/pizza-cheeks-sioux-fa...,20,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 43.54656, 'longitude': -96.72637}",[],"{'address1': '120 S Phillips Ave', 'address2':...",16052710974,(605) 271-0974,1688.354192,


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
135,6NJYMQhEyfCGWImjshuWhA,popeyes-louisiana-kitchen-sioux-falls,Popeyes Louisiana Kitchen,https://s3-media4.fl.yelpcdn.com/bphoto/UcCOJC...,False,https://www.yelp.com/biz/popeyes-louisiana-kit...,28,"[{'alias': 'chicken_wings', 'title': 'Chicken ...",2.0,"{'latitude': 43.5469339136705, 'longitude': -9...","[pickup, delivery]","{'address1': '2208 E 10th St', 'address2': '',...",16052714102,(605) 271-4102,3420.082527,$
136,f6PBBzEb4iAehxO45qS-jg,chilis-sioux-falls,Chili's,https://s3-media1.fl.yelpcdn.com/bphoto/ZgFwWe...,False,https://www.yelp.com/biz/chilis-sioux-falls?ad...,41,"[{'alias': 'bars', 'title': 'Bars'}, {'alias':...",2.5,"{'latitude': 43.5149749, 'longitude': -96.7725...","[pickup, delivery]","{'address1': '3720 W 41st St', 'address2': '',...",16053613900,(605) 361-3900,3553.195081,$$


In [18]:
# Count rows whose business id repeats an earlier row

final_df['id'].duplicated().sum()

0

In [19]:
# Write the final table to the Data folder as CSV, leaving out the index column

CSV_FILE = 'Data/final_results_SF_pizza.csv'
final_df.to_csv(CSV_FILE, index=False)