In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook


In [2]:
# Read the Yelp API credentials from a JSON file stored outside the notebook
with open('../.secret/yelp_api.json') as credentials_file:   # use your path here!
    login = json.load(credentials_file)
# Build the YelpAPI client that the search cells below call (5 second timeout)
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)


In [3]:
# Search parameters passed to every yelp_api.search_query call in this notebook
LOCATION = 'New York City, NY'   # sent as the `location` argument
TERM = 'ice cream'               # sent as the `term` argument


In [4]:
# Path of the file where in-progress search results are accumulated
# (the path may include a folder; create_json_file creates it when missing).
# Include the search terms in the filename.
JSON_FILE = "./data/results_in_progress_NY_iceCream.json"
JSON_FILE  # echo the path as the cell output


'./data/results_in_progress_NY_iceCream.json'

In [19]:
def create_json_file(JSON_FILE, delete_if_exists=False):
    """Ensure JSON_FILE exists, creating it with an empty JSON list when needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. It may include folders; any missing folders
        are created before the file is written.
    delete_if_exists : bool, default False
        If truthy and the file already exists, the file is deleted and
        replaced by a new file containing an empty list. If falsy, an
        existing file is left untouched.

    Returns
    -------
    None
        The function only prints status messages and touches the file system.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            # Keep whatever was saved previously so a run can resume from it
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        # Fall through to the creation steps below (no recursive call needed)

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS
    # dirname is an empty string when JSON_FILE has no folder component
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
            

In [20]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# An empty first page means there is nothing to page through; guard it so
# the division below cannot raise ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages


[i] ./data/results_in_progress_NY_iceCream.json not found. Saving empty list to new file.
- 0 previous results found.


600

In [21]:
for i in tqdm_notebook(range(1, n_pages + 1)):

    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## the number of saved results doubles as the offset of the next request
    n_results = len(previous_results)

    # Stop before asking for results beyond the first 1000.
    # NOTE(review): the message says "api calls", but the check is on the
    # number of results (Yelp's search endpoint limits offset + limit to 1000).
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    # An empty page would leave the offset unchanged, so every remaining
    # iteration would repeat the same request; stop instead.
    new_businesses = results['businesses']
    if not new_businesses:
        print('No more results returned. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # short pause between requests
    time.sleep(.2)


  0%|          | 0/600 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [22]:
# Read the accumulated results into a DataFrame and preview both ends of it
final_df = pd.read_json(JSON_FILE)
display(final_df.head())
display(final_df.tail())


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,QMDzs-PgUPbZCGLYorLn8A,brooklyn-ice-cream-factory-brooklyn-3,Brooklyn Ice Cream Factory,https://s3-media1.fl.yelpcdn.com/bphoto/WJBI9y...,False,https://www.yelp.com/biz/brooklyn-ice-cream-fa...,328,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.0,"{'latitude': 40.702635, 'longitude': -73.994553}",[],$$,"{'address1': '14 Old Fulton St', 'address2': '...",17185225211,(718) 522-5211,313.208823
1,7-jMpWGUxPB8EEtmoqNdqg,soft-swerve-new-york-2,Soft Swerve,https://s3-media1.fl.yelpcdn.com/bphoto/YjDIa-...,False,https://www.yelp.com/biz/soft-swerve-new-york-...,1782,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"{'latitude': 40.71819, 'longitude': -73.99118}","[delivery, pickup]",$$,"{'address1': '85B Allen St', 'address2': '', '...",16464766311,(646) 476-6311,1438.492938
2,b9Qd4jaxugk7Zo7LE_lLrg,oddfellows-ice-cream-co-brooklyn-7,OddFellows Ice Cream Co.,https://s3-media4.fl.yelpcdn.com/bphoto/nBOC76...,False,https://www.yelp.com/biz/oddfellows-ice-cream-...,152,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.0,"{'latitude': 40.70325596349647, 'longitude': -...","[delivery, pickup]",$$,"{'address1': '44 Water St', 'address2': '', 'a...",17186835755,(718) 683-5755,302.228468
3,vk7W3_sQwr7eZbRFsXv6rw,taiyaki-nyc-new-york,Taiyaki NYC,https://s3-media3.fl.yelpcdn.com/bphoto/Shd3Gq...,False,https://www.yelp.com/biz/taiyaki-nyc-new-york?...,3329,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.5,"{'latitude': 40.71789, 'longitude': -73.9988}","[delivery, pickup]",$,"{'address1': '119 Baxter St', 'address2': '', ...",12129662882,(212) 966-2882,1439.725809
4,G59rirqi-xoGq5mpoJgv3A,sugar-hill-creamery-brooklyn,Sugar Hill Creamery,https://s3-media2.fl.yelpcdn.com/bphoto/Hxox24...,False,https://www.yelp.com/biz/sugar-hill-creamery-b...,19,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"{'latitude': 40.70356978099554, 'longitude': -...",[],,"{'address1': '55 Water St', 'address2': '', 'a...",12126349004,(212) 634-9004,274.910053


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,j0Q-yyxS2WiL3foCJEV4eg,dirt-candy-new-york-2,Dirt Candy,https://s3-media1.fl.yelpcdn.com/bphoto/QaHPTO...,False,https://www.yelp.com/biz/dirt-candy-new-york-2...,819,"[{'alias': 'vegetarian', 'title': 'Vegetarian'...",4.0,"{'latitude': 40.7179075, 'longitude': -73.9907...","[delivery, pickup]",$$$$,"{'address1': '86 Allen St', 'address2': '', 'a...",12122287732,(212) 228-7732,1413.704598
996,8bI7p88w4uiHfk4R--1k8w,cake-ambiance-brooklyn,Cake Ambiance,https://s3-media1.fl.yelpcdn.com/bphoto/2HGjwn...,False,https://www.yelp.com/biz/cake-ambiance-brookly...,163,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.0,"{'latitude': 40.65093, 'longitude': -73.95234}","[delivery, pickup]",$$,"{'address1': '813 Rogers Ave', 'address2': '',...",17186232253,(718) 623-2253,7025.908948
997,gTEoEsssfbjl76Opf5bP9g,l-amico-nyc-new-york,L'Amico NYC,https://s3-media2.fl.yelpcdn.com/bphoto/aDjdvg...,False,https://www.yelp.com/biz/l-amico-nyc-new-york?...,578,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",3.5,"{'latitude': 40.7469265, 'longitude': -73.9900...","[delivery, pickup]",$$$,"{'address1': '849 6th Ave', 'address2': '', 'a...",12122014065,(212) 201-4065,4626.10657
998,2VpurA2lb-q90nmALklNcA,perry-st-new-york,Perry St,https://s3-media2.fl.yelpcdn.com/bphoto/Sou6iO...,False,https://www.yelp.com/biz/perry-st-new-york?adj...,1004,"[{'alias': 'newamerican', 'title': 'American (...",4.0,"{'latitude': 40.73442, 'longitude': -74.00971}","[delivery, pickup]",$$$,"{'address1': '176 Perry St', 'address2': '', '...",12123521900,(212) 352-1900,3474.641743
999,yGFQFm3O9vCsKTVpvmDuGg,dinner-party-brooklyn,Dinner Party,https://s3-media4.fl.yelpcdn.com/bphoto/OU3orE...,False,https://www.yelp.com/biz/dinner-party-brooklyn...,33,"[{'alias': 'newamerican', 'title': 'American (...",4.5,"{'latitude': 40.68692, 'longitude': -73.97469}",[],,"{'address1': '86 S Portland Ave', 'address2': ...",13474639173,(347) 463-9173,2641.735075


In [23]:
# Count the rows whose id already appeared in an earlier row
final_df['id'].duplicated().sum()


98

In [24]:
## Keep only the first row for each id, then confirm that no duplicates remain
final_df = final_df.drop_duplicates(subset=['id'], keep='first')
final_df['id'].duplicated().sum()


0

In [25]:
# Write the de-duplicated results to a gzip-compressed csv (index column omitted)
final_df.to_csv(
    './data/final_results_NY_iceCream.csv.gz',
    index=False,
    compression='gzip',
)
