In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook



In [2]:
# Install tmdbsimple (only need to run once)
!pip install tqdm



In [3]:
# Load API Credentials
# The location of the credentials file can be overridden by setting the
# YELP_API_CREDENTIALS environment variable; when it is not set, the
# original hard-coded path is used so existing setups keep working.
credentials_path = os.environ.get('YELP_API_CREDENTIALS',
                                  '/Users/swaghanaian/secret/yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
# Instantiate YelpAPI Variable (the key is read from the file, never typed
# into the notebook)
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)



In [5]:
# Show which fields the credentials file provides (key names only, not their values)
login.keys()

dict_keys(['client-id', 'api-key'])

In [6]:
# Search parameters passed to every yelp_api.search_query call below:
# LOCATION is sent as `location` and TERM as `term`
LOCATION = 'brooklyn,NY'
TERM = 'Pizza'

In [7]:
# File that accumulates the raw API results while pages are being downloaded
# (the path can include a folder).
# Include the search terms in the filename so it is clear which search the
# saved results belong to.
JSON_FILE = "Data/results_in_progress_NY_pizza.json"
# Echo the path as the cell output
JSON_FILE



'Data/results_in_progress_NY_pizza.json'

In [8]:
def create_json_file(JSON_FILE,  delete_if_exists=False):
    """Make sure JSON_FILE exists, writing an empty JSON list when needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. It may include a folder; missing folders are
        created before the file is written.
    delete_if_exists : bool, default False
        If the file already exists and this is True, the existing file is
        removed and replaced by a new file containing an empty list. If it
        is False, an existing file is left untouched.

    Returns
    -------
    None
        The function only prints a status message and touches the file system.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            ## Nothing to do: keep the existing file and its contents
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        os.remove(JSON_FILE)
        ## Fall through so a fresh empty file is written below; without this
        ## the caller would be left with no file to read from.
    else:
        ## INFORM USER AND SAVE EMPTY LIST
        print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS (only when JSON_FILE included a folder)
    folder = os.path.dirname(JSON_FILE)
    if len(folder) > 0:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)



In [9]:
## Reset the results file: delete_if_exists=True asks create_json_file to
## discard any results file left over from a previous run
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# A search that returns no businesses gives results_per_page == 0; report
# zero pages instead of failing with ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages



[i] Data/results_in_progress_NY_pizza.json not found. Saving empty list to new file.
- 0 previous results found.


560

In [10]:
for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as offset
    n_results = len(previous_results)

    ## Stop before requesting a page that would go past 1000 stored results.
    ## NOTE(review): the message says "api calls", but the quantity compared
    ## is a count of results — presumably the API's cap on retrievable
    ## results; confirm against the Yelp documentation.
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means the offset cannot advance: every remaining
    ## iteration would repeat the same request, so stop here instead.
    new_businesses = results['businesses']
    if len(new_businesses) == 0:
        print('No more results returned. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)

    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

    ## brief pause between requests
    time.sleep(.2)



  0%|          | 0/560 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [11]:
# Load the accumulated results file into a DataFrame
final_df = pd.read_json(JSON_FILE)
# Preview the first and last rows
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,tpcCX_SHaNXqAzXOzbluJQ,fini-pizza-brooklyn,Fini Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/UEW7bE...,False,https://www.yelp.com/biz/fini-pizza-brooklyn?a...,12,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.71348, 'longitude': -73.96182}",[],"{'address1': '305 Bedford Ave', 'address2': ''...",,,7054.533194,
1,ysqgdbSrezXgVwER2kQWKA,julianas-brooklyn-3,Juliana's,https://s3-media1.fl.yelpcdn.com/bphoto/OCDZ4n...,False,https://www.yelp.com/biz/julianas-brooklyn-3?a...,2523,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.70274718768062, 'longitude': -...",[delivery],"{'address1': '19 Old Fulton St', 'address2': '...",17185966700.0,(718) 596-6700,7226.45463,$$
2,xRiLLXjeM2wOvVTYJH2__A,barboncino-pizza-and-bar-brooklyn-2,Barboncino Pizza & Bar,https://s3-media1.fl.yelpcdn.com/bphoto/vOe8cj...,False,https://www.yelp.com/biz/barboncino-pizza-and-...,1079,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 40.67206, 'longitude': -73.95717}",[delivery],"{'address1': '781 Franklin Ave', 'address2': '...",17184838834.0,(718) 483-8834,2663.491653,$$
3,3VJwpmmeoPlCN3J5p5miEQ,brooklyns-homeslice-pizzeria-brooklyn,Brooklyn's Homeslice Pizzeria,https://s3-media1.fl.yelpcdn.com/bphoto/PFbKEy...,False,https://www.yelp.com/biz/brooklyns-homeslice-p...,181,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.680702, 'longitude': -73.967455}","[pickup, delivery]","{'address1': '567 Vanderbilt Ave', 'address2':...",17183999000.0,(718) 399-9000,3945.532545,$
4,C8j0q4Ma_S5hBGuAI-aaww,di-fara-pizza-brooklyn,Di Fara Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/Et320n...,False,https://www.yelp.com/biz/di-fara-pizza-brookly...,3837,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.625093, 'longitude': -73.961531}","[pickup, delivery]","{'address1': '1424 Ave J', 'address2': '', 'ad...",17182581367.0,(718) 258-1367,3559.746344,$$


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
995,0eHYheq3fwDGKAaGtxUGtQ,panino-toasty-delicious-brooklyn-3,Panino Toasty Delicious,https://s3-media2.fl.yelpcdn.com/bphoto/gPzjAS...,False,https://www.yelp.com/biz/panino-toasty-delicio...,1,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",5.0,"{'latitude': 40.63206, 'longitude': -73.995}","[pickup, delivery]","{'address1': '5401 13th Ave', 'address2': None...",17185766839.0,(718) 576-6839,5203.292145,
996,VrP6mI9Ihf2qt8dH6XwnYg,proto-s-pizza-new-york-2,Proto’s Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/AdbSxI...,False,https://www.yelp.com/biz/proto-s-pizza-new-yor...,111,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.725353, 'longitude': -73.989746}","[pickup, delivery]","{'address1': '50 2nd Ave', 'address2': None, '...",12124324044.0,(212) 432-4044,9165.059693,$
997,dWBDxX4E_QDYxLRGPzU5Jg,bene-restaurant-and-pizzeria-brooklyn,Bene Restaurant & Pizzeria,https://s3-media3.fl.yelpcdn.com/bphoto/YKcBna...,False,https://www.yelp.com/biz/bene-restaurant-and-p...,57,"[{'alias': 'pizza', 'title': 'Pizza'}]",2.5,"{'latitude': 40.6547993572564, 'longitude': -7...","[pickup, delivery]","{'address1': '157 Prospect Park SW', 'address2...",17186860046.0,(718) 686-0046,2895.130837,$
998,wKku3dhD7ZUT1eGQxfY7uA,robertas-pizza-new-york-3,Roberta's Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/IQaAqs...,False,https://www.yelp.com/biz/robertas-pizza-new-yo...,37,"[{'alias': 'pizza', 'title': 'Pizza'}]",3.5,"{'latitude': 40.75686, 'longitude': -73.97228}",[],"{'address1': '570 Lexington Ave', 'address2': ...",,,11949.81265,
999,QY2VGbMfun6BCJCT6l_lcg,friendly-gourmet-pizza-new-york,Friendly Gourmet Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/r0H1Du...,False,https://www.yelp.com/biz/friendly-gourmet-pizz...,61,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.70925, 'longitude': -74.0088}","[pickup, delivery]","{'address1': '59 Nassau St', 'address2': '', '...",12127911800.0,(212) 791-1800,8630.902446,$


In [13]:
# NOTE(review): this cell does not check for duplicates. DataFrame.replace
# with plain string arguments only swaps cells whose entire value equals "'",
# and the result is not assigned, so final_df itself is unchanged — the cell
# just displays the frame.
final_df.replace("'",'"')



Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,tpcCX_SHaNXqAzXOzbluJQ,fini-pizza-brooklyn,Fini Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/UEW7bE...,False,https://www.yelp.com/biz/fini-pizza-brooklyn?a...,12,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.71348, 'longitude': -73.96182}",[],"{'address1': '305 Bedford Ave', 'address2': ''...",,,7054.533194,
1,ysqgdbSrezXgVwER2kQWKA,julianas-brooklyn-3,Juliana's,https://s3-media1.fl.yelpcdn.com/bphoto/OCDZ4n...,False,https://www.yelp.com/biz/julianas-brooklyn-3?a...,2523,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.70274718768062, 'longitude': -...",[delivery],"{'address1': '19 Old Fulton St', 'address2': '...",+17185966700,(718) 596-6700,7226.454630,$$
2,xRiLLXjeM2wOvVTYJH2__A,barboncino-pizza-and-bar-brooklyn-2,Barboncino Pizza & Bar,https://s3-media1.fl.yelpcdn.com/bphoto/vOe8cj...,False,https://www.yelp.com/biz/barboncino-pizza-and-...,1079,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 40.67206, 'longitude': -73.95717}",[delivery],"{'address1': '781 Franklin Ave', 'address2': '...",+17184838834,(718) 483-8834,2663.491653,$$
3,3VJwpmmeoPlCN3J5p5miEQ,brooklyns-homeslice-pizzeria-brooklyn,Brooklyn's Homeslice Pizzeria,https://s3-media1.fl.yelpcdn.com/bphoto/PFbKEy...,False,https://www.yelp.com/biz/brooklyns-homeslice-p...,181,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.680702, 'longitude': -73.967455}","[pickup, delivery]","{'address1': '567 Vanderbilt Ave', 'address2':...",+17183999000,(718) 399-9000,3945.532545,$
4,C8j0q4Ma_S5hBGuAI-aaww,di-fara-pizza-brooklyn,Di Fara Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/Et320n...,False,https://www.yelp.com/biz/di-fara-pizza-brookly...,3837,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.625093, 'longitude': -73.961531}","[pickup, delivery]","{'address1': '1424 Ave J', 'address2': '', 'ad...",+17182581367,(718) 258-1367,3559.746344,$$
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0eHYheq3fwDGKAaGtxUGtQ,panino-toasty-delicious-brooklyn-3,Panino Toasty Delicious,https://s3-media2.fl.yelpcdn.com/bphoto/gPzjAS...,False,https://www.yelp.com/biz/panino-toasty-delicio...,1,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",5.0,"{'latitude': 40.63206, 'longitude': -73.995}","[pickup, delivery]","{'address1': '5401 13th Ave', 'address2': None...",+17185766839,(718) 576-6839,5203.292145,
996,VrP6mI9Ihf2qt8dH6XwnYg,proto-s-pizza-new-york-2,Proto’s Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/AdbSxI...,False,https://www.yelp.com/biz/proto-s-pizza-new-yor...,111,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.725353, 'longitude': -73.989746}","[pickup, delivery]","{'address1': '50 2nd Ave', 'address2': None, '...",+12124324044,(212) 432-4044,9165.059693,$
997,dWBDxX4E_QDYxLRGPzU5Jg,bene-restaurant-and-pizzeria-brooklyn,Bene Restaurant & Pizzeria,https://s3-media3.fl.yelpcdn.com/bphoto/YKcBna...,False,https://www.yelp.com/biz/bene-restaurant-and-p...,57,"[{'alias': 'pizza', 'title': 'Pizza'}]",2.5,"{'latitude': 40.6547993572564, 'longitude': -7...","[pickup, delivery]","{'address1': '157 Prospect Park SW', 'address2...",+17186860046,(718) 686-0046,2895.130837,$
998,wKku3dhD7ZUT1eGQxfY7uA,robertas-pizza-new-york-3,Roberta's Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/IQaAqs...,False,https://www.yelp.com/biz/robertas-pizza-new-yo...,37,"[{'alias': 'pizza', 'title': 'Pizza'}]",3.5,"{'latitude': 40.75686, 'longitude': -73.97228}",[],"{'address1': '570 Lexington Ave', 'address2': ...",,,11949.812650,


In [15]:
# Count the rows whose business id already appeared in an earlier row
final_df.duplicated(subset='id').sum()



171

In [16]:
## Drop rows with a repeated id, then confirm there are no more duplicates
## (the count displayed should be 0)
final_df = final_df.drop_duplicates(subset='id')
final_df.duplicated(subset='id').sum()

0

In [17]:
# Save final_df to a gzip-compressed csv; index=False leaves the row index out of the file
final_df.to_csv('Data/final_results_NY_pizza.csv.gz', compression='gzip',index=False)

