In [1]:
# Standard Imports
import os
os.environ["OMP_NUM_THREADS"] = '1'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
with open('C:/Users/Mikey/Documents/keys/yelp_api.json') as f:
    login = json.load(f)
login.keys()

dict_keys(['client-id', 'api-key'])

In [3]:
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1e8d2ae0cd0>

In [4]:
results = yelp_api.search_query(location='Baltimore, MD',
                                       term='Crab Cake')
print(type(results))
results.keys()

<class 'dict'>


dict_keys(['businesses', 'total', 'region'])

# API CALLS

In [5]:
LOCATION = 'Chicago, IL'
TERM = 'Breakfast'

In [6]:
JSON = "Data/results_progress_breakfast.json"
JSON

'Data/results_progress_breakfast.json'

In [7]:
def create_json_file(JSON,  delete_if_exists=False):
    
    ## Check if JSON_FILE exists
    file_exists = os.path.isfile(JSON)
    
    ## If it DOES exist:
    if file_exists == True:
        
        ## Check if user wants to delete if exists
        if delete_if_exists==True:
            
            print(f"[!] {JSON} already exists. Deleting previous file...")
            ## delete file and confirm it no longer exits.
            os.remove(JSON)
            ## Recursive call to function after old file deleted
            create_json_file(JSON,delete_if_exists=False)
        else:
            print(f"[i] {JSON} already exists.")            
            
            
    ## If it does NOT exist:
    else:
        
        ## INFORM USER AND SAVE EMPTY LIST
        print(f"[i] {JSON} not found. Saving empty list to new file.")
        
        ## CREATE ANY NEEDED FOLDERS
        # Get the Folder Name only
        folder = os.path.dirname(JSON)
        
        ## If JSON_FILE included a folder:
        if len(folder)>0:
            # create the folder
            os.makedirs(folder,exist_ok=True)
        ## Save empty list to start the json file
        with open(JSON,'w') as f:
            json.dump([],f)  

In [9]:
## Create a new empty json file (exist the previous if it exists)
create_json_file(JSON, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

[i] Data/results_progress_breakfast.json not found. Saving empty list to new file.
- 0 previous results found.


480

In [10]:
for i in tqdm_notebook( range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(JSON, 'r') as f:
        previous_results = json.load(f)
    ## save number of results for to use as offset
    n_results = len(previous_results)
    
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
    
    ## use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    
    
    ## append new results and save to file
    previous_results.extend(results['businesses'])
    
    # display(previous_results)
    with open(JSON,'w') as f:
        json.dump(previous_results,f)
    
    time.sleep(.2)

  0%|          | 0/480 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [12]:
# load final results
final_df = pd.read_json(JSON)
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,cKZNbMvoqJaUe7n6lf6i7w,wildberry-pancakes-and-cafe-chicago-2,Wildberry Pancakes and Cafe,https://s3-media3.fl.yelpcdn.com/bphoto/uMLcfB...,False,https://www.yelp.com/biz/wildberry-pancakes-an...,9225,"[{'alias': 'pancakes', 'title': 'Pancakes'}, {...",4.5,"{'latitude': 41.884668, 'longitude': -87.62288}","[pickup, delivery]",$$,"{'address1': '130 E Randolph St', 'address2': ...",13129389777,(312) 938-9777,10721.811415
1,nnP8axu680aDGPdQ4TuSkA,wake-n-bacon-chicago,Wake ‘n Bacon,https://s3-media1.fl.yelpcdn.com/bphoto/rbp7mW...,False,https://www.yelp.com/biz/wake-n-bacon-chicago?...,419,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 41.940442, 'longitude': -87.64041}","[pickup, delivery, restaurant_reservation]",$$,"{'address1': '420 W Belmont Ave', 'address2': ...",17738805100,(773) 880-5100,14211.336624
2,d7n-NmN_c65-8Gzz2FILGQ,cracked-on-milwaukee-chicago,Cracked on Milwaukee,https://s3-media4.fl.yelpcdn.com/bphoto/8gLZf5...,False,https://www.yelp.com/biz/cracked-on-milwaukee-...,218,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.5,"{'latitude': 41.90671, 'longitude': -87.67125}","[pickup, delivery]",$$,"{'address1': '1359 N Milwaukee Ave', 'address2...",13129892247,(312) 989-2247,9682.484151
3,T9xKvh9GxwnuU_4WjYZb9w,wildberry-pancakes-and-cafe-chicago-4,Wildberry Pancakes and Cafe,https://s3-media4.fl.yelpcdn.com/bphoto/GgpPjQ...,False,https://www.yelp.com/biz/wildberry-pancakes-an...,1482,"[{'alias': 'pancakes', 'title': 'Pancakes'}, {...",4.5,"{'latitude': 41.8977379, 'longitude': -87.6220...","[pickup, delivery]",$$,"{'address1': '196 E Pearson St', 'address2': '...",13124700590,(312) 470-0590,11630.263302
4,o4MU9gK7epWUMv2WVW1qrA,yolk-streeterville-chicago-2,Yolk - Streeterville,https://s3-media2.fl.yelpcdn.com/bphoto/owClU_...,False,https://www.yelp.com/biz/yolk-streeterville-ch...,2581,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 41.89235678927749, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '355 E Ohio St', 'address2': '', ...",13128229655,(312) 822-9655,11506.288492


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,JqYRQUWF7gINp2MugU8Avw,taqueria-tayahua-chicago,Taqueria Tayahua,https://s3-media4.fl.yelpcdn.com/bphoto/ahrLBb...,False,https://www.yelp.com/biz/taqueria-tayahua-chic...,115,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,"{'latitude': 41.848008, 'longitude': -87.6852073}",[pickup],$,"{'address1': '2411 S Western Ave', 'address2':...",17732473183,(773) 247-3183,4214.682229
996,xDklh8oI95MdmojgMU_wFQ,charlies-restaurant-bolingbrook,Charlie's Restaurant,https://s3-media2.fl.yelpcdn.com/bphoto/pco2LT...,False,https://www.yelp.com/biz/charlies-restaurant-b...,75,"[{'alias': 'tradamerican', 'title': 'American ...",3.5,"{'latitude': 41.695822758183, 'longitude': -88...","[pickup, delivery]",$,"{'address1': '130 S Clow International Pkwy', ...",16307710501,(630) 771-0501,36227.188559
997,AOo_3JBwGQ0cuPr1G-1nlQ,the-original-pancake-house-oak-lawn,The Original Pancake House,https://s3-media1.fl.yelpcdn.com/bphoto/cM-ax4...,False,https://www.yelp.com/biz/the-original-pancake-...,152,"[{'alias': 'tradamerican', 'title': 'American ...",3.0,"{'latitude': 41.6940022845277, 'longitude': -8...",[delivery],$$,"{'address1': '10900 S Cicero Ave', 'address2':...",17083469800,(708) 346-9800,15362.320131
998,BWgtmakp1vfcv-y4nQ2uQQ,caffe-umbria-chicago-4,Caffe Umbria,https://s3-media4.fl.yelpcdn.com/bphoto/caGYWU...,False,https://www.yelp.com/biz/caffe-umbria-chicago-...,270,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.5,"{'latitude': 41.8887173, 'longitude': -87.6311...",[delivery],$$,"{'address1': '346 N Clark St', 'address2': '',...",13128775166,(312) 877-5166,10423.745374
999,53meWlmcA8uAB-JxrKe94A,cesaronis-cafe-and-deli-woodstock,Cesaroni's Cafe and Deli,https://s3-media2.fl.yelpcdn.com/bphoto/uje8mP...,False,https://www.yelp.com/biz/cesaronis-cafe-and-de...,58,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 42.3164888264258, 'longitude': -8...","[pickup, delivery]",,"{'address1': '236 Main St', 'address2': '', 'a...",18153085844,(815) 308-5844,79967.18041


In [14]:
# check for duplicate results
final_df.duplicated().sum()

TypeError: unhashable type: 'list'

In [None]:
# check for duplicate ID's 
final_df.duplicated(subset='id').sum()

In [None]:
## Drop duplicate ids and confirm there are no more duplicates
final_df = final_df.drop_duplicates(subset='id')
final_df.duplicated(subset='id').sum()

In [None]:
# save the final results to a compressed csv
final_df.to_csv('Data/final_results_NY_pizza.csv.gz', compression='gzip',index=False)