In [1]:
# Standard Imports
import os
os.environ["OMP_NUM_THREADS"] = '1'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
#Location of the Yelp credentials file. Set the YELP_API_KEY_FILE environment
#variable to point at your own copy; when it is unset the original path is used.
credentials_path = os.environ.get('YELP_API_KEY_FILE',
                                  'C:/Users/Mikey/Documents/keys/yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
#Show only the key names so the secret values never end up in the saved output.
login.keys()

dict_keys(['client-id', 'api-key'])

In [3]:
#Create the Yelp client from the 'api-key' entry of the loaded credentials.
#timeout_s is presumably the request timeout in seconds - confirm in the yelpapi docs.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)
#Echo the client object to confirm it was created.
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1ed024293d0>

# API CALLS

In [4]:
#Search parameters passed to every Yelp request in this notebook.
TERM = 'Breakfast'        #what to search for
LOCATION = 'Chicago, IL'  #where to search

In [5]:
#Path of the JSON file that holds the search results collected so far.
JSON = 'Data/results_progress_breakfast.json'
JSON

'Data/results_progress_breakfast.json'

In [6]:
#Helper that makes sure the JSON results file exists.
def create_json_file(JSON,delete_if_exists=False):
    """Ensure a JSON file containing a list exists at the path ``JSON``.

    Parameters
    ----------
    JSON : str
        Path of the file to create. Missing parent folders are created.
    delete_if_exists : bool, default False
        When truthy, an existing file is removed and replaced by a new file
        holding an empty list. When falsy, an existing file is left untouched.

    Returns
    -------
    None
        The function only prints status messages and writes to disk.
    """
    if os.path.isfile(JSON):
        if not delete_if_exists:
            print(f"[i] {JSON} already exists.")
            return
        print(f"[!] {JSON} already exists. Deleting previous file...")
        os.remove(JSON)

    #Reached when the file never existed or has just been deleted.
    print(f"[i] {JSON} not found. Saving empty list to new file.")
    folder = os.path.dirname(JSON)
    #dirname is '' for a bare file name, and os.makedirs('') would raise.
    if folder:
        os.makedirs(folder, exist_ok=True)
    #Save an empty list as the starting content.
    with open(JSON, 'w') as f:
        json.dump([], f)

In [7]:
#Start from an empty results file (any earlier progress is deliberately discarded).
create_json_file(JSON, delete_if_exists=True)
#Load the saved results; their count is the offset for the next request.
with open(JSON,'r') as f:
    previous_results = json.load(f)
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
#First request: used here only to read the total and the page size.
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
#Total number of matches reported by the API.
total_results = results['total']
#Number of businesses actually returned in this one response.
results_per_page = len(results['businesses'])
#Round up so a final partial page is counted. An empty response gives a page
#size of 0, so report 0 pages instead of raising ZeroDivisionError.
#NOTE: the download loop stops at 1000 stored results, so it may end long
#before n_pages iterations have run.
if results_per_page > 0:
    n_pages = math.ceil((total_results-n_results)/ results_per_page)
else:
    n_pages = 0
n_pages

[i] Data/results_progress_breakfast.json not found. Saving empty list to new file.
- 0 previous results found.


480

In [8]:
for i in tqdm_notebook( range(1,n_pages+1)):
    #Re-read the results file so the offset always reflects what is on disk.
    with open(JSON, 'r') as f:
        previous_results = json.load(f)
    #Number of results saved so far; used as the offset of the next request.
    n_results = len(previous_results)
    #Stop before requesting a page that would take the stored results past 1000.
    #(The message says "api calls", but the quantity compared is a result count.)
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 api calls. Stopping loop.')
        break
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)
    new_businesses = results['businesses']
    #An empty page adds nothing, so the offset would never advance and every
    #remaining iteration would repeat this same request.
    if not new_businesses:
        print('No more results returned. Stopping loop.')
        break
    #Append the new page and save the combined list back to the file.
    previous_results.extend(new_businesses)
    with open(JSON,'w') as f:
        json.dump(previous_results,f)
    #Short pause between consecutive requests.
    time.sleep(.2)

  0%|          | 0/480 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [9]:
#Load the accumulated results file into a DataFrame.
final_df = pd.read_json(JSON)
#Show the first and last rows of the loaded results.
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,cKZNbMvoqJaUe7n6lf6i7w,wildberry-pancakes-and-cafe-chicago-2,Wildberry Pancakes and Cafe,https://s3-media3.fl.yelpcdn.com/bphoto/uMLcfB...,False,https://www.yelp.com/biz/wildberry-pancakes-an...,9225,"[{'alias': 'pancakes', 'title': 'Pancakes'}, {...",4.5,"{'latitude': 41.884668, 'longitude': -87.62288}","[pickup, delivery]",$$,"{'address1': '130 E Randolph St', 'address2': ...",13129389777,(312) 938-9777,10721.811415
1,nnP8axu680aDGPdQ4TuSkA,wake-n-bacon-chicago,Wake ‘n Bacon,https://s3-media1.fl.yelpcdn.com/bphoto/rbp7mW...,False,https://www.yelp.com/biz/wake-n-bacon-chicago?...,418,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 41.940442, 'longitude': -87.64041}","[pickup, restaurant_reservation, delivery]",$$,"{'address1': '420 W Belmont Ave', 'address2': ...",17738805100,(773) 880-5100,14211.336624
2,d7n-NmN_c65-8Gzz2FILGQ,cracked-on-milwaukee-chicago,Cracked on Milwaukee,https://s3-media4.fl.yelpcdn.com/bphoto/8gLZf5...,False,https://www.yelp.com/biz/cracked-on-milwaukee-...,218,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.5,"{'latitude': 41.90671, 'longitude': -87.67125}","[pickup, delivery]",$$,"{'address1': '1359 N Milwaukee Ave', 'address2...",13129892247,(312) 989-2247,9682.484151
3,T9xKvh9GxwnuU_4WjYZb9w,wildberry-pancakes-and-cafe-chicago-4,Wildberry Pancakes and Cafe,https://s3-media4.fl.yelpcdn.com/bphoto/GgpPjQ...,False,https://www.yelp.com/biz/wildberry-pancakes-an...,1483,"[{'alias': 'pancakes', 'title': 'Pancakes'}, {...",4.5,"{'latitude': 41.8977379, 'longitude': -87.6220...","[pickup, delivery]",$$,"{'address1': '196 E Pearson St', 'address2': '...",13124700590,(312) 470-0590,11630.263302
4,o4MU9gK7epWUMv2WVW1qrA,yolk-streeterville-chicago-2,Yolk - Streeterville,https://s3-media2.fl.yelpcdn.com/bphoto/owClU_...,False,https://www.yelp.com/biz/yolk-streeterville-ch...,2581,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.0,"{'latitude': 41.89235678927749, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '355 E Ohio St', 'address2': '', ...",13128229655,(312) 822-9655,11506.288492


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,VUBka1EPLgiGYZBjMowLtQ,i-love-frys-villa-park,I Love Frys,https://s3-media3.fl.yelpcdn.com/bphoto/RMw1zo...,False,https://www.yelp.com/biz/i-love-frys-villa-par...,69,"[{'alias': 'newamerican', 'title': 'American (...",4.5,"{'latitude': 41.88826, 'longitude': -87.96949}","[pickup, delivery]",,"{'address1': '54 S Villa Ave', 'address2': Non...",13317030552,(331) 703-0552,20694.951717
996,E3yRKqxWxBG9L041kVYrvQ,capital-one-café-chicago-6,Capital One Café,https://s3-media1.fl.yelpcdn.com/bphoto/AVuigB...,False,https://www.yelp.com/biz/capital-one-caf%C3%A9...,7,"[{'alias': 'cafes', 'title': 'Cafes'}]",4.5,"{'latitude': 41.7994057, 'longitude': -87.5892...",[],$,"{'address1': '1465 E 53rd St', 'address2': Non...",17733584129,(773) 358-4129,12320.180593
997,--kVRdC7JJ2Ph5UaXQw6aA,jojo-s-diner-schaumburg,Jo Jo's Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/Mk3Xj5...,False,https://www.yelp.com/biz/jojo-s-diner-schaumbu...,229,"[{'alias': 'tradamerican', 'title': 'American ...",4.0,"{'latitude': 42.046957, 'longitude': -88.12665}","[pickup, delivery]",$$,"{'address1': '1935 W Golf Rd', 'address2': '',...",18478858786,(847) 885-8786,40503.041495
998,nrCsZq8efWiHSkx7Mx6wHg,davids-grill-chicago,David's Grill,https://s3-media1.fl.yelpcdn.com/bphoto/hQohVN...,False,https://www.yelp.com/biz/davids-grill-chicago?...,25,"[{'alias': 'diners', 'title': 'Diners'}, {'ali...",3.5,"{'latitude': 41.8527749, 'longitude': -87.6467...",[delivery],$,"{'address1': '800 W Cermak Rd', 'address2': ''...",13126667727,(312) 666-7727,7364.027987
999,PXTmbLliu0xr-pptLJPFFw,jessies-country-kitchen-university-park,Jessie's Country Kitchen,https://s3-media4.fl.yelpcdn.com/bphoto/cv_XBu...,False,https://www.yelp.com/biz/jessies-country-kitch...,35,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.0,"{'latitude': 41.4275132745721, 'longitude': -8...",[delivery],$,"{'address1': '1307 Hamilton Ave', 'address2': ...",17088855080,(708) 885-5080,45022.251039


In [10]:
#Count the rows whose 'id' repeats that of an earlier row.
final_df.duplicated(subset='id').sum()

25

In [11]:
#Drop rows that repeat an 'id' already seen (the first occurrence is kept by default).
final_df = final_df.drop_duplicates(subset='id')
#Re-check: the duplicate count should now be 0.
final_df.duplicated(subset='id').sum()

0

In [12]:
#Write the results to a gzip-compressed CSV, leaving out the DataFrame index.
final_df.to_csv('Data/final_results_breakfast.csv.gz', compression='gzip',index=False)