In [17]:
# library imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

The code in this notebook was adapted from the following Coding Dojo lecture:
* [Code for efficient API Extraction](https://login.codingdojo.com/m/720/16250/120407)


In [18]:
# Load the Yelp API credentials from a JSON file kept outside the project.
# The path is built from the current user's home directory instead of a
# hard-coded /Users/<name>/ prefix, so the notebook is not tied to one account.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)

# Instantiate the YelpAPI client. The credentials file must contain an
# 'api-key' entry; timeout_s=5.0 is handed to the client unchanged.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [19]:
# Search parameters used for every Yelp request in this notebook
LOCATION: str = 'Los Angeles, California'  # where to search
TERM: str = 'Starbucks'                    # what to search for

In [20]:
# Path of the JSON file that accumulates results while the extraction runs.
# The search (LA / Starbucks) is encoded in the filename.
# NOTE(review): "startbucks" is misspelled; it is kept unchanged here so an
# already-existing in-progress file is still found.
JSON_FILE: str = "Data/results_in_progress_LA_startbucks.json"
JSON_FILE

'Data/results_in_progress_LA_startbucks.json'

In [21]:
# Make sure the in-progress JSON file exists before any cell tries to read it.
# When it is missing, create its parent folder (if the path has one) and
# initialise the file with an empty JSON list.
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    print(f"[i] {JSON_FILE} already exists")
else:
    # directory part of the path; an empty string when JSON_FILE is a bare filename
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # tell the user, then write the empty list
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Data/results_in_progress_LA_startbucks.json already exists


In [22]:
# Read whatever has been saved so far; the count of saved businesses is the
# offset for the next API request.
with open(JSON_FILE) as saved_file:
    previous_results = json.load(saved_file)

n_results = len(previous_results)
print(f'- {n_results} previous results found')


- 0 previous results found


In [23]:
# First request: start at the offset given by the number of saved results
search_kwargs = {'location': LOCATION, 'term': TERM, 'offset': n_results}
results = yelp_api.search_query(**search_kwargs)

# top-level keys of the response
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [24]:
# Value of the response's 'total' field (distinct from the number of
# businesses actually returned in this response)
total_results: int = results['total']
total_results

6200

In [25]:
# Number of businesses contained in this single response (the page size)
first_page = results['businesses']
results_per_page = len(first_page)
results_per_page

20

In [26]:
# Number of further requests needed to page through the remaining results.
# `math` and `time` are already imported in the first cell, so they are not
# re-imported here.
remaining_results = max(results['total'] - n_results, 0)

# Guard against an empty first page, which would otherwise raise
# ZeroDivisionError. math.ceil rounds up so a final partial page is counted.
n_pages = math.ceil(remaining_results / results_per_page) if results_per_page else 0
n_pages

310

In [27]:
# Add the first page of businesses to the saved list and persist it right away
previous_results += results['businesses']
with open(JSON_FILE, 'w') as out_file:
    json.dump(previous_results, out_file)

In [28]:
# Result cap at which paging stops.
# NOTE(review): presumably Yelp's ceiling on offset + limit; confirm against
# the current API documentation.
MAX_RESULTS = 1000
# pause between consecutive requests, in seconds
REQUEST_DELAY_S = 0.2

for i in tqdm_notebook(range(1, n_pages+1)):

    # re-read the progress file on every pass so the offset always matches
    # what is actually on disk
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)

    # number of saved results doubles as the offset of the next page
    n_results = len(previous_results)

    # stop before requesting a page that would run past the result cap
    # (the limit is on results retrieved, not on the number of API calls)
    if (n_results + results_per_page) > MAX_RESULTS:
        print(f'Reached the limit of {MAX_RESULTS} results. Stopping loop.')
        break

    # use n_results as offset
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    # An empty page means nothing is left to fetch. Without this check the
    # offset never advances and the same request is repeated for every
    # remaining iteration.
    new_businesses = results['businesses']
    if not new_businesses:
        print('No more results returned. Stopping loop.')
        break

    # append new results and save to file
    previous_results.extend(new_businesses)

    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    time.sleep(REQUEST_DELAY_S)

  0%|          | 0/310 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [29]:
# Read the accumulated JSON back into a DataFrame and preview both ends of it
final_df = pd.read_json(JSON_FILE)
display(final_df.head())
display(final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,meb8dLzgvra_sQvHVJrD9w,starbucks-los-angeles-592,Starbucks,https://s3-media1.fl.yelpcdn.com/bphoto/2u79Jy...,False,https://www.yelp.com/biz/starbucks-los-angeles...,9,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.5,"{'latitude': 34.06192333, 'longitude': -118.30...",[],"{'address1': '3785 Wilshire Blvd', 'address2':...",12137230211,(213) 723-0211,1179.227512,
1,P2QwKVViQcZQJzav2KBhPQ,starbucks-los-angeles-551,Starbucks,https://s3-media3.fl.yelpcdn.com/bphoto/KLS-G2...,False,https://www.yelp.com/biz/starbucks-los-angeles...,4,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",2.0,"{'latitude': 34.0600267, 'longitude': -118.308...",[],"{'address1': '670 S Western Ave', 'address2': ...",12133835058,(213) 383-5058,1200.989396,
2,tE76daU5hUPb8cwQzSZwqQ,starbucks-los-angeles-110,Starbucks,https://s3-media1.fl.yelpcdn.com/bphoto/UUtVl3...,False,https://www.yelp.com/biz/starbucks-los-angeles...,165,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",3.0,"{'latitude': 34.061461, 'longitude': -118.3063...",[delivery],"{'address1': '3680 Wilshire Blvd', 'address2':...",12133835370,(213) 383-5370,1383.518137,$
3,2YDeRPINipVdFZm2lgavEA,starbucks-los-angeles-93,Starbucks,https://s3-media3.fl.yelpcdn.com/bphoto/7f-NyF...,False,https://www.yelp.com/biz/starbucks-los-angeles...,96,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",3.0,"{'latitude': 34.07494333, 'longitude': -118.32...",[delivery],"{'address1': '206 N Larchmont Blvd', 'address2...",13234691081,(323) 469-1081,1496.834045,$
4,fn0d4TtMb1AaSOoNdNoYzQ,starbucks-los-angeles-142,Starbucks,https://s3-media1.fl.yelpcdn.com/bphoto/2GQf5d...,False,https://www.yelp.com/biz/starbucks-los-angeles...,117,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",3.5,"{'latitude': 34.0619085, 'longitude': -118.338...",[delivery],"{'address1': '5020 Wilshire Blvd', 'address2':...",13239362989,(323) 936-2989,1615.983222,$


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
995,KwgwcP8GTyOwphwifasV-w,paris-baguette-los-angeles,Paris Baguette,https://s3-media3.fl.yelpcdn.com/bphoto/9p4iQT...,False,https://www.yelp.com/biz/paris-baguette-los-an...,419,"[{'alias': 'bakeries', 'title': 'Bakeries'}, {...",4.0,"{'latitude': 34.0738166208763, 'longitude': -1...",[delivery],"{'address1': '125 N Western Ave', 'address2': ...",13234670404,(323) 467-0404,1756.623296,$
996,7zS_o_W3kuK51evHYodI7w,kaldi-coffee-and-tea-south-pasadena,Kaldi Coffee and Tea,https://s3-media1.fl.yelpcdn.com/bphoto/YJbLPC...,False,https://www.yelp.com/biz/kaldi-coffee-and-tea-...,328,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",3.5,"{'latitude': 34.1145751, 'longitude': -118.156...",[delivery],"{'address1': '1019 El Centro St', 'address2': ...",16264035951,(626) 403-5951,16289.473549,$$
997,SucE_RVNy40uYskr2WS-eg,thank-you-coffee-los-angeles,Thank You Coffee,https://s3-media3.fl.yelpcdn.com/bphoto/a7QKpL...,False,https://www.yelp.com/biz/thank-you-coffee-los-...,158,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.5,"{'latitude': 34.06525, 'longitude': -118.23814}",[],"{'address1': '938 N Hill St', 'address2': None...",12139887069,(213) 988-7069,7677.630182,$$
998,rNU2WmPx_4ImwFxAB5kLwA,bon-bon-tea-house-los-angeles-6,Bon Bon Tea House,https://s3-media4.fl.yelpcdn.com/bphoto/n9pc7M...,False,https://www.yelp.com/biz/bon-bon-tea-house-los...,2013,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",4.5,"{'latitude': 34.246055206289, 'longitude': -11...",[delivery],"{'address1': '9663 Reseda Blvd', 'address2': N...",18182806677,(818) 280-6677,28534.16612,$$
999,sfyk-cjBO94SoPsN5_d3wQ,la-terraza-café-los-angeles-2,La Terraza Café,https://s3-media2.fl.yelpcdn.com/bphoto/wQ10v6...,False,https://www.yelp.com/biz/la-terraza-caf%C3%A9-...,318,"[{'alias': 'cafes', 'title': 'Cafes'}, {'alias...",4.5,"{'latitude': 34.0558406, 'longitude': -118.178...","[pickup, delivery]","{'address1': '4017 City Terrace Dr', 'address2...",13236858033,(323) 685-8033,13198.359739,$$


In [30]:
# Count rows whose business id repeats an earlier row (0 means every id is unique)
final_df['id'].duplicated().sum()

0

In [31]:
# Write the final results to a CSV file; the DataFrame index is written as
# the first column (pandas default).
CSV_FILE = 'Data/final_results_LA_starbucks.csv.gz'
final_df.to_csv(CSV_FILE)