* Oleksandra Aliyeva
* April 2022
* Find sushi in Boston

In [20]:
#import libraries
import pandas as pd
import numpy as np

import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [21]:
#load API credentials
with open('/Users/oleksandraaliyeva/.secret/yelp_api.json') as f: 
    login = json.load(f)
#instantiate YelpAPI variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [22]:
#set API call parameters and filename before the first call
LOCATION = 'Boston, MA, 02101'
TERM = 'Sushi'

In [23]:
#specifying JSON_FILE filename
JSON_FILE = f'Data/results_in_progress_Sushi.json'
JSON_FILE 

'Yelp/results_in_progress_Sushi.json'

In [24]:
#check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
#if it does not exist: 
if file_exists == False:
    folder = os.path.dirname(JSON_FILE)
    #if JSON_FILE included a folder:
    if len(folder)>0:
    #create the folder
        os.makedirs(folder, exist_ok = True)
    
     #INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
     #save the first page of results
    with open(JSON_FILE,'w') as f:
        json.dump([],f)  
#if it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Yelp/results_in_progress_Sushi.json not found. Saving empty list to file.


In [25]:
#load previous results and use len of results for offset
with open (JSON_FILE,'r') as f:
    previous_results = json.load(f)
    
#set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [26]:
#use yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                               term=TERM,
                               offset=n_results+1)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [27]:
#how many results total?
total_results = results['total']
total_results

640

In [28]:
#how many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [29]:
#use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total']-n_results)/results_per_page)
n_pages

32

In [30]:
#join new results with old list with extend and save to file
previous_results.extend(results['businesses'])
with open(JSON_FILE,'w') as f:
    json.dump(previous_results,f)

In [31]:
for i in tqdm_notebook( range(1,n_pages+1)):
    time.sleep(.2)
    #read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    #save number of results for to use as offset
    n_results = len(previous_results)
    #use n_results as the OFFSET 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results+1)
    
    #append new results and save to file
    previous_results.extend(results['businesses'])
    
    #display(previous_results)
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

  0%|          | 0/32 [00:00<?, ?it/s]

In [32]:
#load final results
final_df = pd.read_json(JSON_FILE)
display(final_df.head(3), final_df.tail(3))

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,E3Qqa99qFy1oeGMv0wPBBQ,tora-japanese-restaurant-boston,Tora Japanese Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/ZdaYpg...,False,https://www.yelp.com/biz/tora-japanese-restaur...,553,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 42.35076, 'longitude': -71.06054}","[pickup, delivery]",$$,"{'address1': '20B Tyler St', 'address2': None,...",16175426688,(617) 542-6688,934.549817
1,baoHJnYFntDUICr10aFj8Q,sakana-cambridge-2,Sakana,https://s3-media1.fl.yelpcdn.com/bphoto/wb4xhJ...,False,https://www.yelp.com/biz/sakana-cambridge-2?ad...,242,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.5,"{'latitude': 42.3693, 'longitude': -71.11082}","[pickup, delivery]",$$,"{'address1': '983 Massachusetts Ave', 'address...",16177144646,(617) 714-4646,4030.346045
2,Z6Io2AbJrof7TPVliMZkAg,fuji-at-ink-block-boston,Fuji at Ink Block,https://s3-media2.fl.yelpcdn.com/bphoto/xhHJpc...,False,https://www.yelp.com/biz/fuji-at-ink-block-bos...,543,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 42.345077, 'longitude': -71.063505}","[pickup, delivery, restaurant_reservation]",$$,"{'address1': '352 Harrison Ave', 'address2': '...",16179363282,(617) 936-3282,706.357329


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
636,tZV72RPEG2E24uFulcGjVg,star-market-auburndale,Star Market,https://s3-media4.fl.yelpcdn.com/bphoto/p4SlaX...,False,https://www.yelp.com/biz/star-market-auburndal...,40,"[{'alias': 'grocery', 'title': 'Grocery'}]",2.0,"{'latitude': 42.3467438, 'longitude': -71.2447...",[delivery],$$,"{'address1': '2040 Commonwealth Ave', 'address...",16179651793,(617) 965-1793,14258.843346
637,m6ZCINThm8PwkK91qJcrPw,trader-joes-arlington,Trader Joe's,https://s3-media1.fl.yelpcdn.com/bphoto/KwDTvS...,False,https://www.yelp.com/biz/trader-joes-arlington...,96,"[{'alias': 'grocery', 'title': 'Grocery'}]",3.5,"{'latitude': 42.4254178, 'longitude': -71.1888...",[],$$,"{'address1': '1427 Massachusetts Ave', 'addres...",17816469138,(781) 646-9138,12948.424375
638,MpfauJengAZHTzoIkwP_yw,the-cheesecake-factory-burlington,The Cheesecake Factory,https://s3-media1.fl.yelpcdn.com/bphoto/95h89v...,False,https://www.yelp.com/biz/the-cheesecake-factor...,529,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",3.0,"{'latitude': 42.4822898, 'longitude': -71.2135...","[delivery, restaurant_reservation]",$$,"{'address1': '75 Middlesex Turnpike', 'address...",17812730060,(781) 273-0060,18972.757453


In [33]:
#check for duplicate IDs
final_df.duplicated(subset='id').sum()

1

In [34]:
final_df.drop_duplicates(subset='id', keep='first', inplace = True)

In [35]:
final_df.duplicated(subset='id').sum()

0

In [36]:
# save the final results to a compressed csv
final_df.to_csv('Yelp/final_results_sushi.csv',
                index=False)