- We will use the Yelp API to search our favorite city for a cuisine type of our choice.
- Extract all of the results from our search and compile them into one dataframe using a for loop.

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Load API Credentials
# Build the path from the current user's home directory instead of
# hard-coding one machine's absolute path, so the notebook runs for anyone
# who keeps the key file at ~/.secret/yelp_api.json.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)
# The key is read from the 'api-key' entry of that JSON file; requests made
# through this client use a 5 second timeout.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [3]:
# Search parameters for the API call: where to search and what to search for
LOCATION, TERM = 'Orlando,FL', 'Biryani'

In [4]:
# File that accumulates the search results. A folder prefix is allowed, and
# the name should reflect the search terms.
JSON_FILE = 'Data/FL_biryani.json'
JSON_FILE

'Data/FL_biryani.json'

In [5]:
## Make sure JSON_FILE is on disk before any results are read from it
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    # Nothing to create; just report it
    print(f"[i] {JSON_FILE} already exists.")
else:
    ## Create the parent folder when JSON_FILE includes one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Tell the user, then seed the file with an empty JSON list
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

[i] Data/FL_biryani.json not found. Saving empty list to file.


In [6]:
## Read back whatever has been saved so far
with open(JSON_FILE) as f:
    previous_results = json.loads(f.read())

## The number of saved results is used as the offset of the next API call
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 0 previous results found.


In [7]:
# Ask Yelp for the page of results that starts right after the saved ones
search_params = dict(location=LOCATION, term=TERM, offset=n_results)
results = yelp_api.search_query(**search_params)
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [8]:
## How many results total?
# Value stored under the response's 'total' key: the match count reported for
# the whole search, as opposed to the number of businesses in this one page.
total_results = results['total']
total_results

77

In [9]:
## Number of businesses whose details came back in this single response
first_page = results['businesses']
results_per_page = len(first_page)
results_per_page

20

In [10]:
# Number of pages still needed to cover every result not yet saved.
# (math is already imported in the notebook's first cell, so it is not
# re-imported here.)
# Clamp at zero so a file that already holds at least 'total' results gives
# 0 pages rather than a negative count.
remaining_results = max(results['total'] - n_results, 0)
# An empty response gives results_per_page == 0, which would raise
# ZeroDivisionError; with nothing returned there is nothing left to page through.
# Otherwise use math.ceil to round up so a partial last page is counted.
n_pages = math.ceil(remaining_results / results_per_page) if results_per_page else 0
n_pages

4

In [11]:
# Add the businesses from the first response to the saved list, then write
# the combined list back to the file
previous_results += results['businesses']
with open(JSON_FILE, 'w') as f:
    json.dump(previous_results, f)

In [12]:
import time
from tqdm.notebook import tqdm_notebook

# Dry run of the progress bar: one tick per page, no API calls
for i in tqdm_notebook(range(0, n_pages)):
    time.sleep(0.2)  # 200 ms pause per tick

  0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
# Fetch the remaining pages. The first page was already requested and saved
# to JSON_FILE before this loop, so only n_pages - 1 further requests are
# needed; range(1, n_pages + 1) issued one request too many.
for i in tqdm_notebook(range(1, n_pages)):

    ## Re-read the in-progress file; its length is the offset of the next page
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    n_results = len(previous_results)

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## The reported 'total' may overstate what the API actually returns.
    ## An empty page means nothing is left, so stop instead of sending
    ## further requests and rewriting an unchanged file.
    new_businesses = results.get('businesses', [])
    if not new_businesses:
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    # add a 200ms pause between requests
    time.sleep(.2)

  0%|          | 0/4 [00:00<?, ?it/s]

In [14]:
# Read the accumulated JSON results into a dataframe and show it
final_df = pd.read_json(path_or_buf=JSON_FILE)
display(final_df)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,tSNA_ajn-xpyYP2pm9XvOg,bawarchi-biryanis-orlando-4,Bawarchi Biryanis,https://s3-media4.fl.yelpcdn.com/bphoto/aWUobC...,False,https://www.yelp.com/biz/bawarchi-biryanis-orl...,120,"[{'alias': 'indpak', 'title': 'Indian'}]",3.5,"{'latitude': 28.4613502, 'longitude': -81.46755}",[delivery],$$,"{'address1': '6315 International Dr', 'address...",14073687938,(407) 368-7938,11326.875787
1,hLmDfRdj2LYiHHVHnw5C-g,five-star-south-indian-food-and-catering-orlando,Five Star South Indian Food & Catering,https://s3-media3.fl.yelpcdn.com/bphoto/DO-Vfy...,False,https://www.yelp.com/biz/five-star-south-india...,48,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali...",4.5,"{'latitude': 28.424644, 'longitude': -81.406523}",[delivery],$,"{'address1': '9404 S Orange Blossom Trl', 'add...",14079305043,(407) 930-5043,8082.852383
2,DDvLLp9cT1m0U8SN-DYIKA,saffron-indian-cuisine-orlando-2,Saffron Indian Cuisine,https://s3-media3.fl.yelpcdn.com/bphoto/v2lZHU...,False,https://www.yelp.com/biz/saffron-indian-cuisin...,1019,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali...",4.5,"{'latitude': 28.4485988912675, 'longitude': -8...","[pickup, delivery]",$$,"{'address1': '7724 W Sand Lake Rd', 'address2'...",14076748899,(407) 674-8899,13653.865922
3,n4diWKcRvAFctwdRA6IPMg,ahmed-indian-restaurant-ucf-orlando,Ahmed Indian Restaurant UCF,https://s3-media1.fl.yelpcdn.com/bphoto/RD99V6...,False,https://www.yelp.com/biz/ahmed-indian-restaura...,166,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali...",4.5,"{'latitude': 28.596572, 'longitude': -81.243464}","[pickup, delivery]",$$,"{'address1': '10042 University Blvd', 'address...",14075747177,(407) 574-7177,16803.380678
4,cxVsg05tnysqe03tudgkTQ,tabla-indian-restaurant-orlando-orlando,Tabla Indian Restaurant - Orlando,https://s3-media2.fl.yelpcdn.com/bphoto/JL38Fo...,False,https://www.yelp.com/biz/tabla-indian-restaura...,1805,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali...",4.0,"{'latitude': 28.4771443, 'longitude': -81.4549...","[pickup, delivery]",$$,"{'address1': '5847 Grand National Dr', 'addres...",14072489400,(407) 248-9400,9894.930531
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,nbD4hcCWGT3VgL0KkQ70Jg,taj-punjabi-orlando-2,Taj Punjabi,https://s3-media2.fl.yelpcdn.com/bphoto/FeYnZI...,False,https://www.yelp.com/biz/taj-punjabi-orlando-2...,88,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali...",2.5,"{'latitude': 28.4558851, 'longitude': -81.4701...","[pickup, delivery]",$$,"{'address1': '6524 Carrier Dr', 'address2': ''...",14073525510,(407) 352-5510,11703.248972
70,HONsErAtIRzPuc0ekGGyRQ,taj-mahal-great-indian-restaurant-orlando,Taj Mahal Great Indian Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/gMct5h...,False,https://www.yelp.com/biz/taj-mahal-great-india...,210,"[{'alias': 'indpak', 'title': 'Indian'}]",3.0,"{'latitude': 28.440695143838717, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '8554 International Dr', 'address...",14072486565,(407) 248-6565,12330.528506
71,Qlkkw20FyMoWqNQk9t9S0w,cedars-restaurant-orlando,Cedar's Restaurant,https://s3-media4.fl.yelpcdn.com/bphoto/BgVFQ5...,False,https://www.yelp.com/biz/cedars-restaurant-orl...,279,"[{'alias': 'mideastern', 'title': 'Middle East...",4.0,"{'latitude': 28.448556, 'longitude': -81.488213}","[pickup, delivery]",$$,"{'address1': '7732 W Sand Lake Rd', 'address2'...",14073516000,(407) 351-6000,13617.807520
72,paMnCqJmvz8zA2D7bHXf8A,new-taste-of-india-kissimmee,New taste of India,,False,https://www.yelp.com/biz/new-taste-of-india-ki...,1,"[{'alias': 'indpak', 'title': 'Indian'}]",4.0,"{'latitude': 28.3335922875016, 'longitude': -8...",[],,{'address1': '4985 W Irlo Bronson Memorial Hwy...,14075079099,(407) 507-9099,20585.859310


In [15]:
# Count rows whose 'id' repeats an earlier row (0 means every id is unique)
final_df['id'].duplicated().sum()

0

In [16]:
# Save the final results to a gzip-compressed csv.
# Derive the csv name from JSON_FILE so both outputs always share the same
# folder and search-specific stem; for "Data/FL_biryani.json" this gives
# "Data/FL_biryani.csv.gz", the path that was previously typed by hand.
CSV_FILE = os.path.splitext(JSON_FILE)[0] + '.csv.gz'
final_df.to_csv(CSV_FILE, compression='gzip', index=False)