In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

# Getting YelpAPI Credentials 

In [2]:
# Load API Credentials
with open('C:/Users/grace/.secret/yelp_api.json','r') as f:
    login = json.load(f)

In [3]:
login.keys()

dict_keys(['client-id', 'api-key'])

In [4]:
yelp = YelpAPI(login['api-key'], timeout_s=5.0)

# Define Search Terms and File Path

In [5]:
location = 'Philadelphia, PA'
term = 'sushi'
## Specify folder for saving data
folder = 'Data/'
os.makedirs(folder, exist_ok=True)

In [6]:
# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = folder+f"{location.split(',')[0]}-{term}.json"
JSON_FILE

'Data/Philadelphia-sushi.json'

# Check for JSON Existence and Create

In [7]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists ==False:
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON_FILE included a folder:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder,exist_ok=True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open(JSON_FILE, 'w') as f:
        json.dump([],f)
        
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/Philadelphia-sushi.json not found. Saving empty list to file.


# Get First Page of Data

In [8]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp.search_query(location=location, term=term)

In [9]:
type(results)

dict

In [10]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [11]:
results['total']

914

In [13]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [14]:
# Use math.ceil to round up for the total number of pages of results.
import time, math
n_pages = math.ceil((results['total'])/results_per_page)
n_pages

46

# Load Data into File

In [15]:
for i in tqdm_notebook( range(1,n_pages+1)):
    ## The block of code we want to TRY to run
    try:
        time.sleep(.2)
        
        ## Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        
        ## save number of results for to use as offset
        n_results = len(previous_results)
        
        
        ## use n_results as the OFFSET 
        results = yelp.search_query(location=location,
                                   term=term,
                                   offset = n_results+1)

        ## append new results and save to file (dont use append)
        previous_results.extend(results['businesses'])
        
        with open(JSON_FILE, 'w') as f:
            json.dump(previous_results,f)

            
    ## What to do if we get an error/exception.
    except Exception as e:
        print('[!] ERROR:',e)

  0%|          | 0/46 [00:00<?, ?it/s]

In [16]:
df = pd.read_json(JSON_FILE)
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 913 entries, 0 to 912
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             913 non-null    object 
 1   alias          913 non-null    object 
 2   name           913 non-null    object 
 3   image_url      913 non-null    object 
 4   is_closed      913 non-null    bool   
 5   url            913 non-null    object 
 6   review_count   913 non-null    int64  
 7   categories     913 non-null    object 
 8   rating         913 non-null    float64
 9   coordinates    913 non-null    object 
 10  transactions   913 non-null    object 
 11  location       913 non-null    object 
 12  phone          913 non-null    object 
 13  display_phone  913 non-null    object 
 14  distance       913 non-null    float64
 15  price          753 non-null    object 
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 108.0+ KB


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,price
0,paNYNJ-VFDgwLY201IrXCQ,sakana-omakasè-sushi-philadelphia-3,Sakana Omakasè Sushi,https://s3-media4.fl.yelpcdn.com/bphoto/4cFnCN...,False,https://www.yelp.com/biz/sakana-omakas%C3%A8-s...,176,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 39.94069400766489, 'longitude': -...","[delivery, pickup]","{'address1': '616 S 2nd St', 'address2': 'Fl 1...",12159222149,(215) 922-2149,2598.514407,
1,IS3h8_4gL_IbmpVTKDB3LA,royal-sushi-and-izakaya-philadelphia,Royal Sushi & Izakaya,https://s3-media2.fl.yelpcdn.com/bphoto/u0eDwE...,False,https://www.yelp.com/biz/royal-sushi-and-izaka...,323,"[{'alias': 'izakaya', 'title': 'Izakaya'}, {'a...",4.5,"{'latitude': 39.93802, 'longitude': -75.14662}",[delivery],"{'address1': '780-782 S 2nd St', 'address2': '...",12679099002,(267) 909-9002,2733.038345,$$
2,h7TO_IsmLCYmKKDVOOIeFw,fat-salmon-philadelphia,Fat Salmon,https://s3-media2.fl.yelpcdn.com/bphoto/BVbaDQ...,False,https://www.yelp.com/biz/fat-salmon-philadelph...,1164,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 39.9481899, 'longitude': -75.15338}","[delivery, restaurant_reservation, pickup]","{'address1': '719 Walnut St', 'address2': '', ...",12159288881,(215) 928-8881,1642.910147,$$
3,FqOCC8Y9xryaX7sIBJcrxA,vic-sushi-bar-philadelphia,Vic Sushi Bar,https://s3-media2.fl.yelpcdn.com/bphoto/NDsR2y...,False,https://www.yelp.com/biz/vic-sushi-bar-philade...,908,"[{'alias': 'sushi', 'title': 'Sushi Bars'}, {'...",4.0,"{'latitude': 39.9515123, 'longitude': -75.1743...",[delivery],"{'address1': '2035 Sansom St', 'address2': '',...",12155644339,(215) 564-4339,394.796535,$$
4,i76ERS3jM111T3VMbi2yrQ,sagami-collingswood,Sagami,https://s3-media2.fl.yelpcdn.com/bphoto/ju4DpU...,False,https://www.yelp.com/biz/sagami-collingswood?a...,628,"[{'alias': 'japanese', 'title': 'Japanese'}, {...",4.5,"{'latitude': 39.92116, 'longitude': -75.08756}",[delivery],"{'address1': '37 W Crescent Blvd', 'address2':...",18568549773,(856) 854-9773,7995.747985,$$


In [17]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/Philadelphia-sushi.csv.gz'

In [18]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression='gzip',index=False)