# Extracting and Saving Data from Yelp API

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
!pip install yelpapi



### Load Credentials and Create Yelp API Object

In [3]:
# Load API Credentials
with open('/Users/abigailbojorquez/.secret/yelp_api.json') as f: #change the path to match YOUR path!!
    login = json.load(f)
login.keys()

dict_keys(['Client-id', 'API-key'])

In [4]:
# Instantiate YelpAPI Variable
yelp = YelpAPI(login['API-key'], timeout_s=5.0)
yelp

<yelpapi.yelpapi.YelpAPI at 0x7fb8a7258160>

### Define Search Terms and File Paths

In [5]:
# set our API call parameters and filename before the first call
# set our API call parameters 
LOCATION = 'Huntington Beach, CA 92647'
TERM = 'italian'


In [6]:
## Specify fodler for saving data
FOLDER = 'Data/'
os.makedirs(FOLDER,exist_ok=True)

In [7]:
LOCATION.split(',')[0]

'Huntington Beach'

In [8]:
# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{LOCATION.split(',')[0]}-(term).json"

In [9]:
JSON_FILE

'Data/Huntington Beach-(term).json'

### Check if Json File exists and Create it if it doesn't

In [10]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON_FILE included a folder:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder, exist_ok=True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open (JSON_FILE, 'w') as f:
            json.dump([],f)
        
## If it exists, inform user
else:
    print(f"[i]{JSON_FILE} already exists")

[i]Data/Huntington Beach-(term).json already exists


### Make the first API call to get the first page of data

In [15]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp.search_query(term = TERM,location = LOCATION)

In [16]:
## How many results total?
type(results)

dict

In [17]:
len(results)

3

In [18]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [19]:
results['total']

395

In [20]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [21]:
# Use math.ceil to round up for the total number of pages of results.
import time, math

n_pages = math.ceil((results['total'])/ results_per_page)
n_pages

20

In [26]:
for i in tqdm_notebook( range(1,n_pages+1)):
    try:
        time.sleep(.2)
        with open (JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        n_results = len(previous_results)
        results = yelp.search_query(location = LOCATION, TERM = TERM, offest = n_results+1)
        previous_results.extend(results['businesses'])
        with open (JSON_FILE, 'w') as f:
            json.dump(previous_results,f)
    except Exception as e:
        print('[!] ERROR:', e)


  0%|          | 0/20 [00:00<?, ?it/s]

## Open the Final JSON File with Pandas

In [27]:
df = pd.read_json(JSON_FILE)
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,ia09aCVFnDFwoRweRwhcLQ,bunz-huntington-beach-3,Bunz,https://s3-media2.fl.yelpcdn.com/bphoto/GWN6QW...,False,https://www.yelp.com/biz/bunz-huntington-beach...,1662,"[{'alias': 'hotdog', 'title': 'Hot Dogs'}, {'a...",4.5,"{'latitude': 33.733991, 'longitude': -117.998171}","[delivery, pickup]",$,"{'address1': '7491 Center Ave', 'address2': No...",17147667060,(714) 766-7060,1196.818773
1,Xo9gKql3lRZc2iRQcRwPtQ,the-donuttery-huntington-beach,The Donuttery,https://s3-media2.fl.yelpcdn.com/bphoto/dNBilk...,False,https://www.yelp.com/biz/the-donuttery-hunting...,4492,"[{'alias': 'donuts', 'title': 'Donuts'}, {'ali...",4.5,"{'latitude': 33.7094609, 'longitude': -117.988...","[delivery, pickup]",$,"{'address1': '17420 Beach Blvd', 'address2': '...",17148477000,(714) 847-7000,2501.33433
2,vAzVWlsBPJx4nDNJb3cVEw,curry-and-kabab-bistro-huntington-beach,Curry & Kabab Bistro,https://s3-media4.fl.yelpcdn.com/bphoto/Dcjkjo...,False,https://www.yelp.com/biz/curry-and-kabab-bistr...,1091,"[{'alias': 'indpak', 'title': 'Indian'}]",4.5,"{'latitude': 33.7299751, 'longitude': -118.005...","[delivery, pickup]",$$,"{'address1': '7114 Edinger Ave', 'address2': N...",17148411800,(714) 841-1800,289.502944
3,-E3RrsOr4QUIr_q-sS1D4g,subculture-extraordinary-sandwiches-huntington...,Subculture Extraordinary Sandwiches,https://s3-media1.fl.yelpcdn.com/bphoto/BMfil2...,False,https://www.yelp.com/biz/subculture-extraordin...,2358,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",4.5,"{'latitude': 33.71513, 'longitude': -117.9905}",[delivery],$$,"{'address1': '7862 Warner Ave', 'address2': 'S...",17148471113,(714) 847-1113,1941.765373
4,mtn4uojv4aXT5-WfTKnxow,the-vox-kitchen-by-kei-concepts-fountain-valley,The Vox Kitchen by Kei Concepts,https://s3-media3.fl.yelpcdn.com/bphoto/cRv41y...,False,https://www.yelp.com/biz/the-vox-kitchen-by-ke...,5691,"[{'alias': 'vietnamese', 'title': 'Vietnamese'...",4.5,"{'latitude': 33.72744, 'longitude': -117.95568}","[restaurant_reservation, delivery, pickup]",$$,"{'address1': '16161 Brookhurst St', 'address2'...",16572316493,(657) 231-6493,4697.23564


In [28]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/Huntington Beach-(term).csv.gz'

In [29]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file, compression = 'gzip', index = False)