# Task
___
Create my own JSON file using efficient API call methods to retrieve data.

## Imports

In [1]:
# Imports
import pandas as pd
import math, time, json, os
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

## API Setup / JSON File Creation

In [2]:
# API Credentials
# Location of the JSON file that stores the Yelp API key. Set the
# YELP_API_CREDENTIALS environment variable to point at a different file;
# when it is unset, the original hard-coded location is used.
credentials_path = os.environ.get('YELP_API_CREDENTIALS',
                                  '/Users/aveld/secret/yelp_api.json')
with open(credentials_path) as f:
    login = json.load(f)

# The key is read from the 'api-key' entry of the loaded file.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [3]:
# API parameters
# Search area and search term passed to the Yelp search queries
LOCATION, TERM = 'Boston, MA', 'Ice Cream'

In [4]:
# Specifying JSON File
# Relative path of the file that holds the results gathered so far
JSON_FILE = 'Data/in_progress_boston_icecream.json'
JSON_FILE  # echo the path as the cell output

'Data/in_progress_boston_icecream.json'

In [5]:
## Make sure the in-progress results file is present before it is read
file_exists = os.path.isfile(JSON_FILE)

if file_exists:
    # Nothing to create; just report it
    print(f"[i] {JSON_FILE} already exists.")
else:
    # Create the parent directory first when the path names one
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Inform User
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')

    # Seed the file with an empty JSON list
    empty_results = []
    with open(JSON_FILE, 'w') as f:
        json.dump(empty_results, f)

[i] Data/in_progress_boston_icecream.json not found. Saving empty list to file.


## First API call

In [6]:
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'{n_results} previous results found. Offset set to {n_results}.')

# Perform first API call (used only to size the job; the loop cell
# re-reads the file and issues its own requests)
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                               offset=n_results)

## Total results reported by the API for this query
total_results = results['total']

## Results in a single "page"
results_per_page = len(results['businesses'])

# Finding total number of pages still to fetch.
# An empty page would make the division below fail, so treat it as
# "nothing left"; the max() keeps the count from going negative when
# more results are already stored than the API reports.
if results_per_page > 0:
    n_pages = max(0, math.ceil((total_results - n_results) / results_per_page))
else:
    n_pages = 0
n_pages

0 previous results found. Offset set to 0


95

Now we have everything we need (the starting offset, the page size, and the number of pages) to create the loop.

## API call 'for' loop

In [7]:
# Largest number of results this loop will accumulate before stopping.
MAX_RESULTS = 1000

# Iterate through each page of results
for i in tqdm_notebook(range(1,n_pages+1)):
    
    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
        
    ## create n_results to use as offset
    n_results = len(previous_results)
    
    # Stop before a request whose page would push the stored results past
    # the cap (the check is on result count, not on the number of calls)
    if (n_results + results_per_page) > MAX_RESULTS:
        print(f'Reached the {MAX_RESULTS:,} result limit. Stopping loop.')
        break
    
    ## API call saved in "results"
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    # An empty page leaves the offset unchanged, so every remaining
    # iteration would repeat the same request; stop instead.
    if not results['businesses']:
        print('No more businesses returned. Stopping loop.')
        break
    
    ## append new results
    previous_results.extend(results['businesses'])
    
    # Write new results to JSON file after every page so progress is kept
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)
    
    # Short pause between requests
    time.sleep(.2)

  0%|          | 0/95 [00:00<?, ?it/s]

Exceeded 1,000 API calls. Stopping loop.


## Creating pd.DataFrame / Exporting Result

In [8]:
# Read the accumulated results file into a DataFrame
df = pd.read_json(path_or_buf=JSON_FILE)

In [9]:
## Keep the first row for each 'id' value, then count any ids that still repeat
final_df = df.drop_duplicates(subset=['id'], keep='first')
final_df.duplicated(subset=['id']).sum()

0

In [10]:
# Export the de-duplicated results as a gzip-compressed CSV, omitting the index
final_df.to_csv(
    'Data/results_boston_icecream.csv.gz',
    index=False,
    compression='gzip',
)