In [34]:
# Install tmdbsimple
!pip install tqdm



In [35]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [36]:
# Load API Credentials
with open('/Users/aharo/.secret/yelp_api.json') as f:
    login = json.load(f)

In [47]:
# Instantiate YelpAPI Variable
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [48]:
# Define the search parameters
location = "Denver, CO"
cuisine = "Pizza"

In [49]:
# JSON File
JSON_FILE = "Data/results_in_progress_CO_pizza.json"
JSON_FILE

'Data/results_in_progress_CO_pizza.json'

In [50]:
# Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)

# If it does not exist:
if not file_exists:

    # CREATE ANY NEEDED FOLDERS
    folder = os.path.dirname(JSON_FILE)
    if len(folder) > 0:
        # Create the folder
        os.makedirs(folder, exist_ok=True)

    # INFORM USER AND SAVE EMPTY LIST
    print(f'[i] {JSON_FILE} not found. Saving empty list to file.')

    # Save an empty list
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)
        
# If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/results_in_progress_CO_pizza.json already exists.


In [51]:
# Load previous results and use len of results for offset
with open(JSON_FILE, 'r') as f:
    previous_results = json.load(f)

# Set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 1000 previous results found.


In [52]:
# Calculate the number of results remaining
results_remaining = total_results - n_results

# Calculate the number of pages needed
n_pages = math.ceil(results_remaining / results_per_page)
print(f'- {n_pages} pages of results needed.')

- 70 pages of results needed.


In [54]:
# Loop through the pages with a progress bar
for i in tqdm_notebook(range(n_pages)):
    # adds 200 ms pause
    time.sleep(0.2)

    # Perform API call to retrieve results for the current page
    results = yelp_api.search_query(location=location, term=cuisine, offset=n_results + (i * results_per_page))

    # Add the current page of results to the existing list
    previous_results.extend(results['businesses'])

    # Save the updated list to the JSON file
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

  0%|          | 0/70 [00:00<?, ?it/s]

YelpAPIError: VALIDATION_ERROR: Too many results requested, limit+offset must be <= 1000.

In [55]:
# Loop through each page of results
for i in tqdm_notebook(range(1, n_pages + 1)):

    # Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)

    # Save the number of results to use as an offset
    n_results = len(previous_results)

    # Check if the total results exceed the limit of 1000
    if (n_results + results_per_page) > 1000:
        print('Exceeded 1000 API calls. Stopping loop.')
        break

    # Use n_results as the offset in the API request
    results = yelp_api.search_query(location=location, term=cuisine, offset=n_results)

    # Append new results to previous_results
    previous_results.extend(results['businesses'])

    # Save the updated results to the file
    with open(JSON_FILE, 'w') as f:
        json.dump(previous_results, f)

    time.sleep(0.2)

  0%|          | 0/70 [00:00<?, ?it/s]

Exceeded 1000 API calls. Stopping loop.


In [56]:
# Load the "results in progress" JSON file into a DataFrame
final_df = pd.read_json(JSON_FILE)

# Display the first and last few rows of the DataFrame
display(final_df.head(), final_df.tail())

# Check for duplicate IDs
duplicate_ids = final_df.duplicated(subset='id').sum()
print(f'{duplicate_ids} duplicate IDs found.')

# Drop duplicate rows based on the 'id' column
final_df = final_df.drop_duplicates(subset='id')

# Confirm that there are no more duplicates
duplicate_ids = final_df.duplicated(subset='id').sum()
print(f'{duplicate_ids} duplicate IDs after dropping duplicates.')

# Save the final DataFrame to a compressed CSV file
final_df.to_csv('Data/final_results_CO_pizza.csv.gz', compression='gzip', index=False)

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,VFACYV_lGv9y4FV-_1m4Ig,brooklyns-finest-pizza-lowell-denver,Brooklyn's Finest Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/6cPY-Z...,False,https://www.yelp.com/biz/brooklyns-finest-pizz...,509,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 39.7875680710371, 'longitude': -1...","[delivery, pickup]",$$,"{'address1': '5007 Lowell Blvd', 'address2': '...",13034770066,(303) 477-0066,15510.516545
1,n18mJPI6mQfBJEp8UrITyw,blue-pan-pizza-denver-4,Blue Pan Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/EAkVmo...,False,https://www.yelp.com/biz/blue-pan-pizza-denver...,698,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 39.735349, 'longitude': -104.945823}",[delivery],$$,"{'address1': '3509 E 12th Ave', 'address2': ''...",17205190944,(720) 519-0944,8271.274253
2,Yk84dALUw0TPPf9sMuu2hg,redeemer-pizza-denver,Redeemer Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/tmZdbk...,False,https://www.yelp.com/biz/redeemer-pizza-denver...,130,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 39.75975, 'longitude': -104.98405}","[delivery, pickup]",,"{'address1': '2705 Larimer St', 'address2': No...",17207801379,(720) 780-1379,10934.932313
3,mmz8zURQXK4ijOYkK1qBTg,pizzeria-lui-lakewood,Pizzeria Lui,https://s3-media1.fl.yelpcdn.com/bphoto/eW3v8m...,False,https://www.yelp.com/biz/pizzeria-lui-lakewood...,340,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 39.6965331421814, 'longitude': -1...",[delivery],$$,"{'address1': '5380 W Mississippi Ave', 'addres...",13039223202,(303) 922-3202,18657.015124
4,kI5oOBAGS-Vo4chwmgOVmQ,cart-driver-denver,Cart-Driver,https://s3-media1.fl.yelpcdn.com/bphoto/YzE2Pj...,False,https://www.yelp.com/biz/cart-driver-denver?ad...,644,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 39.7575443618608, 'longitude': -1...","[delivery, pickup]",$$,"{'address1': '2500 Larimer St', 'address2': 'S...",13032923553,(303) 292-3553,11125.161044


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,65A35EEKuWBMduR1mlyRow,cheba-hut-toasted-subs-denver-14,Cheba Hut Toasted Subs,https://s3-media2.fl.yelpcdn.com/bphoto/fl0qWF...,False,https://www.yelp.com/biz/cheba-hut-toasted-sub...,234,"[{'alias': 'sandwiches', 'title': 'Sandwiches'}]",4.0,"{'latitude': 39.739736375011056, 'longitude': ...","[pickup, delivery]",$$,"{'address1': '638 E Colfax Ave', 'address2': '...",17204200806,(720) 420-0806,10817.900745
996,CQ-mdq40BgWEdn5HVjg_Qg,south-philly-cheese-steaks-cherry-hills-village,South Philly Cheese Steaks,https://s3-media1.fl.yelpcdn.com/bphoto/DKo5nR...,False,https://www.yelp.com/biz/south-philly-cheese-s...,127,"[{'alias': 'cheesesteaks', 'title': 'Cheeseste...",3.0,"{'latitude': 39.652804455708264, 'longitude': ...","[pickup, delivery]",$,"{'address1': '1400 E Hampden Ave', 'address2':...",13037815657,(303) 781-5657,15745.554959
997,QUtePDDK0ATGBD1Tk8nXXA,damascus-grill-littleton,Damascus Grill,https://s3-media4.fl.yelpcdn.com/bphoto/D82Sjz...,False,https://www.yelp.com/biz/damascus-grill-little...,497,"[{'alias': 'mideastern', 'title': 'Middle East...",4.5,"{'latitude': 39.6135348599588, 'longitude': -1...","[pickup, delivery]",$$,"{'address1': '1399 W Littleton Blvd', 'address...",13037976666,(303) 797-6666,20854.531613
998,FcyalVpVWzOHCYJnQDTLPw,leven-deli-denver-8,Leven Deli,https://s3-media4.fl.yelpcdn.com/bphoto/b0jahb...,False,https://www.yelp.com/biz/leven-deli-denver-8?a...,450,"[{'alias': 'sandwiches', 'title': 'Sandwiches'...",4.5,"{'latitude': 39.73547386, 'longitude': -104.98...","[pickup, delivery]",$$,"{'address1': '123 W 12th Ave', 'address2': '',...",13033255691,(303) 325-5691,11837.069028
999,GQ7KFIDJE2Q9SHfTws5OXA,king-soopers-denver-55,King Soopers,https://s3-media2.fl.yelpcdn.com/bphoto/NoXTB_...,False,https://www.yelp.com/biz/king-soopers-denver-5...,113,"[{'alias': 'grocery', 'title': 'Grocery'}, {'a...",3.0,"{'latitude': 39.756858, 'longitude': -104.998877}",[],$$,"{'address1': '1950 Chestnut Place', 'address2'...",13036782434,(303) 678-2434,12221.739202


1 duplicate IDs found.
0 duplicate IDs after dropping duplicates.
