In [1]:
#Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# Load API Credentials
# The credentials file is looked up under the current user's home directory
# ("~" is expanded at runtime) so the notebook is not tied to one machine's
# absolute path. The JSON file must contain an 'api-key' entry.
with open(os.path.expanduser('~/.secret/yelp_api.json')) as f:   #use your path here!
    login = json.load(f)

# Client object used for every search request further down.
# NOTE(review): timeout_s is presumably a per-request timeout in seconds — confirm in yelpapi docs.
yelp_api = YelpAPI(login['api-key'], timeout_s=5.0)

In [3]:
# Display the client object to confirm it was created
yelp_api

<yelpapi.yelpapi.YelpAPI at 0x1620dbfd0>

# API Call

## Define Search Results and In Progress File

In [4]:
#Set API call parameters
# LOCATION and TERM are passed as the location= and term= arguments of each search_query call
LOCATION = "San Jose,CA"
TERM = "Pizza"

In [5]:
# Specifying JSON_FILE filename (can include a folder)
# include the search terms in the filename
# This file stores the results gathered so far; the download loop re-reads and rewrites it on every iteration
JSON_FILE = "Data/results_in_progress_CA_pizza.json"
JSON_FILE

'Data/results_in_progress_CA_pizza.json'

## Create json file function, delete if file already exists

In [6]:
def create_json_file(JSON_FILE,  delete_if_exists=False):
    """Ensure JSON_FILE exists, initialising it with an empty JSON list when needed.

    Parameters
    ----------
    JSON_FILE : str
        Path of the JSON file. It may include a folder; any missing folders
        are created before the file is written.
    delete_if_exists : bool, default False
        When truthy and the file already exists, the file is deleted and
        recreated containing an empty list. When falsy, an existing file is
        left untouched.

    Returns
    -------
    None
        The function only prints status messages and writes to disk.
    """
    if os.path.isfile(JSON_FILE):
        if not delete_if_exists:
            ## Nothing to do: keep the existing file as it is
            print(f"[i] {JSON_FILE} already exists.")
            return

        print(f"[!] {JSON_FILE} already exists. Deleting previous file...")
        ## delete the old file, then fall through to recreate it below
        os.remove(JSON_FILE)

    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to new file.")

    ## CREATE ANY NEEDED FOLDERS
    # dirname is '' when JSON_FILE has no folder part; makedirs('') would raise
    folder = os.path.dirname(JSON_FILE)
    if folder:
        os.makedirs(folder, exist_ok=True)

    ## Save empty list to start the json file
    with open(JSON_FILE, 'w') as f:
        json.dump([], f)

## Retrieve n_results, total_results, results_per_page, and n_pages variables

In [7]:
## Create a new empty json file (deleting the previous one if it exists)
create_json_file(JSON_FILE, delete_if_exists=True)
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)

## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')
# use our yelp_api variable's search_query method to perform our API call
results = yelp_api.search_query(location=LOCATION,
                                term=TERM,
                                offset=n_results)
## How many results total?
total_results = results['total']
## How many did we get the details for?
results_per_page = len(results['businesses'])
# Use math.ceil to round up for the total number of pages of results.
# If the search returned no businesses there is nothing left to page through,
# and dividing by a page size of zero would raise ZeroDivisionError.
if results_per_page > 0:
    n_pages = math.ceil((total_results - n_results) / results_per_page)
else:
    n_pages = 0
n_pages

[i] Data/results_in_progress_CA_pizza.json not found. Saving empty list to new file.
- 0 previous results found.


110

## Saving businesses

In [8]:
# Upper bound on the number of results this loop will collect.
# NOTE(review): presumably the Yelp search endpoint's cap on offset + page size — confirm against the API docs.
MAX_RESULTS = 1000

for i in tqdm_notebook( range(1,n_pages+1)):

    ## Read in results in progress file and check the length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    ## save number of results to use as offset
    n_results = len(previous_results)

    ## Stop before requesting a page that would push the collected results past
    ## the cap. The message says "api calls", but the quantity checked here is
    ## the number of results.
    if (n_results + results_per_page) > MAX_RESULTS:
        print('Exceeded 1000 api calls. Stopping loop.')
        break

    ## use n_results as the OFFSET
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM,
                                    offset=n_results)

    ## An empty page means the offset cannot advance; every remaining
    ## iteration would repeat the same request, so stop here.
    new_businesses = results['businesses']
    if not new_businesses:
        print('No more businesses returned. Stopping loop.')
        break

    ## append new results and save to file
    previous_results.extend(new_businesses)

    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

    ## short pause between requests
    time.sleep(.2)

  0%|          | 0/110 [00:00<?, ?it/s]

Exceeded 1000 api calls. Stopping loop.


In [9]:
# load final results
# Read the JSON list saved by the download loop into a DataFrame
final_df = pd.read_json(JSON_FILE)
# Show the first and last rows side by side as a quick sanity check
display(final_df.head(), final_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,QNzqZtNb_I1mcHAviE5zeQ,bibos-ny-pizza-san-jose,Bibo's Ny Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/slNv72...,False,https://www.yelp.com/biz/bibos-ny-pizza-san-jo...,1386,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 37.306477955952, 'longitude': -12...",[pickup],$$,"{'address1': '1431 Bird Ave', 'address2': None...",14082179084,(408) 217-9084,1253.647404
1,zVCIEtCsInWBZIs1vF73wg,square-pie-guys-campbell,Square Pie Guys,https://s3-media2.fl.yelpcdn.com/bphoto/P06HvK...,False,https://www.yelp.com/biz/square-pie-guys-campb...,3,"[{'alias': 'chicken_wings', 'title': 'Chicken ...",4.5,"{'latitude': 37.293896, 'longitude': -121.931254}",[],,"{'address1': '1640 S Bascom Ave', 'address2': ...",14159928206,(415) 992-8206,4801.735904
2,7HBtj81aBo2DEZFTPaaeqg,slice-of-homage-pizza-san-jose,Slice of Homage Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/Pin-QL...,False,https://www.yelp.com/biz/slice-of-homage-pizza...,300,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 37.33497773585703, 'longitude': -...","[pickup, delivery]",$$,"{'address1': '163 W Santa Clara St', 'address2...",14084904477,(408) 490-4477,2189.653218
3,7NOpkoAUAGqMKEZgSu1QmA,a-slice-of-new-york-san-jose,A Slice of New York,https://s3-media4.fl.yelpcdn.com/bphoto/aTipil...,False,https://www.yelp.com/biz/a-slice-of-new-york-s...,3329,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 37.3235243214913, 'longitude': -1...",[delivery],$$,"{'address1': '3443 Stevens Creek Blvd', 'addre...",14082475423,(408) 247-5423,6589.770466
4,vDmGBY09fUawZKVyh5ozLQ,sliver-pizzeria-fremont-fremont,Sliver Pizzeria - Fremont,https://s3-media1.fl.yelpcdn.com/bphoto/HIcV7D...,False,https://www.yelp.com/biz/sliver-pizzeria-fremo...,48,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",5.0,"{'latitude': 37.549543, 'longitude': -121.98447}",[],,"{'address1': '3658 Capitol Ave', 'address2': '...",19254064809,(925) 406-4809,27365.922824


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
995,wo13F6b8lyCyZnD04WSDYQ,chuck-e-cheese-cupertino-2,Chuck E. Cheese,https://s3-media3.fl.yelpcdn.com/bphoto/vEfG_E...,False,https://www.yelp.com/biz/chuck-e-cheese-cupert...,219,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",3.0,"{'latitude': 37.323660567993, 'longitude': -12...","[pickup, delivery]",$$,"{'address1': '19805 Stevens Creek Blvd', 'addr...",14088738485,(408) 873-8485,11879.986941
996,AMPeKmZJepoDwK1uCV7feQ,devout-coffee-fremont,Devout Coffee,https://s3-media3.fl.yelpcdn.com/bphoto/WDtC4D...,False,https://www.yelp.com/biz/devout-coffee-fremont...,1030,"[{'alias': 'coffeeroasteries', 'title': 'Coffe...",4.5,"{'latitude': 37.577333, 'longitude': -121.980799}",[delivery],$,"{'address1': '37323 Niles Blvd', 'address2': '...",15107445189,(510) 744-5189,30211.383773
997,TIZJa5kYVWR8RUnskbVmnQ,p-za-kitchen-campbell,P.Za Kitchen,https://s3-media4.fl.yelpcdn.com/bphoto/9T3bDF...,False,https://www.yelp.com/biz/p-za-kitchen-campbell...,12,"[{'alias': 'fooddeliveryservices', 'title': 'F...",2.0,"{'latitude': 37.27966575520958, 'longitude': -...",[],,"{'address1': '', 'address2': None, 'address3':...",18887996601,(888) 799-6601,7416.408837
998,USxOexfcA4VwyLyc43k01w,sgd-tofu-house-san-jose-2,SGD Tofu House,https://s3-media3.fl.yelpcdn.com/bphoto/01B37n...,False,https://www.yelp.com/biz/sgd-tofu-house-san-jo...,682,"[{'alias': 'korean', 'title': 'Korean'}]",4.0,"{'latitude': 37.249537154827, 'longitude': -12...","[pickup, delivery]",$$,"{'address1': '832 Blossom Hill Rd', 'address2'...",14082278583,(408) 227-8583,7857.445712
999,luj6QJYgI4XkAR_t8SFxvQ,8elements-perfect-indian-cuisine-san-jose,8Elements Perfect Indian Cuisine,https://s3-media1.fl.yelpcdn.com/bphoto/sRkwWA...,False,https://www.yelp.com/biz/8elements-perfect-ind...,1052,"[{'alias': 'indpak', 'title': 'Indian'}]",3.5,"{'latitude': 37.309618, 'longitude': -121.810133}","[pickup, delivery]",$$,"{'address1': '1781 E Capitol Expy', 'address2'...",14082702577,(408) 270-2577,6671.081049


In [10]:
# Count the rows whose 'id' value repeats an earlier row
final_df.duplicated(subset='id').sum()

11

In [11]:
# Keep one row per 'id', then re-run the duplicate count to confirm it is now zero
final_df = final_df.drop_duplicates(subset='id')
final_df.duplicated(subset='id').sum()

0

In [12]:
# save the final results to a compressed csv
final_df.to_csv('Data/final_results_CA_pizza.csv.gz', compression='gzip',index=False)