In [1]:
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

In [2]:
# load API credentials
with open('/Users/wombatmaster/.secret/yelp_api.json') as f:
    creds = json.load(f)
# create YelpAPI variable
yelp_api = YelpAPI(creds['api-key'], timeout_s=5.0)


In [3]:
#set API call params and file name before first call
LOCATION = "Minneapolis, MN 55401"
TERM = 'Burgers'

In [4]:
#create Data folder and JSON file for results
FOLDER = "Data/"
os.makedirs(FOLDER,exist_ok=True)

JSON_FILE = f"Data/Minneapolis_Burgers.json"
JSON_FILE

'Data/Minneapolis_Burgers.json'

In [5]:
#JSON file check and creation is it doesn't already exist
if os.path.isfile(JSON_FILE)==False:
#if file doesn't exist: 
    print('The file does not exist.Creating empty file')    
        
#if file exists, print message
else:
    print('File already exists.')

File already exists.


In [6]:
#use Yelp_API search_query for API call
results = yelp_api.search_query(term=TERM,location = LOCATION)
print(results.keys())
#save first page of results
with open(JSON_FILE,'w') as f:
    json.dump(results['businesses'],f)

dict_keys(['businesses', 'total', 'region'])


In [7]:
## Load previous results and use len of results for offset
with open(JSON_FILE,'r') as f:
    previous_results = json.load(f)
    
## set offset based on previous results
n_results = len(previous_results)
print(f'- {n_results} previous results found.')

- 20 previous results found.


In [8]:
#total number of results returned
total_results = results['total']
total_results

370

In [9]:
#how many results were returned with details
results_per_page = len(results['businesses'])
results_per_page

20

In [10]:
#using math.ceil to round up number of result pages
n_pages = math.ceil((results['total']-n_results)/ results_per_page)
n_pages

18

In [11]:
#join new results with the old ones using .extend and save to file
previous_results.extend(results['businesses'])  
with open(JSON_FILE,'w') as f:
     json.dump(previous_results,f)

In [12]:
#create for loop to append and store the results is the loop interates
for i in tqdm_notebook( range(1,n_pages+1)):
    time.sleep(.1)
    #read results in progress file and check length
    with open(JSON_FILE, 'r') as f:
        previous_results = json.load(f)
    #number of resualts for offset
    n_results = len(previous_results)
    #set n_results as offset 
    results = yelp_api.search_query(location=LOCATION,
                                    term=TERM, 
                                    offset=n_results)
    
    #append new results and save
    previous_results.extend(results['businesses'])
    
    #display(previous_results)
    with open(JSON_FILE,'w') as f:
        json.dump(previous_results,f)

  0%|          | 0/18 [00:00<?, ?it/s]

In [13]:
#load final results as a dataframe
results_df = pd.read_json(JSON_FILE)
display(results_df.head(), results_df.tail())

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,MDPv2B2jwA1qLvQCXlEVww,red-cow-minneapolis-minneapolis-3,Red Cow - Minneapolis,https://s3-media2.fl.yelpcdn.com/bphoto/uajR_Z...,False,https://www.yelp.com/biz/red-cow-minneapolis-m...,1119,"[{'alias': 'newamerican', 'title': 'American (...",4.5,"{'latitude': 44.983574835459336, 'longitude': ...",[delivery],$$,"{'address1': '208 N 1st Ave', 'address2': '', ...",16122380050.0,(612) 238-0050,351.471253
1,RFdLxs7xfDmHSYZQIKiEBw,stray-dog-minneapolis-2,Stray Dog,https://s3-media4.fl.yelpcdn.com/bphoto/fd-T-d...,False,https://www.yelp.com/biz/stray-dog-minneapolis...,128,"[{'alias': 'newamerican', 'title': 'American (...",4.5,"{'latitude': 44.9888, 'longitude': -93.2549}","[delivery, pickup]",$$,"{'address1': '401 E Hennepin Ave', 'address2':...",16123782855.0,(612) 378-2855,952.969031
2,K2ahjZ7QiDJegYQpxtN-KQ,the-fabled-rooster-minneapolis-3,The Fabled Rooster,https://s3-media2.fl.yelpcdn.com/bphoto/_wSqjm...,False,https://www.yelp.com/biz/the-fabled-rooster-mi...,9,"[{'alias': 'bbq', 'title': 'Barbeque'}, {'alia...",4.0,"{'latitude': 44.98488609666274, 'longitude': -...","[delivery, pickup]",,"{'address1': '520 N 4th St', 'address2': 'Fl 1...",16124704820.0,(612) 470-4820,928.834252
3,FLknT5_ky5NI5Kt2ORjLpg,parlour-minneapolis,Parlour,https://s3-media3.fl.yelpcdn.com/bphoto/TK9q_K...,False,https://www.yelp.com/biz/parlour-minneapolis?a...,376,"[{'alias': 'newamerican', 'title': 'American (...",4.0,"{'latitude': 44.9887275695801, 'longitude': -9...",[pickup],$$,"{'address1': '730 N Washington Ave', 'address2...",16123543135.0,(612) 354-3135,1056.376102
4,mLhJNAjfa2NsGAqjqqrZvg,dream-creamery-minneapolis-2,Dream Creamery,https://s3-media3.fl.yelpcdn.com/bphoto/2v3dyU...,False,https://www.yelp.com/biz/dream-creamery-minnea...,21,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"{'latitude': 45.013044863006236, 'longitude': ...",[],,"{'address1': '816 NE Lowry Ave', 'address2': '...",,,3385.987254


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
365,ZIsbdA7YFoX8pkUc7OptBQ,seward-co-op-creamery-cafe-minneapolis,Seward Co-op Creamery Cafe,https://s3-media4.fl.yelpcdn.com/bphoto/QAONbn...,False,https://www.yelp.com/biz/seward-co-op-creamery...,80,"[{'alias': 'cafes', 'title': 'Cafes'}, {'alias...",3.5,"{'latitude': 44.962646542798, 'longitude': -93...",[delivery],$$,"{'address1': '2601 E Franklin Ave', 'address2'...",16122305575,(612) 230-5575,3458.066591
366,3NnMbV3UuraqxBzgu67cjA,memory-lanes-and-the-flashback-cafe-minneapolis,Memory Lanes & The Flashback Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/EW8FxQ...,False,https://www.yelp.com/biz/memory-lanes-and-the-...,105,"[{'alias': 'bowling', 'title': 'Bowling'}, {'a...",3.5,"{'latitude': 44.9564361572266, 'longitude': -9...",[pickup],$,"{'address1': '2520 26th Ave S', 'address2': ''...",16127216211,(612) 721-6211,3930.404191
367,_qxnJd-72lSwTd0flnGBaA,mcdonalds-minneapolis-41,McDonald's,https://s3-media2.fl.yelpcdn.com/bphoto/EuslnT...,False,https://www.yelp.com/biz/mcdonalds-minneapolis...,12,"[{'alias': 'hotdogs', 'title': 'Fast Food'}, {...",2.5,"{'latitude': 44.9488317820602, 'longitude': -9...",[delivery],$,"{'address1': '3110 E Lake St', 'address2': '',...",16127299005,(612) 729-9005,5010.324248
368,fIol0ba3o3ZxnAENOStUcw,caribou-coffee-minneapolis-22,Caribou Coffee,https://s3-media2.fl.yelpcdn.com/bphoto/Db3Yrx...,False,https://www.yelp.com/biz/caribou-coffee-minnea...,26,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.0,"{'latitude': 44.97393, 'longitude': -93.22483}",[delivery],$,"{'address1': '917 Washington Avenue SE', 'addr...",16123314121,(612) 331-4121,3405.867192
369,ok6BVxCW0n3QGFbHWtJlGg,stanleys-on-wheels-minneapolis,Stanley's On Wheels,https://s3-media4.fl.yelpcdn.com/bphoto/9Lvpw2...,False,https://www.yelp.com/biz/stanleys-on-wheels-mi...,9,"[{'alias': 'foodtrucks', 'title': 'Food Trucks...",3.0,"{'latitude': 45.01677322387695, 'longitude': -...",[],$,"{'address1': '', 'address2': '', 'address3': '...",16127882529,(612) 788-2529,4552.36963


In [14]:
#check for duplicates
results_df.duplicated(subset='id').sum()

20

In [16]:
#remove duplicates
results_df.drop_duplicates(subset='id', inplace=True)
results_df.duplicated(subset='id').sum()

0

In [17]:
#save results to a compressed csv
results_df.to_csv('Data/results_minneapolis_burgers.csv.gz', compression='gzip',index=False)