## Imports

In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#additional imports

import os, json, math, time 

# to make yelpapi
from yelpapi import YelpAPI

# progress bar from tqdm_notebook
from tqdm.notebook import tqdm_notebook

In [2]:
!pip install yelpapi
!pip install tqdm



## Load credentials and Create Yelp API Object

In [21]:
# Load API Credentials    
    
with open('/Users/ROSA/.secret/yelp_api.json', 'r') as f:
    login = json.load(f)

In [22]:
login.keys()

dict_keys(['client-id', 'api-key'])

In [24]:
login.items()


dict_items([('client-id', 'AzyqDaAFoaPEVDlGQs1aoQ'), ('api-key', 'B4AL2K-z8VSqDQ78_86vJmMqxIdDojIe7H6hFeDTfsrhUg8fxXk-1b_moXJg5tVs7leCsBw7lztyidGjjV-nryW8Jn03gJzK8EybNXP7Oc-XqAuI5hDsUIdk06H4ZHYx')])

In [26]:
login['api-key']

'B4AL2K-z8VSqDQ78_86vJmMqxIdDojIe7H6hFeDTfsrhUg8fxXk-1b_moXJg5tVs7leCsBw7lztyidGjjV-nryW8Jn03gJzK8EybNXP7Oc-XqAuI5hDsUIdk06H4ZHYx'

In [27]:
# Instantiate YelpAPI Variable
yelp = YelpAPI (login['api-key'], timeout_s = 5.0)

### Define Search Terms and File Paths

In [28]:
# set our API call parameters and filename before the first call
location = 'San Antonio, TX, 78216'
term = 'boba tea'

In [29]:
location.split(',')[0]

'San Antonio'

In [30]:
## Specify fodler for saving data
FOLDER = 'Data/'

os.makedirs(FOLDER, exist_ok = True)


# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"

In [31]:
JSON_FILE

'Data/San Antonio-boba tea.json'

### Load JSON FIle and account for previous results

In [34]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)
## If it does not exist: 
if file_exists == False:    
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON_FILE included a folder:
    if len (folder) > 0:
        # create the folder
        os.makedirs(folder, exist_ok = True)
        
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open (JSON_FILE, 'w') as f:
        json.dump([],f)
        
## If it exists, inform user
else:
    print(f"[i] {JSON_FILE} already exists.")

[i] Data/San Antonio-boba tea.json already exists.


In [35]:
## Load previous results and use len of results for offset

## set offset based on previous results


### Make the first API call to get the first page of data

- We will use this first result to check:
    - how many total results there are?
    - Where is the actual data we want to save?
    - how many results do we get at a time?

In [36]:
# use our yelp_api variable's search_query method to perform our API call
results = yelp.search_query(term = term, location = location)

In [37]:
type(results)

dict

In [38]:
len(results)

3

In [39]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [40]:
results['total']

82

In [41]:
results['businesses']

[{'id': 'cCkVeldKwdH8Tz8AAgX9tA',
  'alias': 'feng-cha-boba-tea-and-desserts-san-antonio',
  'name': 'Feng Cha Boba Tea and Desserts',
  'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/V4kz7BG2mTI8wOu6NtQB3g/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/feng-cha-boba-tea-and-desserts-san-antonio?adjust_creative=AzyqDaAFoaPEVDlGQs1aoQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=AzyqDaAFoaPEVDlGQs1aoQ',
  'review_count': 23,
  'categories': [{'alias': 'desserts', 'title': 'Desserts'},
   {'alias': 'bubbletea', 'title': 'Bubble Tea'},
   {'alias': 'juicebars', 'title': 'Juice Bars & Smoothies'}],
  'rating': 4.5,
  'coordinates': {'latitude': 29.534712935323807,
   'longitude': -98.52122847342896},
  'transactions': [],
  'price': '$$',
  'location': {'address1': '8055 West Ave',
   'address2': 'Ste 100',
   'address3': None,
   'city': 'San Antonio',
   'zip_code': '78213',
   'country': 'US',
   'state': 'TX',
   'display_address': ['80

In [42]:
## How many results total?
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,cCkVeldKwdH8Tz8AAgX9tA,feng-cha-boba-tea-and-desserts-san-antonio,Feng Cha Boba Tea and Desserts,https://s3-media1.fl.yelpcdn.com/bphoto/V4kz7B...,False,https://www.yelp.com/biz/feng-cha-boba-tea-and...,23,"[{'alias': 'desserts', 'title': 'Desserts'}, {...",4.5,"{'latitude': 29.534712935323807, 'longitude': ...",[],$$,"{'address1': '8055 West Ave', 'address2': 'Ste...",,,3919.663391
1,etxlgtnLSkeqhyz2g3yVUg,gong-cha-san-antonio-4,Gong Cha,https://s3-media3.fl.yelpcdn.com/bphoto/krbWMR...,False,https://www.yelp.com/biz/gong-cha-san-antonio-...,23,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}]",4.5,"{'latitude': 29.603546, 'longitude': -98.442823}",[],$$,"{'address1': '2903 N Loop 1604 E', 'address2':...",12105921011.0,(210) 592-1011,8321.205237
2,H9vRFCs52gNEoyRx89RDTg,t-pioca-san-antonio,T-Pioca,https://s3-media4.fl.yelpcdn.com/bphoto/axy88z...,False,https://www.yelp.com/biz/t-pioca-san-antonio?a...,84,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}...",4.5,"{'latitude': 29.531959, 'longitude': -98.484352}","[delivery, pickup]",$,"{'address1': '10100 Reunion Pl', 'address2': '...",12068227163.0,(206) 822-7163,609.463215
3,OKK9DHGOTHBw5Uob_1YmcQ,y-bubble-san-antonio-7,Y Bubble,https://s3-media2.fl.yelpcdn.com/bphoto/sWmW5B...,False,https://www.yelp.com/biz/y-bubble-san-antonio-...,41,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}...",4.5,"{'latitude': 29.599058, 'longitude': -98.417534}",[],$$,"{'address1': '17306 Bulverde Rd', 'address2': ...",12103325182.0,(210) 332-5182,9270.310669
4,K6AQLCseRMjJwM1wHvYL1g,kokee-tea-san-antonio,Kokee Tea,https://s3-media2.fl.yelpcdn.com/bphoto/5eIGEJ...,False,https://www.yelp.com/biz/kokee-tea-san-antonio...,28,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}]",3.5,"{'latitude': 29.51838, 'longitude': -98.49789}","[delivery, pickup]",,"{'address1': '7400 San Pedro Ave', 'address2':...",18055512903.0,(805) 551-2903,2606.361855
5,frRZnVjfD_u4_nMsymb9aw,premium-matcha-cafe-maiko-san-antonio,Premium Matcha Cafe Maiko,https://s3-media2.fl.yelpcdn.com/bphoto/-0EX46...,False,https://www.yelp.com/biz/premium-matcha-cafe-m...,174,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"{'latitude': 29.522676, 'longitude': -98.506518}","[delivery, pickup]",$,"{'address1': '7115 Blanco Rd', 'address2': 'St...",12102671020.0,(210) 267-1020,2920.0271
6,R4iAk9sxqlxUl0S9Ae4ilw,the-nest-boba-castle-hills,The Nest Boba,https://s3-media2.fl.yelpcdn.com/bphoto/DVUdG3...,False,https://www.yelp.com/biz/the-nest-boba-castle-...,59,"[{'alias': 'cafes', 'title': 'Cafes'}, {'alias...",4.0,"{'latitude': 29.51859, 'longitude': -98.5078}","[delivery, pickup]",$,"{'address1': '6903 Blanco Rd', 'address2': Non...",12104449596.0,(210) 444-9596,3276.242186
7,dF9K2A375cKe2cjiWirWsA,kung-fu-tea-san-antonio-5,Kung Fu Tea,https://s3-media2.fl.yelpcdn.com/bphoto/DGlLZ2...,False,https://www.yelp.com/biz/kung-fu-tea-san-anton...,13,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}...",4.5,"{'latitude': 29.605787, 'longitude': -98.467322}",[],,"{'address1': '18130 San Pedro Ave Suite 108', ...",,,7818.321896
8,-3ZYeM_1GKyVlyol49HTxQ,lustea-san-antonio,Lustea,https://s3-media4.fl.yelpcdn.com/bphoto/2H-fWo...,False,https://www.yelp.com/biz/lustea-san-antonio?ad...,78,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...",3.5,"{'latitude': 29.519258402741237, 'longitude': ...",[delivery],$$,"{'address1': '602 NW Loop 410', 'address2': 'S...",12105980089.0,(210) 598-0089,2875.719569
9,GbRL6LrBAJgaOUZh78GrlA,sp-café-san-antonio,SP Café,https://s3-media2.fl.yelpcdn.com/bphoto/1-9Tfa...,False,https://www.yelp.com/biz/sp-caf%C3%A9-san-anto...,317,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}...",4.0,"{'latitude': 29.51915204792389, 'longitude': -...",[delivery],$$,"{'address1': '602 NW Loop 410', 'address2': 'S...",12105419689.0,(210) 541-9689,2903.264535


- Where is the actual data we want to save?

In [43]:
## How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [44]:
(results['total'])/results_per_page

4.1

- Calculate how many pages of results needed to cover the total_results

In [48]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total'])/results_per_page)
n_pages

5

In [51]:
for i in tqdm_notebook(range(1,n_pages+1)):
    ## The block of code we want to TRY to run
    try: 
        
        time.sleep(.2)
        
        ## Read in results in progress file and check the length
        with open(JSON_FILE, 'r') as f:
            previous_results = json.load(f)
        
        ## save number of results for to use as offset
        n_results = len(previous_results)
        
        
        ## use n_results as the OFFSET 
        results = yelp.search_query(location = location, term = term,
                                   offset = n_results+1)

        ## append new results and save to file
        previous_results.extend(results['businesses'])
        
        with open (JSON_FILE, 'w') as f:
            json.dump(previous_results, f)

            
    ## What to do if we get an error/exception.
    except Exception as e:
        print('[!] ERROR', e)


  0%|          | 0/5 [00:00<?, ?it/s]

## Open the Final JSON File with Pandas

In [52]:
df = pd.read_json(JSON_FILE)

In [53]:
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,etxlgtnLSkeqhyz2g3yVUg,gong-cha-san-antonio-4,Gong Cha,https://s3-media3.fl.yelpcdn.com/bphoto/krbWMR...,False,https://www.yelp.com/biz/gong-cha-san-antonio-...,23,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}]",4.5,"{'latitude': 29.603546, 'longitude': -98.442823}",[],$$,"{'address1': '2903 N Loop 1604 E', 'address2':...",12105921011,(210) 592-1011,8321.205237
1,H9vRFCs52gNEoyRx89RDTg,t-pioca-san-antonio,T-Pioca,https://s3-media4.fl.yelpcdn.com/bphoto/axy88z...,False,https://www.yelp.com/biz/t-pioca-san-antonio?a...,84,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}...",4.5,"{'latitude': 29.531959, 'longitude': -98.484352}","[pickup, delivery]",$,"{'address1': '10100 Reunion Pl', 'address2': '...",12068227163,(206) 822-7163,609.463215
2,OKK9DHGOTHBw5Uob_1YmcQ,y-bubble-san-antonio-7,Y Bubble,https://s3-media2.fl.yelpcdn.com/bphoto/sWmW5B...,False,https://www.yelp.com/biz/y-bubble-san-antonio-...,41,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}...",4.5,"{'latitude': 29.599058, 'longitude': -98.417534}",[],$$,"{'address1': '17306 Bulverde Rd', 'address2': ...",12103325182,(210) 332-5182,9270.310669
3,K6AQLCseRMjJwM1wHvYL1g,kokee-tea-san-antonio,Kokee Tea,https://s3-media2.fl.yelpcdn.com/bphoto/5eIGEJ...,False,https://www.yelp.com/biz/kokee-tea-san-antonio...,28,"[{'alias': 'bubbletea', 'title': 'Bubble Tea'}]",3.5,"{'latitude': 29.51838, 'longitude': -98.49789}","[pickup, delivery]",,"{'address1': '7400 San Pedro Ave', 'address2':...",18055512903,(805) 551-2903,2606.361855
4,frRZnVjfD_u4_nMsymb9aw,premium-matcha-cafe-maiko-san-antonio,Premium Matcha Cafe Maiko,https://s3-media2.fl.yelpcdn.com/bphoto/-0EX46...,False,https://www.yelp.com/biz/premium-matcha-cafe-m...,174,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"{'latitude': 29.522676, 'longitude': -98.506518}","[pickup, delivery]",$,"{'address1': '7115 Blanco Rd', 'address2': 'St...",12102671020,(210) 267-1020,2920.0271


In [54]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74 entries, 0 to 73
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             74 non-null     object 
 1   alias          74 non-null     object 
 2   name           74 non-null     object 
 3   image_url      74 non-null     object 
 4   is_closed      74 non-null     bool   
 5   url            74 non-null     object 
 6   review_count   74 non-null     int64  
 7   categories     74 non-null     object 
 8   rating         74 non-null     float64
 9   coordinates    74 non-null     object 
 10  transactions   74 non-null     object 
 11  price          51 non-null     object 
 12  location       74 non-null     object 
 13  phone          74 non-null     object 
 14  display_phone  74 non-null     object 
 15  distance       74 non-null     float64
dtypes: bool(1), float64(2), int64(1), object(12)
memory usage: 8.9+ KB


In [59]:
## convert the filename to a .csv.gz
csv_file = JSON_FILE.replace('.json','.csv.gz')
csv_file

'Data/San Antonio-boba tea.csv.gz'

In [60]:
## Save it as a compressed csv (to save space)
df.to_csv(csv_file,compression = 'gzip', index = False)