In [22]:
import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
import time
import joblib as jb
import config
from collections import OrderedDict

%matplotlib inline

In [7]:
# yelp credentials
client_id = config.client_id
api_key = config.api_key

### Example API Call

In [77]:
def get_businesses(location, term, api_key, sleep=False):
    '''
    
    '''
    headers = {'Authorization': f'Bearer {api_key}'}
    url = 'https://api.yelp.com/v3/businesses/search'

    data = []
    for offset in range(0, 1000, 50):
        params = {
            'limit': 50, 
            'location': location.replace(' ', '+'),
            'term': term.replace(' ', '+'),
            'offset': offset
        }

        response = requests.get(url, headers=headers, params=params)
        
        assert type(sleep) == int or type(sleep) == float, "Enter sleep time in seconds"
        if sleep:
            time.sleep(sleep)
        
        if response.status_code == 200:
            data += response.json()['businesses']
        
        elif response.status_code == 400:
            print('400 Bad Request')
            break

    return data 

In [79]:
%%time 

businesses = []
zip_codes = ['07302', '07303', '07304', '07305', '07306', '07307',
             '07308', '07310', '07311', '07395', '07303', '07399']


for zip_cd in zip_codes:
    sleep = np.random.uniform(0.5, 1.5)
    businesses += get_businesses(zip_cd, term, api_key, sleep)

Wall time: 5min 44s


In [2]:
n_search = 2200
n_zips = 12

In [3]:
n_search = len(businesses)
n_zips = len(zip_codes)
print(f'Searching through {n_zips} zip codes yielded {n_search} results')

Searching through 12 zip codes yielded 2200 results


In [87]:
unique_businesses = [i for n, i in enumerate(businesses) if i not in businesses[n + 1:]]

In [6]:
n_unique = len(unique_businesses)
print(f'Out of {n_search} search results, There are {n_unique} unique entries results')

Out of 2200 search results, There are 1855 unique entries results


### Business Search Example Result

In [66]:
business_search_result = unique_businesses[0]
business_search_result

{'id': 'r4SazXX-ISikdRYWbH-HAg',
 'alias': 'hudson-and-co-jersey-city',
 'name': 'Hudson & Co',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/y9xdVdAw-hn22JHcIT0IQQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/hudson-and-co-jersey-city?adjust_creative=oXRz7HWebFp6YdQ0YT3PMQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=oXRz7HWebFp6YdQ0YT3PMQ',
 'review_count': 521,
 'categories': [{'alias': 'gastropubs', 'title': 'Gastropubs'},
  {'alias': 'newamerican', 'title': 'American (New)'},
  {'alias': 'cocktailbars', 'title': 'Cocktail Bars'}],
 'rating': 4.0,
 'coordinates': {'latitude': 40.720988, 'longitude': -74.031573},
 'transactions': ['delivery', 'pickup'],
 'price': '$$$',
 'location': {'address1': '3 2nd St',
  'address2': '',
  'address3': None,
  'city': 'Jersey City',
  'zip_code': '07302',
  'country': 'US',
  'state': 'NJ',
  'display_address': ['3 2nd St', 'Jersey City, NJ 07302']},
 'phone': '+12016857330',
 'display_phone': '

### Business Details Search Example 

In [3]:
yelp_id = 'r4SazXX-ISikdRYWbH-HAg'
business_alias = 'hudson-and-co-jersey-city'
business_name = 'Hudson & Co'

In [90]:
def get_business_details(yelp_id, api_key):
    '''
    
    '''
    headers = {'Authorization': f'Bearer {api_key}'}
    url = f'https://api.yelp.com/v3/businesses/{yelp_id}'
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:   
        return response.json()
    else:
        return response.status_code

# save business details and unpack the address later, and categories later
business_details = get_business_details(yelp_id, api_key)
address_information = business_details['location']

categories = business_details['categories']
categories = [category['alias'] for category in categories]
category_string = ';'.join(categories)

# update categories
business_details['categories'] = category_string

In [91]:
business_details

{'id': 'r4SazXX-ISikdRYWbH-HAg',
 'alias': 'hudson-and-co-jersey-city',
 'name': 'Hudson & Co',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/y9xdVdAw-hn22JHcIT0IQQ/o.jpg',
 'is_claimed': True,
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/hudson-and-co-jersey-city?adjust_creative=oXRz7HWebFp6YdQ0YT3PMQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_lookup&utm_source=oXRz7HWebFp6YdQ0YT3PMQ',
 'phone': '+12016857330',
 'display_phone': '(201) 685-7330',
 'review_count': 530,
 'categories': 'gastropubs;newamerican;cocktailbars',
 'rating': 4.0,
 'location': {'address1': '3 2nd St',
  'address2': '',
  'address3': None,
  'city': 'Jersey City',
  'zip_code': '07302',
  'country': 'US',
  'state': 'NJ',
  'display_address': ['3 2nd St', 'Jersey City, NJ 07302'],
  'cross_streets': ''},
 'coordinates': {'latitude': 40.720988, 'longitude': -74.031573},
 'photos': ['https://s3-media2.fl.yelpcdn.com/bphoto/y9xdVdAw-hn22JHcIT0IQQ/o.jpg',
  'https://s3-media3.fl.yelpcdn.co

### Exammple Business Reviews Search

In [97]:
def get_business_reviews(yelp_id, api_key, sort_by='data'):
    '''
    
    '''
    headers = {'Authorization': f'Bearer {api_key}'}
    url = f'https://api.yelp.com/v3/businesses/{yelp_id}/reviews'
    params = {'sort_by': sort_by}
    response = requests.get(url, headers=headers, params=params)
    
    if response.status_code == 200:   
        return response.json()['reviews']
    else:
        return response.status_code
    
reviews = get_business_reviews(yelp_id, api_key)    

In [98]:
# Sort nested dictionary by key
order_reviews = sorted(reviews, key=lambda d: d['time_created'], reverse=True) 
most_recent_review = order_reviews[0]

# remove the posting user information
del most_recent_review['user']

In [100]:
# merge the data together
# if overlapping keys, use the business_details keys 
business_details_all = {**most_recent_review, **business_search_result, **address_information, **business_details}

del business_details_all['location']
del business_details_all['image_url'] 
del business_details_all['coordinates']
del business_details_all['distance']

# convert dictionary to pandas dataframe
df = pd.DataFrame.from_dict(business_details_all, orient='index').T

# save dataframe as a csv
df.to_csv('yelp_merged_results.csv', index=False)