In [128]:
import requests
import json
import time
from bs4 import BeautifulSoup
import pickle
import pandas as pd

In [35]:
def read_api_header(filepath):
    """
    Build the HTTP authorization header for the Yelp API from a key file.

    Args:
        filepath (string): File containing API Key
    Returns:
        API_HEADERS (dict): ``{'Authorization': 'Bearer <api_key>'}``,
            suitable for the ``headers=`` argument of ``requests.get``.
    """
    with open(filepath, 'r') as f:
        # Remove every whitespace character, not only newlines: a trailing
        # space or tab in the key file would otherwise end up inside the
        # Authorization header value.
        api_key = ''.join(f.read().split())
    API_HEADERS = {
        'Authorization': ' '.join(['Bearer', api_key])
    }
    return API_HEADERS

# read_api_header('api_key.txt')

In [60]:
def retrieveHTML(URL, API_HEADERS, params, timeout=10):
    """
    Send a GET request and return the raw response body.

    Args:
        URL (string): Address to request.
        API_HEADERS (dict): HTTP headers to send with the request.
        params (dict): Query-string parameters.
        timeout (float): Seconds to wait for the server before giving up.
            ``requests`` applies no timeout unless one is passed, so a
            stalled connection would otherwise block the notebook forever.
    Returns:
        (bytes): The response body. It is returned whatever the status code
            was; the status code is only printed.
    """
    response = requests.get(url=URL, headers=API_HEADERS, params=params, timeout=timeout)
    print('status: ',response.status_code)
    return response.content


In [61]:
def all_restaurants(URL, params, api_key_path='api_key.txt'):
    """
    Query an API endpoint with the stored key and return the decoded JSON.

    Args:
        URL (string): Endpoint to request.
        params (dict): Query-string parameters for the request.
        api_key_path (string): File containing the API key; defaults to
            'api_key.txt' in the working directory.
    Returns:
        The decoded JSON response. Its keys are printed, so a JSON object
        (dict) is expected.
    """
    # get api header
    API_HEADERS = read_api_header(api_key_path)

    raw_body = retrieveHTML(URL, API_HEADERS, params)
    response_json = json.loads(raw_body)
    print(response_json.keys())
    return response_json




In [62]:
# Yelp business-search endpoint and the query to run against it.
url = 'https://api.yelp.com/v3/businesses/search'
params = {'location': 'Shanghai', 'limit':50}
# Pass the `url` assigned above: the name `URL` is not assigned anywhere
# before this point, so the call raised NameError on a fresh kernel.
data = all_restaurants(url, params)

status:  200
dict_keys(['businesses', 'total', 'region'])


### For each restaurant, extract its URL

In [50]:
# Show which fields the first business record exposes.
data['businesses'][0].keys()

dict_keys(['id', 'alias', 'name', 'image_url', 'is_closed', 'url', 'review_count', 'categories', 'rating', 'coordinates', 'transactions', 'price', 'location', 'phone', 'display_phone', 'distance'])

In [64]:
def parse_api_response(data):
    """
    Parse Yelp API results to extract restaurant URLs.

    Args:
        data (dict or string): The API response, either already decoded into
            a dict or as a string/bytes of properly formatted JSON.

    Returns:
        (list): list of URLs as strings, one per entry of data['businesses'].
    """
    # The contract used to promise JSON text while the body indexed a decoded
    # dict; accept both forms so either kind of caller works.
    if isinstance(data, (str, bytes, bytearray)):
        data = json.loads(data)
    return [business['url'] for business in data['businesses']]
    

### parsing a page

In [None]:
url_list = parse_api_response(data)
# Fetch just the first restaurant page as a sample. Slicing replaces the
# loop-and-break; note that `url` is rebound to that restaurant's URL here.
for url in url_list[:1]:
    print(url)
    html = requests.get(url, timeout=10).content
    # Show a short preview instead of dumping the whole document.
    print(html[:500])

In [111]:
def parse_page(html):
    """
    Parse the reviews on a single page of a restaurant.

    Args:
        html (string): String of HTML corresponding to a Yelp restaurant

    Returns:
        tuple(list, string): a tuple of two elements
            first element: list of dictionaries, one per review, with the keys
                'user_name', 'rates', 'date' and 'text'; a field whose markup
                is absent from the review is stored as None
            second element: URL for the next page of reviews (or None if it is the last page)
    """

    root = BeautifulSoup(html, 'html5lib')

    # find reviews
    reviews = root.find_all('div', class_='review review--with-sidebar')
    print('This page has reviews: %i' %len(reviews))

    # extract data from each review
    collection = []
    for review in reviews:
        # find() returns None when the element is missing, so every lookup is
        # checked before it is dereferenced.
        name_tag = review.find('li', class_='user-name')
        stars_tag = review.find('div', class_='i-stars')
        date_tag = review.find('span', class_='rating-qualifier')
        # Prefer the English body. Presumably reviews in other languages carry
        # a different lang value (TODO confirm against live markup), so fall
        # back to any <p> that declares a language.
        text_tag = review.find('p', lang='en')
        if text_tag is None:
            text_tag = review.find('p', lang=True)

        collection.append({
            'user_name': None if name_tag is None else name_tag.text.strip(),
            'rates': _rating_from_stars(stars_tag),
            'date': None if date_tag is None else date_tag.text.strip(),
            'text': None if text_tag is None else text_tag.text,
        })

    # next page url: there is no such link when find() returns None
    next_link = root.find('a', class_='u-decoration-none next pagination-links_anchor')
    nextPage_url = None if next_link is None else next_link.get('href')
    print('Next page is: ', nextPage_url)

    return collection, nextPage_url


def _rating_from_stars(stars_tag):
    """Return the first word of the tag's title attribute as a float, or None if unavailable."""
    if stars_tag is None:
        return None
    words = stars_tag.get('title', '').split()
    try:
        return float(words[0])
    except (IndexError, ValueError):
        # Empty title, or a title that does not start with a number.
        return None


## parsing all

In [137]:
def extract_reviews(url, delay=1, timeout=10):
    """
    Retrieve ALL of the reviews for a single restaurant on Yelp.

    Parameters:
        url (string): Yelp URL corresponding to the restaurant of interest.
        delay (float): Seconds to pause before each page request.
        timeout (float): Seconds to wait for each response before giving up.

    Returns:
        reviews (list): list of dictionaries containing extracted review information
    """

    data = []
    visited = set()
    nextPage_url = url
    # Stop when there is no next link, or when pagination points back to a
    # page that was already fetched (which would otherwise loop forever).
    while nextPage_url is not None and nextPage_url not in visited:
        visited.add(nextPage_url)
        time.sleep(delay)
        html = requests.get(nextPage_url, timeout=timeout).content
        collection, nextPage_url = parse_page(html)
        data.extend(collection)
        print('Now has collect %i reviews' %len(data))
    return data

In [138]:
# Scrape the first restaurant in the search results. Index `url_list`
# explicitly rather than relying on `url` having been rebound by an earlier
# for-loop, which only works if that cell happened to run first.
one_res_rev = extract_reviews(url_list[0])

This page has reviews: 20
Next page is:  https://www.yelp.com/biz/bonnie-blue-winchester?adjust_creative=tECPEa3JejhblH7HDAej1g&start=20&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=tECPEa3JejhblH7HDAej1g
Now has collect 20 reviews
This page has reviews: 20
Next page is:  https://www.yelp.com/biz/bonnie-blue-winchester?adjust_creative=tECPEa3JejhblH7HDAej1g&start=40&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=tECPEa3JejhblH7HDAej1g
Now has collect 40 reviews
This page has reviews: 20
Next page is:  https://www.yelp.com/biz/bonnie-blue-winchester?adjust_creative=tECPEa3JejhblH7HDAej1g&start=60&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=tECPEa3JejhblH7HDAej1g
Now has collect 60 reviews
This page has reviews: 20
Next page is:  https://www.yelp.com/biz/bonnie-blue-winchester?adjust_creative=tECPEa3JejhblH7HDAej1g&start=80&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=tECPEa3JejhblH7HDAej1g

### Write the reviews to CSV

In [139]:
# Preview the first few records only, rather than dumping the whole list.
one_res_rev[:3]

[{'user_name': 'Eron G.',
  'rates': 5.0,
  'date': '2/24/2019',
  'text': 'Went there today for the first time. Amazing cheesy grits! Our server was super nice and patient. I also got bakery items. The not sure what it was but the fudgey peanut butter thing was great.'},
 {'user_name': 'Kristen W.',
  'rates': 3.0,
  'date': '2/19/2019',
  'text': "I'm a big BBQ gal as I used to work at a BBQ restaurant. Therefore, whenever I eat it, I judge it pretty hard. Bonnie blue is a cute little restaurant where the back half of the restaurant has tables for dinner/lunch and the front part has more of a cafe feel. They sale bakery items and coffee up in the front. They sale Natty Boh beer so if you are a MD person this is where to get that in the ole VA.I have eaten at bonnie blue multiple times so I have been able to try many dishes here.1) Shrimp and Grits - hard no for me. Shrimp and Grits is probably my favorite dish to eat and the way BB prepared it was just not great for the ole taste bud

In [140]:
# Tabulate the scraped reviews: one row per review dict, one column per key.
df = pd.DataFrame(data=one_res_rev)
df.head(5)

Unnamed: 0,date,rates,text,user_name
0,2/24/2019,5.0,Went there today for the first time. Amazing c...,Eron G.
1,2/19/2019,3.0,I'm a big BBQ gal as I used to work at a BBQ r...,Kristen W.
2,2/13/2019,5.0,"It feels dramatic, but I think I had some of t...",Abi M.
3,12/30/2018,5.0,Amazing layout in and old building that just s...,Shawna C.
4,12/1/2018,2.0,"Very reasonably priced with large portions, bu...",Claire W.


In [141]:
# Persist the review table; the row index is written as the first column.
df.to_csv(path_or_buf='one_restau_reviews.csv', encoding='utf-8')