In [70]:
#Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import math
import requests
import time

In [71]:
#Retrieving API Key

api_key_file = open('../assets/api-key.txt')
api_key = api_key_file.read().strip()
api_key_file.close()

In [72]:
def get_restaurants_at_city(city, amount = 50):
    """Gets a list of restaurant as dictionaries from a city through the yelp fusion api
    Returns an empty list if nothing found"""
    restaurant_list = []
    url = "https://api.yelp.com/v3/businesses/search"

    params = {
        'categories' : 'restaurants',
        'location' : city,
        'limit' : amount
    }
    
    headers = {
        'Authorization': 'Bearer ' + api_key
    }
    
    #Makes request from yelp api
    req = requests.get(url, params, headers=headers)
    
    if req.status_code == 200:
        restaurants = req.json()
        restaurants = restaurants['businesses']
        #Adds each restaurant as a dictionary to list
        for restaurant in restaurants:
            if restaurant['is_closed'] == False:
                
                restaurant_list.append({'name':restaurant['name'], 
                                        'review_count':restaurant['review_count'],
                                        'id':restaurant['id'],
                                        'rating':restaurant['rating']})
            
    else:
        print("None found")
        print(req.status_code)

    return restaurant_list
        

In [83]:
def get_reviews_from_restaurant(restaurant):
    """Uses the restaurants URL to grab all the reviews from a restaurant
    """
    review_list = []
    
    params = {
        'start' :'0'  #going to update
    }
   
    #Loop to make requests to consecutive pages for reviews
    for i in range(math.ceil(restaurant['review_count'] / 10)):
        url = f"https://www.yelp.com/biz/{restaurant['id']}/review_feed?rl=en&q=&sort_by=relevance_desc"
        req = requests.get(url, params)
        
        if req.status_code == 200:
        
            reviews = req.json()

            #Find the list of all the reviews
            reviews = reviews['reviews']

            for review in reviews:
                #going to want: restaurant name, customer review, restaurant review, body of text of the review

                review_list.append([restaurant['name'],
                                    restaurant['rating'],
                                    review['rating'],
                                    review['comment']['text']
                                   ])

            params['start'] = str(int(params['start']) + 10)
                                
        else:
            print(req.status_code)

    print(f"Restaurant: {restaurant['name']}\nNumber of Reviews: {restaurant['review_count']}\nReviews found: {len(review_list)}")

    return review_list
        

In [84]:
def get_reviews_from_restaurants(restaurants, city):
    """Takes a lists of restaurants and returns a dataframe
    of their reviews"""
    reviews_list = []
    
    for restaurant in restaurants:
        print(f"Getting {restaurant['review_count']} reviews for restaurant: {restaurant['name']}")
        start = time.perf_counter()
        reviews = get_reviews_from_restaurant(restaurant)
        reviews_list.extend(reviews)
        end = time.perf_counter()
        print(f'Took {(start - end) / 60} minutes')
        
    df = pd.DataFrame(reviews_list, columns = ['restaurant_name', 'restaurant_rating', 'customer_rating', 'review_text'])
    df.drop_duplicates()
    df.to_csv(f'../data/{city}_restaurant_reviews')
    return df

In [85]:
restaurants = get_restaurants_at_city('San Francisco')

In [87]:
df = get_reviews_from_restaurants(restaurants[0:5], 'San Francisco')

Getting 11771 reviews for restaurant: Brenda's French Soul Food
Restaurant: Brenda's French Soul Food
Number of Reviews: 11771
Reviews found: 11756
Took -7.014973561666648 minutes
Getting 5764 reviews for restaurant: Gary Danko
Restaurant: Gary Danko
Number of Reviews: 5764
Reviews found: 5763
Took -3.3693323950000074 minutes
Getting 8573 reviews for restaurant: Tartine Bakery
Restaurant: Tartine Bakery
Number of Reviews: 8573
Reviews found: 8553
Took -4.543903938333339 minutes
Getting 6708 reviews for restaurant: Hog Island Oyster
Restaurant: Hog Island Oyster
Number of Reviews: 6708
Reviews found: 6675
Took -3.874344731666679 minutes
Getting 8194 reviews for restaurant: House of Prime Rib
Restaurant: House of Prime Rib
Number of Reviews: 8194
Reviews found: 8183
Took -4.635882406666679 minutes
