In [24]:
#Imports

import numpy as np
import pandas as pd

import math
import requests
import time

In [2]:
#Retrieving API Key

api_key_file = open('../assets/api-key.txt')
api_key = api_key_file.read().strip()
api_key_file.close()

In [3]:
def get_restaurants_at_city(city, amount = 50):
    """Gets a list of restaurant as dictionaries from a city through the yelp fusion api
    Returns an empty list if nothing found"""
    restaurant_list = []
    url = "https://api.yelp.com/v3/businesses/search"

    params = {
        'categories' : 'restaurants',
        'location' : city,
        'limit' : amount
    }
    
    headers = {
        'Authorization': 'Bearer ' + api_key
    }
    
    #Makes request from yelp api
    req = requests.get(url, params, headers=headers)
    
    if req.status_code == 200:
        restaurants = req.json()
        restaurants = restaurants['businesses']
        #Adds each restaurant as a dictionary to list
        for restaurant in restaurants:
            if restaurant['is_closed'] == False:
                
                restaurant_list.append({'name':restaurant['name'], 
                                        'review_count':restaurant['review_count'],
                                        'id':restaurant['id'],
                                        'rating':restaurant['rating']})
            
    else:
        print("None found")
        print(req.status_code)

    return restaurant_list
        

In [21]:
def get_reviews_from_restaurant(restaurant, num_reviews):
    """Uses the restaurants URL to grab all the reviews from a restaurant
    """
    review_list = []
    
    params = {
        'start' :'0'  #going to update
    }
    #Loop to make requests to consecutive pages for reviews
    for i in range(math.ceil(num_reviews / 10)):
        url = f"https://www.yelp.com/biz/{restaurant['id']}/review_feed?rl=en&q=&sort_by=relevance_desc"
        req = requests.get(url, params)
        
        if req.status_code == 200:
        
            reviews = req.json()

            #Find the list of all the reviews
            reviews = reviews['reviews']

            for review in reviews:
                #going to want: restaurant name, customer review, restaurant review, body of text of the review

                review_list.append([restaurant['name'],
                                    restaurant['rating'],
                                    review['rating'],
                                    review['comment']['text']
                                   ])

            params['start'] = str(int(params['start']) + 10)
                                
        else:
            print(req.status_code)

    print(f"Restaurant: {restaurant['name']}\nNumber of Reviews: {restaurant['review_count']}\nReviews found: {len(review_list)}")

    return review_list
        

In [19]:
def get_reviews_from_restaurants(restaurants, city, num_reviews = 0):
    """Takes a lists of restaurants and returns a dataframe of their reviews
    If num_reviews is defaulted it will get all of the reviews"""
    reviews_list = []        
    
    for restaurant in restaurants:
        print(f"Getting {restaurant['review_count']} reviews for restaurant: {restaurant['name']}")
        
        if num_reviews == 0: 
            num_reviews = restaurant['review_count']
            
        start = time.perf_counter()
        reviews = get_reviews_from_restaurant(restaurant, num_reviews)
        reviews_list.extend(reviews)
        end = time.perf_counter()
        print(f'Took {(end - start) / 60} minutes')
        
    df = pd.DataFrame(reviews_list, columns = ['restaurant_name', 'restaurant_rating', 'customer_rating', 'review_text'])
    df.drop_duplicates()
    return df

In [6]:
# Getting the first 50 restaurants listed for San Francisco
restaurants = get_restaurants_at_city('San Francisco')

In [7]:
restaurants

[{'name': "Brenda's French Soul Food",
  'review_count': 11769,
  'id': 'lJAGnYzku5zSaLnQ_T6_GQ',
  'rating': 4.0},
 {'name': 'Gary Danko',
  'review_count': 5765,
  'id': 'WavvLdfdP6g8aZTtbBQHTw',
  'rating': 4.5},
 {'name': 'Tartine Bakery',
  'review_count': 8574,
  'id': 'ri7UUYmx21AgSpRsf4-9QA',
  'rating': 4.0},
 {'name': 'Hog Island Oyster',
  'review_count': 6709,
  'id': 'Xg-FyjVKAN70LO4u4Z1ozg',
  'rating': 4.5},
 {'name': 'House of Prime Rib',
  'review_count': 8196,
  'id': 'oT08T3Vpn1I7jDmrBBRMTw',
  'rating': 4.0},
 {'name': 'Fog Harbor Fish House',
  'review_count': 8393,
  'id': 'f-m7-hyFzkf0HSEeQ2s-9A',
  'rating': 4.5},
 {'name': 'Burma Superstar',
  'review_count': 7295,
  'id': 'eYXwVR4mMAjzkJnm5wneHQ',
  'rating': 4.0},
 {'name': 'Kokkari Estiatorio',
  'review_count': 4954,
  'id': 'PsY5DMHxa5iNX_nX0T-qPA',
  'rating': 4.5},
 {'name': 'San Tung',
  'review_count': 7705,
  'id': 'M0JTO3oyu6gxh1mfFjU-dA',
  'rating': 4.0},
 {'name': 'Marufuku Ramen',
  'review_count

In [22]:
# Getting 1000 reviews for all of the cities retrieved
df = get_reviews_from_restaurants(restaurants, 'San Francisco', 1000)

Getting 11769 reviews for restaurant: Brenda's French Soul Food
Restaurant: Brenda's French Soul Food
Number of Reviews: 11769
Reviews found: 1000
Took 0.6968701133333336 minutes
Getting 5765 reviews for restaurant: Gary Danko
Restaurant: Gary Danko
Number of Reviews: 5765
Reviews found: 1000
Took 0.7404021199999988 minutes
Getting 8574 reviews for restaurant: Tartine Bakery
Restaurant: Tartine Bakery
Number of Reviews: 8574
Reviews found: 1000
Took 0.60748262 minutes
Getting 6709 reviews for restaurant: Hog Island Oyster
Restaurant: Hog Island Oyster
Number of Reviews: 6709
Reviews found: 1000
Took 0.6701044233333315 minutes
Getting 8196 reviews for restaurant: House of Prime Rib
Restaurant: House of Prime Rib
Number of Reviews: 8196
Reviews found: 1000
Took 0.6711769583333346 minutes
Getting 8393 reviews for restaurant: Fog Harbor Fish House
Restaurant: Fog Harbor Fish House
Number of Reviews: 8393
Reviews found: 1000
Took 2.3366230450000027 minutes
Getting 7295 reviews for restauran

In [23]:
# Saving to a csv
df.to_csv(f'../data/San_Francisco_restaurant_reviews.csv')