## Using the yelp API to get restaurant data from San Francisco

In [1]:
# https://www.yelp.com/developers/documentation/v3/business_search

In [2]:
#Import the required dependencies
import requests
import pandas as pd
import json
from urllib.parse import urlparse
import time

#Import the API Key
from config import yelp_key

In [3]:
#Define parameters for our query

#Define the header
headers = {
        'Authorization': 'Bearer %s' %yelp_key
    }

#Create a list to store the data retrieved
complete_business_data = []

#Starting the URL for yelp businesses API call
url = "https://api.yelp.com/v3/businesses/search"

#Set counters
record_count = 0

#Carry out the process until we get an offset of 1000, according to the yelp documentation

for offset in range(0,1000,50): #previous code stoped at retrieval 9(450), then at 5 therefore 850
    parameters = {'location': 'San Francisco',
                    'term':'restaurants',
                    'limit':50,
                    'radius':30000,
                   'offset':offset
                  }
    record_count+=1
    print("-----------------------------------------------------")
    print(f"Starting retrieval number {record_count}")
    

    #Make a request to Yelp API
    response = requests.get(url= url, params= parameters, headers= headers)
    business_data = response.json()

    #Iterate through every response from the API to store the data retrieved
    for business in business_data['businesses']:

        try:
            business_id = business['id']
            business_name = business['name']
            business_image = business['image_url']
            business_url_yelp = business['url']
            business_review_count = business['review_count']
            business_category = business['categories'][0]['title']
            business_rating = business['rating']
            business_price = business['price']
            business_lat = business['coordinates']['latitude']
            business_lng = business['coordinates']['longitude']
            business_address = business['location']['address1']
            business_city =  business['location']['city']
            business_state =  business['location']['state']
            business_country =  business['location']['country']
            business_phone = business['display_phone']

            complete_business_data.append({ 'ID': business_id,
                                           'Name':business_name,
                                          'Image':business_image,
                                          'Url':business_url_yelp,
                                           'Review count': business_review_count,
                                          'Category':business_category,
                                           'Rating': business_rating,
                                          'Price':business_price,
                                          'Latitude':business_lat,
                                          'Longitude':business_lng,
                                          'Address':business_address,
                                          'City':business_city,
                                           'State': business_state,
                                           'Country':business_country,
                                          'Phone':business_phone})
        except:
            print('This restaurant has missing information. Skipping...')
            pass

    #Make a request to the Yelp businesses reviews API
    for dict in complete_business_data:
        dict['Review'] = []
        res_url = dict['Url']
        res_id = dict['ID']
        endpoint = 'https://api.yelp.com/v3/businesses/{}/reviews'.format(res_id)
        response = requests.get(url= endpoint, headers= headers)
        business_reviews = response.json() #https://www.yelp-support.com/article/How-is-the-order-of-reviews-determined?}

        #Parse the urls from the restaurant and the review    
        for review in business_reviews['reviews']:
                review_text = review['text']
                review_url = review['url']
                parse_res_url = urlparse(res_url)
                parse_rev_url = urlparse(review_url)

                #Compare URL from restaurants and reviews to pair them up
                if parse_res_url[2] == parse_rev_url[2]:
                    dict['Review'].append(review_text)

    print(f"Finished retrieval number {record_count}")                             
    time.sleep(5)

print("Data retrieval complete")

-----------------------------------------------------
Starting retrieval number 1
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
Finished retrieval number 1
-----------------------------------------------------
Starting retrieval number 2
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaura

KeyError: 'reviews'

In [4]:
#Check the length of the data retrieved
len(complete_business_data)

585

In [5]:
#Store the data in a dataframe
restaurants_df = pd.DataFrame(complete_business_data)
restaurants_df.head(2)

Unnamed: 0,ID,Name,Image,Url,Review count,Category,Rating,Price,Latitude,Longitude,Address,City,State,Country,Phone,Review
0,f-m7-hyFzkf0HSEeQ2s-9A,Fog Harbor Fish House,https://s3-media2.fl.yelpcdn.com/bphoto/by8Hh6...,https://www.yelp.com/biz/fog-harbor-fish-house...,8049,Seafood,4.5,$$,37.808988,-122.410297,39 Pier,San Francisco,CA,US,(415) 969-2010,"[Good location and nice food, if you good to p..."
1,LPoRD0huneBqPZbg1wsJ_Q,Jamie's place,https://s3-media3.fl.yelpcdn.com/bphoto/RKaMaz...,https://www.yelp.com/biz/jamies-place-san-fran...,220,Asian Fusion,4.5,$$,37.762616,-122.465979,1380 9th Ave,San Francisco,CA,US,(415) 803-1888,[I really enjoyed this place. The spaciousness...


In [6]:
#Store the information in a CSV file
restaurants_df.to_csv('Restaurants_reviews_SF_full_',index=False,header=True)