## Using the yelp API to get restaurant data from San Francisco

In [24]:
# https://www.yelp.com/developers/documentation/v3/business_search

In [5]:
#Import the required dependencies
import requests
import pandas as pd
import json
from urllib.parse import urlparse
import time

#Import the API Key
from config import yelp_key

In [20]:
#Define parameters for our query

#Create a list of categories for the restaurants
categories = ['mexican','italian','japanese','seafood','tradamerican','chinese','cafes']

#Define the header
headers = {
        'Authorization': 'Bearer %s' %yelp_key
    }

#Create a list to store the data retrieved
complete_business_data = []

#Starting the URL for yelp businesses API call
url = "https://api.yelp.com/v3/businesses/search"

#Set counters
record_count = 1

#Carry out the process 7 times, requesting 50 restaurants per loop
for record_count in range(7):
    print("-----------------------------------------------------")
    print(f"Starting retrieval number {record_count+1}")
    
    parameters = {'location': 'San Franciso',
                    'term':'food',
                    'limit':50,
                    'radius':40000,
                   'category':categories[record_count-1]
                  }

    #Make a request to Yelp API
    response = requests.get(url= url, params= parameters, headers= headers)
    business_data = response.json()
    
    #Iterate through every response from the API to store the data retrieved
    for business in business_data['businesses']:

        try:
            business_id = business['id']
            business_name = business['name']
            business_image = business['image_url']
            business_url_yelp = business['url']
            business_review_count = business['review_count']
            business_category = business['categories'][0]['title']
            business_rating = business['rating']
            business_price = business['price']
            business_lat = business['coordinates']['latitude']
            business_lng = business['coordinates']['longitude']
            business_address = business['location']['address1']
            business_city =  business['location']['city']
            business_state =  business['location']['state']
            business_country =  business['location']['country']
            business_phone = business['display_phone']

            complete_business_data.append({ 'ID': business_id,
                                           'Name':business_name,
                                          'Image':business_image,
                                          'Url':business_url_yelp,
                                           'Review count': business_review_count,
                                          'Category':business_category,
                                           'Rating': business_rating,
                                          'Price':business_price,
                                          'Latitude':business_lat,
                                          'Longitude':business_lng,
                                          'Address':business_address,
                                          'City':business_city,
                                           'State': business_state,
                                           'Country':business_country,
                                          'Phone':business_phone})
        except:
            print('This restaurant has missing information. Skipping...')
            pass

    #Make a request to the Yelp businesses reviews API
    for dict in complete_business_data:
        dict['Review'] = []
        res_url = dict['Url']
        res_id = dict['ID']
        endpoint = 'https://api.yelp.com/v3/businesses/{}/reviews'.format(res_id)
        response = requests.get(url= endpoint, headers= headers)
        business_reviews = response.json() #https://www.yelp-support.com/article/How-is-the-order-of-reviews-determined?}
         
        #Parse the urls from the restaurant and the review    
        for review in business_reviews['reviews']:
                review_text = review['text']
                review_url = review['url']
                parse_res_url = urlparse(res_url)
                parse_rev_url = urlparse(review_url)

                #Compare URL from restaurants and reviews to pair them up
                if parse_res_url[2] == parse_rev_url[2]:
                    dict['Review'].append(review_text)
                    
    print(f"Finished retrieval number {record_count+1}")                             
    time.sleep(5)

print("Data retrieval complete")

This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
Finished retrieval number 1
Waiting to start the retrieval number 2
-----------------------------------------------------
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
Finished retrieval number 2
Waiting to start the retrieval number 3
-----------------------------------------------------
This restaurant has missing information. Skipping...
This restauran

In [21]:
#Check if the reviews were added correctly
len(complete_business_data)

307

In [22]:
#Store the data in a dataframe
restaurants_df = pd.DataFrame(complete_business_data)
restaurants_df.head(2)

Unnamed: 0,ID,Name,Image,Url,Review count,Category,Rating,Price,Latitude,Longitude,Address,City,State,Country,Phone,Review
0,i09UMzccKgyLwGYKDVP28w,Surisan,https://s3-media3.fl.yelpcdn.com/bphoto/TRmQFB...,https://www.yelp.com/biz/surisan-san-francisco...,2755,Korean,4.5,$$,37.80678,-122.41756,505 Beach St,San Francisco,CA,US,(415) 771-8449,[Surisan is like my second home because I come...
1,kvrQecqdGvnuVICMstZJmA,Dumpling Home,https://s3-media3.fl.yelpcdn.com/bphoto/GAjBPE...,https://www.yelp.com/biz/dumpling-home-san-fra...,582,Dim Sum,4.5,$$,37.775831,-122.422636,298 Gough St,San Francisco,CA,US,(415) 503-1666,[This lived up to the hype! One of the best XL...


In [23]:
#Store the information in a CSV filke
restaurants_df.to_csv('Restaurants_reviews_full.csv',index=False,header=True)