In [1]:
# https://www.yelp.com/developers/documentation/v3/business_search

## Using the Yelp API to get restaurant data from San Francisco

In [2]:
#Import the required dependencies
import requests
import pandas as pd
import json
from urllib.parse import urlparse

#Import the API Key
from config import yelp_key

In [3]:
# Endpoint of the Yelp business-search API call
url = "https://api.yelp.com/v3/businesses/search"

# Request header: the API key is passed as a bearer token
headers = {'Authorization': f'Bearer {yelp_key}'}

In [4]:
#Define parameters for our query
parameters = {
    'location': 'San Francisco',  # spelling fixed (was 'San Franciso') so the query does not rely on fuzzy geocoding
    'term': 'food',
    'limit': 50,      # businesses per request (Yelp documents 50 as the maximum)
    'radius': 40000,  # search radius in meters (Yelp documents 40000 as the maximum)
    'offset': 150,    # skip the first 150 results, i.e. fetch results 151-200
}

In [5]:
#Make a request to the API
# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url=url, params=parameters, headers=headers, timeout=30)

# Fail here with a clear HTTP error (bad key, rate limit, ...) rather than
# later with a KeyError when the error payload has no 'businesses' entry.
response.raise_for_status()

business_data = response.json()

In [6]:
def _extract_business_record(business):
    """Flatten one business entry of the search response into a flat dict.

    Args:
        business: one element of ``business_data['businesses']``.

    Returns:
        A dict with the keys 'ID', 'Name', 'Image', 'Url', 'Review count',
        'Category', 'Rating', 'Price', 'Latitude', 'Longitude', 'Address',
        'City', 'State', 'Country' and 'Phone'.

    Raises:
        KeyError: a required field (for example 'price') is absent.
        IndexError: the 'categories' list is empty.
        TypeError: a nested field is not a dict/list (for example ``None``).
    """
    coordinates = business['coordinates']
    location = business['location']
    return {
        'ID': business['id'],
        'Name': business['name'],
        'Image': business['image_url'],
        'Url': business['url'],
        'Review count': business['review_count'],
        # Only the first listed category is kept
        'Category': business['categories'][0]['title'],
        'Rating': business['rating'],
        'Price': business['price'],
        'Latitude': coordinates['latitude'],
        'Longitude': coordinates['longitude'],
        'Address': location['address1'],
        'City': location['city'],
        'State': location['state'],
        'Country': location['country'],
        'Phone': business['display_phone'],
    }


#Create a list to store the data
complete_business_data = []

for business in business_data['businesses']:
    try:
        complete_business_data.append(_extract_business_record(business))
    except (KeyError, IndexError, TypeError):
        # Only data-shape problems mean "missing information"; anything else
        # (KeyboardInterrupt, programming errors) must not be swallowed.
        print('This restaurant has missing information. Skipping...')

print("Data retrieval complete")
    

This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
This restaurant has missing information. Skipping...
Data retrieval complete


In [7]:
#Retrieve the ID of each restaurant and then request its reviews with that ID
# How Yelp orders reviews: https://www.yelp-support.com/article/How-is-the-order-of-reviews-determined?

# The loop variable is named `restaurant` so it does not shadow the builtin `dict`.
for restaurant in complete_business_data:
    restaurant['Review'] = []

    # Path component of the restaurant's Yelp page URL; parsed once per
    # restaurant instead of once per review.
    restaurant_path = urlparse(restaurant['Url']).path

    endpoint = 'https://api.yelp.com/v3/businesses/{}/reviews'.format(restaurant['ID'])
    response = requests.get(url=endpoint, headers=headers, timeout=30)
    # Surface HTTP failures explicitly instead of a KeyError on 'reviews' below.
    response.raise_for_status()
    business_reviews = response.json()

    for review in business_reviews['reviews']:
        #Compare URL paths from restaurants and reviews to pair them up
        if urlparse(review['url']).path == restaurant_path:
            restaurant['Review'].append(review['text'])


In [14]:
#Check if the reviews were added correctly
# Preview only the first two records; displaying the whole list floods the cell output.
complete_business_data[:2]

[{'ID': '9nltvO9__dkyRIT17D5TqQ',
  'Name': 'Palm House',
  'Image': 'https://s3-media3.fl.yelpcdn.com/bphoto/WEhiaYvvbCOpoGPH6rybtQ/o.jpg',
  'Url': 'https://www.yelp.com/biz/palm-house-san-francisco?adjust_creative=Sb39Q_neG8ncCfDxiJ2TWw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=Sb39Q_neG8ncCfDxiJ2TWw',
  'Review count': 1127,
  'Category': 'Caribbean',
  'Rating': 4.0,
  'Price': '$$',
  'Latitude': 37.797772,
  'Longitude': -122.432887,
  'Address': '2032 Union St',
  'City': 'San Francisco',
  'State': 'CA',
  'Country': 'US',
  'Phone': '(415) 749-9959',
  'Review': ['One of my favorite places in the city. During the day, the place is a nice restaurant. Their food portion is big. I love the Cubano and spiced fries. Their...',
   'We came here for a birthday party and rented the sun room at the entrance of the building. The minimum was $550 to book the place. We ordered several apps...',
   'I went to Palm House today and sat at the bar. My friend and I

In [9]:
# Build a DataFrame with one row per restaurant record, then preview the first two rows
restaurants_df = pd.DataFrame(data=complete_business_data)
restaurants_df.head(n=2)

Unnamed: 0,ID,Name,Image,Url,Review count,Category,Rating,Price,Latitude,Longitude,Address,City,State,Country,Phone,Review
0,9nltvO9__dkyRIT17D5TqQ,Palm House,https://s3-media3.fl.yelpcdn.com/bphoto/WEhiaY...,https://www.yelp.com/biz/palm-house-san-franci...,1127,Caribbean,4.0,$$,37.797772,-122.432887,2032 Union St,San Francisco,CA,US,(415) 749-9959,[One of my favorite places in the city. During...
1,5iZp3XBPOUTIqQY7vGAfVA,Hotline,https://s3-media3.fl.yelpcdn.com/bphoto/VO8rMt...,https://www.yelp.com/biz/hotline-san-francisco...,29,Korean,4.0,$$,37.74203,-122.50446,3560 Taraval St,San Francisco,CA,US,(415) 702-6301,[So I thought this was going to be a fast casu...


In [10]:
# Write the restaurants table to a CSV file: column names included, row index omitted
restaurants_df.to_csv('Restaurants_reviews.csv', header=True, index=False)