In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json

# Import API key
from config import api_key1, api_key2

# Load the health-inspection CSV and initialize the dataframe that the Yelp
# lookups will fill in.
input_data_file = "restaurants.csv"
restaurant_df = pd.read_csv(input_data_file)

# Health-inspection columns carried over: source column -> name used in yelp_df.
carried_columns = {
    "business_name": "restaurant",
    "business_address": "address",
    "recent_score": "recent_health_score",
    "recent_risk": "recent_health_risk",
    "average_score": "average_health_score",
}
yelp_df = pd.DataFrame(
    {new_name: restaurant_df[old_name] for old_name, new_name in carried_columns.items()}
)

# Columns to be populated from Yelp; each starts out as an empty string.
yelp_placeholder_columns = (
    "zipcode",
    "yelp_business_id",
    "rating",
    "price",
    "review_count",
    "category_1",
    "category_2",
    "category_3",
    "distance",
)
for placeholder in yelp_placeholder_columns:
    yelp_df[placeholder] = ""

# Report how many restaurants have a (non-null) name, then preview the frame.
number = yelp_df["restaurant"].count()
print(f"Restaurants in SF with Health Scores: {number}")
yelp_df.head()



Restaurants in SF with Health Scores: 599


Unnamed: 0,restaurant,address,recent_health_score,recent_health_risk,average_health_score,zipcode,yelp_business_id,rating,price,review_count,category_1,category_2,category_3,distance
0,One Waan Thai,2922 Diamond St,96,Moderate Risk,96.666667,,,,,,,,,
1,Project juice,506 Castro St,98,Low Risk,78.25,,,,,,,,,
2,Seismic Coffee,298 Market St,90,Moderate Risk,88.5,,,,,,,,,
3,Seismic Coffee,598 Market St,96,Low Risk,96.0,,,,,,,,,
4,Smitten Ice Cream,432 Octavia St #1a,96,Moderate Risk,98.0,,,,,,,,,


In [2]:
url = "https://api.yelp.com/v3/businesses/search?"

# Seconds to wait for one Yelp response before giving up on that restaurant;
# without a timeout a single stalled connection would hang the whole loop.
REQUEST_TIMEOUT = 10
# Index label after which the loop switches from api_key1 to api_key2.
KEY_SWITCH_INDEX = 4000
# Yelp fields copied verbatim into the same-named yelp_df columns.
OPTIONAL_FIELDS = ("rating", "review_count", "distance", "price")
# Columns receiving the titles of the first three Yelp categories, in order.
CATEGORY_COLUMNS = ("category_1", "category_2", "category_3")

headers = {'Authorization': 'Bearer %s' % api_key1}
params = {"term": "restaurant",
          "location": "address",
          "categories": "restaurants",
          "radius": 25,
          "limit": 1}


def _search_yelp(search_url, search_params, search_headers):
    """Run one Yelp business search and return the decoded JSON payload.

    Returns None, after printing the reason, when the request times out,
    cannot connect, comes back with an HTTP error status, or the body is not
    valid JSON. This keeps API failures (bad key, rate limit, outage) from
    being reported as "Restaurant not found".
    """
    try:
        response = requests.get(search_url, params=search_params,
                                headers=search_headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # ValueError covers JSON decoding failures on older requests versions.
        print(f"Request failed ({error})... skipping.")
        return None


def _record_business(frame, index, business):
    """Copy the optional fields of one Yelp business record into row `index`.

    Fields absent from the record are written as "" (the placeholder the
    columns were initialized with). The business id is handled by the caller.
    """
    for column in OPTIONAL_FIELDS:
        frame.loc[index, column] = business.get(column, "")

    location = business.get("location") or {}
    frame.loc[index, "zipcode"] = location.get("zip_code", "")

    # Collect category titles in order, stopping at the first entry that has
    # no title, then pad with "" so every category column is written.
    titles = []
    for category in (business.get("categories") or [])[:len(CATEGORY_COLUMNS)]:
        if "title" not in category:
            break
        titles.append(category["title"])
    titles += [""] * (len(CATEGORY_COLUMNS) - len(titles))
    for column, title in zip(CATEGORY_COLUMNS, titles):
        frame.loc[index, column] = title


print("---------------------------")
print("Beginning API calls to Yelp")
print("---------------------------")

# Loop through to get all the restaurants.
# Iterate over a snapshot of the two lookup columns so that writing results
# into yelp_df does not modify the object being iterated.
for index, restaurant, address in yelp_df[["restaurant", "address"]].itertuples(name=None):
    # restaurant and address
    params["term"] = restaurant
    params["location"] = address + ", " + "San Francisco, CA"

    print(f"processing record {index}, {params['term']}")
    # Make the request
    business_data = _search_yelp(url, params, headers)

    if business_data is None:
        # Request failed; mark the row like a miss so it can be removed later.
        yelp_df.loc[index, 'yelp_business_id'] = float("nan")
    else:
        try:
            business = business_data["businesses"][0]
            business_id = business["id"]
        # If there is no "yelp_business_id", there is no yelp data
        # so we set that value to "NaN", later the row can be removed.
        except (KeyError, IndexError):
            print("Restaurant not found... skipping.")
            yelp_df.loc[index, 'yelp_business_id'] = float("nan")
        else:
            yelp_df.loc[index, 'yelp_business_id'] = business_id
            _record_business(yelp_df, index, business)

    if index == KEY_SWITCH_INDEX:
        print("Reached maximum calls, starting group 2:")
        # Yelp limits the number of calls per key per day (5000 per the
        # original note), so a second key is used for the remaining rows.
        headers = {'Authorization': 'Bearer %s' % api_key2}

print("---------------------------")
print("All restaurants processed!!")
print("---------------------------")
    

---------------------------
Beginning API calls to Yelp
---------------------------
processing record 0, One Waan Thai
processing record 1, Project juice
processing record 2, Seismic Coffee
processing record 3, Seismic Coffee
processing record 4, Smitten Ice Cream
processing record 5, Glena's
processing record 6, Blue Bottle Coffee
processing record 7, Korean Bobcha
processing record 8, Barry's Bootcamp
processing record 9, La Boulange de San Francisco
processing record 10, The Bindery
processing record 11, Milkbean
processing record 12, Zaoh Restaurant
processing record 13, Corks
processing record 14, Overtime
processing record 15, The Organic Coup
processing record 16, CafeLambretta
processing record 17, Big Fish Little Fish Poke
processing record 18, Mi Lindo Peru
processing record 19, Homegrown Sustainable Sandwiches
processing record 20, Thomas Edison Charter Academy
processing record 21, Manila Bowl
processing record 22, Frena
processing record 23, Hawker Fare
processing record 2

processing record 209, Nute's
processing record 210, Sightglass Coffee
processing record 211, St. Cecilia School
processing record 212, Open Kitchen
processing record 213, Enter The Cafe
processing record 214, Parties That Cook
processing record 215, Barrel Proof
processing record 216, Pho Huynh Sang
processing record 217, Parc 55 San Francisco, a Hilton Hotel
processing record 218, Troy Greek Cuisine
processing record 219, Le Cupboard
processing record 220, Roxie Food Center
processing record 221, Lemonade
processing record 222, Old Ship
processing record 223, Ritual Coffee Roasters
processing record 224, Hello Sandwich & Noodle
processing record 225, Home Skillet SF
processing record 226, Jake's Steaks
processing record 227, Alma Cocina
processing record 228, Saint Frank Coffee
processing record 229, Max's Opera Cafe
processing record 230, Food and Liquor World
processing record 231, ABC Falafel
processing record 232, Newkirk's
processing record 233, Top Round Roast Beef
processing r

processing record 424, 94939 Gilroy Garlic
processing record 425, Lush Gelato
processing record 426, Fritz Mission
processing record 427, Ace King BBQ
processing record 428, Toyama Japanese Restaurant
processing record 429, 94971 Gilroy Garlic Stand
processing record 430, 94983 Chowder House
processing record 431, 94884 Great House of Brews
processing record 432, Naya Cafe
processing record 433, Sultan's Kebab
processing record 434, CABLE CAR COFFEE
processing record 435, Wing Wings
processing record 436, Sakesan
processing record 437, Rose Kitchen
processing record 438, R Caffe
processing record 439, Pearl
processing record 440, Sidewalk Juice
processing record 441, Bay Subs & Deli
processing record 442, Mangal Mediterranean Restaurant
processing record 443, Beer Nerds
processing record 444, Hi-Way
processing record 445, Aroma Tea Shop
processing record 446, Equator Coffees
processing record 447, Greaser Coffee
processing record 448, 95202 Mission Street Nacho Cart
processing record 4

In [3]:
# Non-null count per column after the Yelp lookups. Columns pre-filled with ""
# still count as non-null, so only NaN markers (e.g. yelp_business_id for a
# restaurant that was not found) lower a column's count.
yelp_df.count()

restaurant              599
address                 599
recent_health_score     599
recent_health_risk      545
average_health_score    599
zipcode                 599
yelp_business_id        599
rating                  599
price                   599
review_count            599
category_1              599
category_2              599
category_3              599
distance                599
dtype: int64

In [5]:
# Drop restaurants with no Yelp match: the lookup loop marks those rows by
# setting yelp_business_id to NaN, so dropna on that column removes them.
# NOTE(review): the execution counters show this cell ran as In [5], after the
# CSV export cell ran as In [4] -- confirm the exported file reflects this
# filter by re-running the notebook top to bottom.
yelp_df = yelp_df.dropna(subset=['yelp_business_id'])
# Re-check the per-column non-null counts after the filter.
yelp_df.count()

restaurant              599
address                 599
recent_health_score     599
recent_health_risk      545
average_health_score    599
zipcode                 599
yelp_business_id        599
rating                  599
price                   599
review_count            599
category_1              599
category_2              599
category_3              599
distance                599
dtype: int64

In [4]:
# Write the Yelp-enriched frame to yelp_ratings.csv: index=False leaves out the
# pandas row index, header=True writes the column names as the first line.
yelp_df.to_csv("yelp_ratings.csv", index=False, header=True)