In [61]:
import json
import sys
import pandas as pd
from matplotlib import pyplot as plt
import requests
from keys import client_id, api_key

In [62]:
# Plot styling: select the matplotlib style sheet applied to figures drawn
# after this cell runs.
style = 'dark_background'
plt.style.use(style)

In [63]:
# SEARCH PARAMETERS
# One-off probe of the Yelp business-search endpoint. The resulting `response`
# is a module-level name that a later cell inspects via `response.status_code`.
url = "https://api.yelp.com/v3/businesses/search"
term = 'Indian'
location = 'Washington, D.C.'
categories = "restaurants"
# NOTE(review): `radius` is defined but never sent with this request - confirm
# whether it was meant to be part of `url_params`.
radius = 100

# HEADERS
headers = {'Authorization': 'Bearer {}'.format(api_key)}

# URL PARAMETERS
# Values are passed unmodified: requests URL-encodes query parameters itself,
# so pre-replacing spaces with '+' would deliver a literal '+' (sent as %2B)
# inside the search term / location.
url_params = {
    "term": term,
    "location": location,
    "categories": categories,
    "limit": 50,
}

# RESPONSE FROM API SERVER
# The timeout keeps this cell from blocking forever if the server never answers.
response = requests.get(url, headers=headers, params=url_params, timeout=30)

In [64]:
def yelp_call(url, url_params, api_key, timeout=30):
    """Send an authenticated GET request and return the raw response.

    Parameters
    ----------
    url : str
        Endpoint to query.
    url_params : dict
        Query-string parameters, forwarded to ``requests.get`` as ``params``.
    api_key : str
        Token placed in the ``Authorization: Bearer ...`` header.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block the caller
        indefinitely.

    Returns
    -------
    requests.Response
        The response exactly as returned by ``requests.get``; the HTTP status
        is not checked here.
    """
    headers = {'Authorization': 'Bearer {}'.format(api_key)}
    return requests.get(url, headers=headers, params=url_params, timeout=timeout)

In [65]:
def parse_data(list_of_data):
    """Flatten business records into one tuple per business.

    Parameters
    ----------
    list_of_data : iterable of dict
        Business records as found under the ``'businesses'`` key of a search
        response.

    Returns
    -------
    list of tuple
        One tuple per business, in input order:
        ``(business id, name, category aliases, number of categories,
        price level, review_count, rating, address1, city, state, zip_code)``.
        The price level is the length of the ``'price'`` string (e.g. ``"$$"``
        becomes 2) and is 0 when the record has no price.
    """
    businesses = []

    for business in list_of_data:
        get_categories = parse_categories(business['categories'])
        num_categories = len(get_categories)

        # Yelp business objects carry their identifier under 'id'; records that
        # already use 'business_id' are still accepted.
        business_id = business['id'] if 'id' in business else business['business_id']

        # A missing or null price counts as level 0.
        price_val = len(business.get('price') or '')

        location = business['location']
        biz_tuple = (
            business_id,
            business['name'],
            get_categories,
            num_categories,
            price_val,
            business['review_count'],
            business['rating'],
            location['address1'],
            location['city'],
            location['state'],
            location['zip_code'],
        )
        businesses.append(biz_tuple)

    return businesses

In [66]:
def parse_results(results):
    """Turn a search response into a DataFrame plus the reported total.

    Parameters
    ----------
    results : response object
        Must provide ``.json()`` returning a dict with ``'total'`` and
        ``'businesses'`` keys.

    Returns
    -------
    (pandas.DataFrame, total)
        The businesses parsed by ``parse_data`` as an 11-column frame, and the
        value stored under ``'total'`` in the response.
    """
    column_names = [
        'business_id',
        'name',
        'categories',
        'num_categories',
        'price',
        'review_count',
        'rating',
        'street_address',
        'city',
        'state',
        'zip_code',
    ]

    payload = results.json()
    total = payload['total']
    rows = parse_data(payload['businesses'])

    return pd.DataFrame(rows, columns=column_names), total

In [67]:
def parse_categories(categories_list):
    """Return the ``'alias'`` of every category entry, preserving order.

    Parameters
    ----------
    categories_list : list of dict
        Category entries of one business; each must have an ``'alias'`` key.

    Returns
    -------
    list
        The alias values, one per entry (empty when there are no entries).
    """
    return [category['alias'] for category in categories_list]

In [68]:
def df_save(csv_filepath, parsed_results):
    """Append a DataFrame's rows to a CSV file.

    Rows are appended (the file is created if missing), the index is written
    as the first column, and no header row is emitted, so repeated calls
    accumulate data lines only.

    Parameters
    ----------
    csv_filepath : str
        Destination CSV path.
    parsed_results : pandas.DataFrame
        Frame whose rows are appended.
    """
    # Handing pandas the path (instead of a manually opened handle) guarantees
    # the file is closed even when serialisation raises part-way through.
    parsed_results.to_csv(csv_filepath, mode='a', header=False, encoding='utf-8')

In [69]:
def get_yelp_reviews(business_id):
    """Fetch the reviews the Yelp reviews endpoint returns for one business.

    Uses the module-level ``api_key`` for authentication.

    NOTE(review): Yelp's documentation describes this endpoint as returning
    only a limited number of review excerpts per business - confirm before
    treating the result as the complete review set.

    Parameters
    ----------
    business_id : str
        Yelp identifier of the business, inserted into the request URL.

    Returns
    -------
    list of tuple
        One ``(business_id, review id, text, rating, time_created)`` tuple per
        review in the response.

    Raises
    ------
    KeyError
        If the response body has no ``'reviews'`` key.
    """
    headers = {'Authorization': 'Bearer {}'.format(api_key)}
    url = "https://api.yelp.com/v3/businesses/{}/reviews".format(business_id)

    # The timeout keeps one stalled request from hanging the whole crawl.
    review_response = requests.get(url, headers=headers, timeout=30)
    review_data = json.loads(review_response.text)

    business_reviews = []
    for review in review_data['reviews']:
        # Review objects carry their own identifier under 'id'; the previous
        # 'business_id' lookup is kept only as a fallback.
        review_id = review['id'] if 'id' in review else review['business_id']
        business_reviews.append(
            (business_id, review_id, review['text'], review['rating'], review['time_created'])
        )

    return business_reviews

In [70]:
def generate_all_reviews(businesses_list):
    """Gather the reviews of several businesses into one DataFrame.

    Parameters
    ----------
    businesses_list : iterable
        Business identifiers; each one is passed to ``get_yelp_reviews``.

    Returns
    -------
    pandas.DataFrame
        One row per review, with columns ``business_id``, ``review_id``,
        ``text``, ``rating`` and ``time_created``, in the order the businesses
        were given.
    """
    review_columns = ['business_id', 'review_id', 'text', 'rating', 'time_created']

    collected_rows = []
    for business_id in businesses_list:
        collected_rows.extend(get_yelp_reviews(business_id))

    return pd.DataFrame(collected_rows, columns=review_columns)

In [86]:
def generate_all_yelp_data_la():
    """Collect 'Indian' restaurant listings and their reviews for Los Angeles.

    Pages through the business-search endpoint 50 results at a time. Each page
    of businesses is appended to ``test_data_la.csv`` and the reviews of those
    businesses to ``test_reviews_la.csv``. Paging stops once the offset reaches
    the total reported by the API or 1000, whichever comes first.

    Uses the module-level ``api_key``. The logic mirrors
    ``generate_all_yelp_data_dc``; only the location and file names differ.

    Returns
    -------
    pandas.DataFrame
        All parsed pages stacked in request order (per-page row indices are
        kept, as before).
    """
    term = 'Indian'
    location = 'Los Angeles'
    categories = "restaurants"
    radius = 40000
    page_size = 50
    # NOTE(review): presumably the API's ceiling on pageable results - confirm.
    max_offset = 1000

    url = "https://api.yelp.com/v3/businesses/search"
    # Values are passed unmodified: requests URL-encodes query parameters, so
    # replacing spaces with '+' beforehand would send a literal '+' (%2B).
    url_params = {
        "term": term,
        "location": location,
        "categories": categories,
        "radius": radius,
        "limit": page_size,
    }

    pages = []
    num = 1  # placeholder so the first request is made; replaced by the API's total
    cur = 0

    while cur < num and cur < max_offset:
        url_params['offset'] = cur
        results = yelp_call(url, url_params, api_key)
        parsed_results, num = parse_results(results)
        parsed_reviews = generate_all_reviews(parsed_results.business_id)
        df_save('test_data_la.csv', parsed_results)
        df_save('test_reviews_la.csv', parsed_reviews)
        pages.append(parsed_results)
        cur += page_size

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame on every page. The loop always runs at
    # least once, so `pages` is never empty here.
    return pd.concat(pages)

In [87]:
def generate_all_yelp_data_dc():
    """Collect 'Indian' restaurant listings and their reviews for Washington, D.C.

    Pages through the business-search endpoint 50 results at a time. Each page
    of businesses is appended to ``test_data_dc.csv`` and the reviews of those
    businesses to ``test_reviews_dc.csv``. Paging stops once the offset reaches
    the total reported by the API or 1000, whichever comes first.

    Uses the module-level ``api_key``. The logic mirrors
    ``generate_all_yelp_data_la``; only the location and file names differ.

    Returns
    -------
    pandas.DataFrame
        All parsed pages stacked in request order (per-page row indices are
        kept, as before).
    """
    term = 'Indian'
    location = 'Washington, D.C.'
    categories = "restaurants"
    radius = 40000
    page_size = 50
    # NOTE(review): presumably the API's ceiling on pageable results - confirm.
    max_offset = 1000

    url = "https://api.yelp.com/v3/businesses/search"
    # Values are passed unmodified: requests URL-encodes query parameters, so
    # replacing spaces with '+' beforehand would send a literal '+' (%2B).
    url_params = {
        "term": term,
        "location": location,
        "categories": categories,
        "radius": radius,
        "limit": page_size,
    }

    pages = []
    num = 1  # placeholder so the first request is made; replaced by the API's total
    cur = 0

    while cur < num and cur < max_offset:
        url_params['offset'] = cur
        results = yelp_call(url, url_params, api_key)
        parsed_results, num = parse_results(results)
        parsed_reviews = generate_all_reviews(parsed_results.business_id)
        df_save('test_data_dc.csv', parsed_results)
        df_save('test_reviews_dc.csv', parsed_reviews)
        pages.append(parsed_results)
        cur += page_size

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame on every page. The loop always runs at
    # least once, so `pages` is never empty here.
    return pd.concat(pages)

In [88]:
# Run the Los Angeles collection: queries the Yelp API and appends to
# test_data_la.csv / test_reviews_la.csv as a side effect.
all_results_la = generate_all_yelp_data_la()

In [89]:
# Run the Washington, D.C. collection: queries the Yelp API and appends to
# test_data_dc.csv / test_reviews_dc.csv as a side effect.
all_results_dc = generate_all_yelp_data_dc()

In [91]:
# Display the collected Los Angeles results.
all_results_la

Unnamed: 0,id,name,categories,num_categories,price,review_count,rating,street_address,city,state,zip_code
0,3XTjerBg_PywBN81Ts45Bg,India's Restaurant,"[indpak, halal, seafood]",3,2,2951,4.5,4366 Fountain Ave,Los Angeles,CA,90029
1,uteG6HIb4an-y5fFCXWo7w,Anarkali Indian Restaurant,[indpak],1,2,2974,4.5,7013 Melrose Ave,Los Angeles,CA,90038
2,52QPoJFEObNp6f6rv8XthQ,India's Tandoori,"[indpak, gluten_free, vegan]",3,2,1192,4.0,11819 Wilshire Blvd,Los Angeles,CA,90025
3,B6fT0KdNWPKH9Yjaz06NiA,Biriyani Kabob House,"[pakistani, halal, indpak]",3,2,908,4.5,3525 W 3rd St,Los Angeles,CA,90020
4,EB-RtApkwptE3wRCQO8bCw,Badmaash - Downtown LA,"[indpak, newamerican]",2,2,1841,4.0,108 W 2nd St,Los Angeles,CA,90012
...,...,...,...,...,...,...,...,...,...,...,...
32,UdWQu4w9l3Y9aKkioWUryw,Todo Verde,"[vegan, catering, latin]",3,2,56,5.0,,Los Angeles,CA,90021
33,jIu7TUg3cDAmK07KbwAOKw,Cafe Sierra,"[newamerican, buffets, seafood]",3,3,968,3.5,555 Universal Hollywood Dr,Universal City,CA,91608
34,o5RJUFQJgVhaFRtZpzCfxA,P.F. Chang's,"[chinese, asianfusion, gluten_free]",3,2,716,3.0,21821 Oxnard St,Woodland Hills,CA,91367
35,7tKP4mI07i6A31QVBCr7Jg,California Chicken Cafe,"[tradamerican, salad, wraps]",3,2,454,3.5,9045 Topanga Canyon Blvd,Chatsworth,CA,91304


In [82]:
# (rows, columns) of the Los Angeles results.
all_results_la.shape

(0, 11)

In [77]:
# (rows, columns) of the Washington, D.C. results.
all_results_dc.shape

(9, 11)

In [60]:
# HTTP status code of the module-level `response` from the probe search request.
response.status_code

200