In [3]:
import json
import sys
import pandas as pd
from matplotlib import pyplot as plt
import requests
from keys import client_id, api_key

In [4]:
## style for notebook & plots - in matplotlib bookmark ##
# Select matplotlib's 'dark_background' style sheet and apply it through
# plt.style.use, so it is in effect for plots drawn after this cell runs.
style = 'dark_background'
plt.style.use(style)

In [5]:
def yelp_call(url, url_params, api_key, timeout=10):
    """Send an authenticated GET request to a Yelp endpoint and return the response.

    Args:
        url: Endpoint URL to request.
        url_params: Mapping of query-string parameters; handed to
            ``requests.get`` as ``params`` (requests performs the URL encoding).
        api_key: API key sent as a bearer token in the ``Authorization`` header.
        timeout: Seconds to wait on the server before giving up. Defaults to 10.

    Returns:
        The response object produced by ``requests.get``, unmodified. The HTTP
        status code is NOT checked here; callers decide how to treat errors.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    headers = {'Authorization': 'Bearer {}'.format(api_key)}

    # requests waits forever on a stalled connection unless a timeout is given,
    # which would hang the whole notebook run.
    return requests.get(url, headers=headers, params=url_params, timeout=timeout)

In [6]:
def parse_data(list_of_data):
    """Flatten Yelp business records into one tuple per business.

    Args:
        list_of_data: Iterable of business dicts. Each must provide 'id',
            'name', 'categories', 'review_count', 'rating' and a 'location'
            dict with 'address1', 'city', 'state' and 'zip_code'. 'price' is
            optional.

    Returns:
        A list of tuples in the order
        (id, name, categories, num_categories, price, review_count, rating,
        address1, city, state, zip_code). ``categories`` is the list returned
        by ``parse_categories`` and ``price`` is an integer: the length of the
        price string (e.g. "$$" -> 2), or 0 when the price is missing or null.

    Raises:
        KeyError: If a required key is absent from a business record.
    """
    businesses = []

    for business in list_of_data:
        categories = parse_categories(business['categories'])

        # A business may omit 'price' entirely or carry an explicit null;
        # both mean "no price information" and map to 0 instead of crashing
        # on len(None).
        price_val = len(business.get('price') or '')

        location = business['location']
        businesses.append((
            business['id'],
            business['name'],
            categories,
            len(categories),
            price_val,
            business['review_count'],
            business['rating'],
            location['address1'],
            location['city'],
            location['state'],
            location['zip_code'],
        ))

    return businesses

In [7]:
def parse_results(results):
    """Turn a Yelp search response into a DataFrame plus the reported total.

    Args:
        results: Response object exposing ``.json()``; the decoded payload
            must contain the keys 'total' and 'businesses'.

    Returns:
        A ``(df, num)`` pair: ``df`` holds one row per business as produced by
        ``parse_data``, and ``num`` is the payload's 'total' value.
    """
    column_names = [
        'id', 'name', 'categories', 'num_categories', 'price',
        'review_count', 'rating', 'street_address', 'city', 'state', 'zip_code',
    ]

    # json.loads(results.text) would be an equivalent way to decode the body.
    payload = results.json()
    total_matches = payload['total']

    rows = parse_data(payload['businesses'])
    return pd.DataFrame(rows, columns=column_names), total_matches

In [8]:
def parse_categories(categories_list):
    """Collect the 'alias' value of every category entry of a business.

    Args:
        categories_list: List of category dicts, each containing an 'alias' key.

    Returns:
        A new list with the aliases in their original order.
    """
    return [category['alias'] for category in categories_list]

In [9]:
def df_save(csv_filepath, parsed_results):
    """Append a DataFrame's rows to a CSV file.

    The file is opened in append mode with UTF-8 encoding, so repeated calls
    accumulate rows. No header row is written; the frame's index is written as
    the first column.

    Args:
        csv_filepath: Path of the CSV file to append to (created if absent).
        parsed_results: DataFrame whose rows are written.
    """
    # `with` guarantees the handle is closed even when to_csv raises.
    # newline='' is what pandas expects for a caller-opened file object; it
    # stops Windows from turning each line ending into "\r\r\n".
    with open(csv_filepath, 'a', encoding="utf-8", newline='') as f:
        parsed_results.to_csv(f, header=False)

In [18]:
def generate_all_yelp_data(term='Indian', location='Washington, D.C.',
                           categories='restaurants', csv_filepath='indian_dc.csv',
                           page_size=50, max_results=1000):
    """Page through a Yelp business search, saving and collecting every page.

    Main driver: builds the search parameters, requests one page at a time via
    ``yelp_call``, parses each page with ``parse_results``, appends it to a CSV
    with ``df_save`` and finally returns all pages as one DataFrame.

    Args:
        term: Search term sent to Yelp.
        location: Location string sent to Yelp.
        categories: Category filter sent to Yelp.
        csv_filepath: CSV file each page is appended to.
        page_size: Value of the 'limit' parameter and the offset step.
        max_results: Paging stops once the offset reaches this value.

    Returns:
        A DataFrame with the rows of every fetched page. Each page keeps its
        own 0-based index, so index labels repeat across pages. An empty
        DataFrame is returned when no page was requested.
    """
    url = "https://api.yelp.com/v3/businesses/search"

    # Raw strings are passed on purpose: requests URL-encodes the params, so
    # pre-replacing spaces with '+' would send a literal plus sign ('%2B').
    url_params = {
        "term": term,
        "location": location,
        "categories": categories,
        "limit": page_size,
    }

    pages = []
    num = 1  # placeholder so the first request is made; replaced by the API total
    cur = 0

    while cur < num and cur < max_results:
        url_params['offset'] = cur
        results = yelp_call(url, url_params, api_key)
        parsed_results, num = parse_results(results)
        df_save(csv_filepath, parsed_results)
        pages.append(parsed_results)

        # An empty page means there is nothing further to fetch, whatever the
        # reported total says; stop instead of issuing pointless requests.
        if parsed_results.empty:
            break
        cur += page_size

    if not pages:
        return pd.DataFrame()

    # DataFrame.append no longer exists in pandas 2.x; a single concat also
    # avoids re-copying the accumulated frame on every iteration.
    return pd.concat(pages)

In [19]:
# Run the full search. This issues live Yelp API requests and appends every
# fetched page to the CSV file, so re-running the cell adds the rows again.
all_results = generate_all_yelp_data()

In [20]:
# Display the combined results frame.
all_results

Unnamed: 0,id,name,categories,num_categories,price,review_count,rating,street_address,city,state,zip_code
0,n-6O6I7pmmpwkW2pCO-zDw,Indigo,[indpak],1,2,878,4.0,243 K St NE,"Washington, DC",DC,20002
1,CwdlygqT4cWwOtQGsYdoBw,Rasika,[indpak],1,3,3127,4.5,633 D St NW,"Washington, DC",DC,20004
2,jxTABk0jP7Y_kN3DCNiWbQ,Bombay Street Food,[indpak],1,2,389,4.0,1413 Park Rd NW,"Washington, DC",DC,20010
3,jj1vSlzf-G1_vTJJ6ftsAA,Pappe,[indpak],1,2,305,4.0,1317 14th St NW,"Washington, DC",DC,20005
4,0CfNGIodfKdoItAmSWgDDg,RASA,[indpak],1,2,412,4.5,1247 First St SE,"Washington, DC",DC,20003
...,...,...,...,...,...,...,...,...,...,...,...
25,0m9bQ19dZaiN25aHi9aCQw,Mirch Masala,[indpak],1,1,69,3.5,7101 Democracy Blvd,Bethesda,MD,20817
26,FsCp52joCds65P7E-zDXgA,sweetgreen,"[salad, vegetarian]",2,2,175,3.5,2905 District Ave,Fairfax,VA,22031
27,gpED4iMJzTpTo4pae_585A,Grill Kabob,"[mideastern, halal, afghani]",3,2,58,3.5,6500 Springfield Mall,Springfield,VA,22150
28,QBgGlRhti1ALQ8NlJfKvAg,Sweet Sosumba Jamaican Vegan Cafe,"[vegan, caribbean]",2,0,87,4.5,3501 Georgia Ave NW,"Washington, DC",DC,20010


In [21]:
# (rows, columns) of the combined results frame.
all_results.shape

(230, 11)