In [16]:
import requests
import json
import sys
import pandas as pd
import csv
from  keys  import  client_id, api_key

In [2]:
"""
Running this allows the user to input their wanted search results, and it will save them to variables.
Then, it will save these variables into the url_params variable which is used for searching the Yelp API
"""

url = 'https://api.yelp.com/v3/businesses/search'  # Yelp Fusion business-search endpoint

# Every request authenticates with the API key imported from the local `keys` module.
headers = {
    'Authorization': 'Bearer {}'.format(api_key),
}

# Ask the user for the key-words that define the Yelp search.
term = input('What sort of business do you think you can do? (plumbers, takout, etc.): ')
location = input('Near where?: ')
categories = input('What sort of industry would you like to disrupt?: ')

# Label built from the raw answers.
spec = f'{term}_{location}_{categories}_data'
# Note on categories, they appear to be optional. You may search 'All' for all, or possibly leave the field blank.

# Query-string parameters for the search request.
# The raw strings are passed through unchanged: `requests` URL-encodes query
# values itself, so replacing spaces with '+' beforehand would send a literal
# plus sign ("%2B") to the API instead of a space.
url_params = {
    "term": term,
    "location": location,
    "categories": categories,
    "limit": 50,   # results requested per page
    "offset": 0    # index of the first result to return
}

What sort of business do you think you can do? (plumbers, takout, etc.): Indian
That's what the last guy said, too.
Near where? (please say Monaco): NYC
dang I really wanted to work there..
DC? Suit yourself...
What sort of industry would you like to disrupt?: Food


In [3]:
def yelp_call(headers, url_params):
    """
    Send one GET request to the Yelp endpoint and hand back the decoded JSON body.

    The target address is read from the module-level ``url`` variable.
    ``headers`` is sent as the HTTP headers and ``url_params`` as the query
    string, so the caller controls what comes back (for example by changing
    ``offset``). One call yields one response; to fetch further results call
    this again with updated ``url_params``.

    The HTTP status is not inspected here: whatever JSON the server sent is
    returned as-is.
    """
    return requests.get(url, params=url_params, headers=headers).json()

In [4]:
def parse_data(list_of_data):
    """
    Flatten Yelp business records (e.g. data['businesses']) into a list of tuples.

    Each tuple holds, in order: name, display address, city, rating,
    review count, coordinates, price level, business id and categories.
    The price level is the length of the record's 'price' string, or None
    when the record has no 'price' key.
    """
    def as_row(entry):
        # Resolve the price first, then read the remaining fields in column order.
        price_level = len(entry['price']) if 'price' in entry else None
        return (
            entry['name'],
            entry['location']['display_address'],
            entry['location']['city'],
            entry['rating'],
            entry['review_count'],
            entry['coordinates'],
            price_level,
            entry['id'],
            entry['categories'],
        )

    return [as_row(entry) for entry in list_of_data]

In [5]:
# Destination of the business CSV, named after the search term and location entered above.
csv_filepath = 'database/{}_{}_database.csv'.format(term, location)

def call_1000(csv_filepath):
    """
    Page through the Yelp search results, build a data frame and append it to a CSV.

    The search uses the module-level ``headers`` and ``url_params``. Pages are
    requested with increasing ``offset`` (always below 1000) until the number
    of businesses collected reaches the ``total`` reported by the first
    response, a page comes back empty, or a response has no 'businesses' key.

    Parameters
    ----------
    csv_filepath : str
        File the rows are appended to. The header row is written only when
        the file is empty, so repeated runs do not scatter header lines
        through the data.

    Returns
    -------
    pandas.DataFrame
        One row per business with the columns Name, Address, City, Rating,
        Review Count, Coordinates, Price, Id and Categories.

    Raises
    ------
    KeyError
        If the very first response has no 'businesses' (or no 'total') key.
    """
    # Page on a copy so the shared search parameters are not left at a stale offset.
    params = dict(url_params)
    # Advance by the page size being requested; fall back to 50 if it is unset.
    page_step = int(params.get('limit') or 50)

    biz_list = []
    total = None
    offset = 0
    while offset < 1000:
        params['offset'] = offset
        results = yelp_call(headers, params)

        if 'businesses' not in results:
            if total is None:
                # Nothing has been fetched yet: surface the payload instead of a bare key error.
                raise KeyError(f"Yelp response has no 'businesses' key: {results}")
            break  # a later page failed; keep what has been collected so far

        if total is None:
            total = results['total']

        page = parse_data(results['businesses'])  # list of businesses in tuples
        if not page:
            break
        biz_list.extend(page)

        # Stop as soon as everything is collected, before spending another request.
        if len(biz_list) >= total:
            break
        offset += page_step

    # Create the data frame from the gathered information
    df = pd.DataFrame(
        biz_list,
        columns=['Name', 'Address', 'City', 'Rating', 'Review Count',
                 'Coordinates', 'Price', 'Id', 'Categories'],
    )

    # Append to the CSV through a single handle. In append mode the handle
    # starts at the end of the file, so tell() == 0 means the file is empty
    # and still needs its header row.
    with open(csv_filepath, "a", newline="", encoding="utf-8") as f:
        df.to_csv(f, header=(f.tell() == 0), index=False)
    print(f'CSV file written to {csv_filepath}.')
    return df

In [6]:
# Run the search defined above: call_1000 appends the businesses to the CSV at
# csv_filepath and returns them as a data frame, kept in biz_data for the
# review cells further down. The last line displays the first 5 rows.
biz_data = call_1000(csv_filepath)
biz_data.head()

CSV file written to {csv_filepath}.


Unnamed: 0,Name,Address,City,Rating,Review Count,Coordinates,Price,Id,Categories
0,The MasalaWala,"[179 Essex St, New York, NY 10002]",New York,4.5,1501,"{'latitude': 40.72191, 'longitude': -73.98666}",2.0,sCC7-hSdCkNPExejZT9BAQ,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali..."
1,Bengal Tiger Indian Food,"[58 W 56th St, Fl 2, New York, NY 10019]",New York,4.5,1938,"{'latitude': 40.763167, 'longitude': -73.977131}",2.0,x0k5kFArHLijdY8-NEQ7Xg,"[{'alias': 'indpak', 'title': 'Indian'}]"
2,Indian Table,"[234 Court St, Brooklyn, NY 11201]",Brooklyn,4.5,118,"{'latitude': 40.6859591, 'longitude': -73.9941...",2.0,BXoW0AUDE7WZJcY-Vhqlbg,"[{'alias': 'indpak', 'title': 'Indian'}]"
3,Indika House,"[943 Broadway, Brooklyn, NY 11206]",Brooklyn,5.0,74,"{'latitude': 40.69716, 'longitude': -73.9353}",2.0,lnI9toC-uAhBIbtHDMxvcA,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali..."
4,Tikka Indian Grill,"[185 Grand St, Brooklyn, NY 11211]",Brooklyn,4.0,335,"{'latitude': 40.71441, 'longitude': -73.96078}",2.0,WWpOWqOgS9ClYvxYVzR0lg,"[{'alias': 'indpak', 'title': 'Indian'}, {'ali..."


In [7]:
def call_reviews(biz_id):
    """
    Request the reviews endpoint once for every business id in ``biz_id``.

    Uses the module-level ``headers`` for each request. Returns a list with
    the decoded JSON body of each response, in the same order as the ids.
    """
    return [
        requests.get(f'https://api.yelp.com/v3/businesses/{one_id}/reviews', headers=headers).json()
        for one_id in biz_id
    ]

In [8]:
def call_all_reviews(b_data):
    """
    Collect the review responses for every business in the data frame.

    The values of the 'Id' column are gathered into a list and passed to
    ``call_reviews``; whatever ``call_reviews`` returns is returned unchanged.
    """
    id_values = list(b_data['Id'])  # ID column of the business data frame
    return call_reviews(id_values)


In [9]:
def format_reviews(b_data):
    """
    Build one dictionary of review texts per business in the data frame.

    Calls ``call_all_reviews`` to get one review response per row of
    ``b_data``. Each returned dictionary maps 'Review_0', 'Review_1', ... to
    the text of the corresponding review and 'Id' to that business's id.

    A response without a 'reviews' key (for example an error payload) still
    yields a dictionary, holding only 'Id', so the output keeps one entry per
    business.

    Returns
    -------
    list of dict
        In the same order as the rows of ``b_data``.
    """
    responses = call_all_reviews(b_data)
    list_of_reviews = []
    # Pair ids and responses by position. Looking ids up with b_data["Id"][x]
    # is label-based and fails on frames whose index does not start at 0
    # (e.g. a slice such as biz_data[20:40]).
    for business_id, payload in zip(b_data['Id'], responses):
        reviews = {
            f'Review_{count}': review['text']
            for count, review in enumerate(payload.get('reviews', []))
        }
        reviews['Id'] = business_id
        list_of_reviews.append(reviews)
    return list_of_reviews

# format_reviews(biz_data[:20])

In [10]:
def reviews_to_csv(b_data): # If you run for all, this will output IndexError: list index out of range
    """
    Fetch the reviews for every business in ``b_data`` and append them to the reviews CSV.

    Runs ``format_reviews`` on the data frame, turns the resulting list of
    dictionaries into a data frame and appends it to
    'database/<term>_<location>_reviews.csv', where ``term`` and ``location``
    are the module-level search inputs. The header row is written only when
    the file is empty, so repeated runs do not insert extra header lines.

    Returns
    -------
    pandas.DataFrame
        The reviews data frame that was written.
    """
    csv_filepath = f'database/{term}_{location}_reviews.csv'
    formatted_reviews = format_reviews(b_data)

    df = pd.DataFrame(formatted_reviews)
    # Write through a single handle. In append mode the handle starts at the
    # end of the file, so tell() == 0 means the file is empty and still needs
    # its header row.
    with open(csv_filepath, "a", newline="", encoding="utf-8") as f:
        df.to_csv(f, header=(f.tell() == 0), index=False)
    return df

In [12]:
# Fetch the reviews for every business in biz_data, append them to the reviews
# CSV, and keep the resulting data frame.
biz_review = reviews_to_csv(biz_data)

In [15]:
# Display the reviews data frame returned by reviews_to_csv.
biz_review

Unnamed: 0,Review_0,Review_1,Review_2,Id
0,MasalaWala is my go to for Indian in the city....,My favorite Indian restaurant in the city. The...,I'll start off saying I ordered delivery and i...,sCC7-hSdCkNPExejZT9BAQ
1,I was really in the mood for Indian so found t...,First impressions: LOVE the decor and hole in ...,4.5/5 Dinner pre-covid and delivery during cov...,x0k5kFArHLijdY8-NEQ7Xg
2,Go to pickup spot for really excellent Indian ...,Kicking myself for never taking a photo whenev...,I had a great time eating here. The food was s...,BXoW0AUDE7WZJcY-Vhqlbg
3,"Few reasons for 5 stars, but mainly it's the d...",I usually never write reviews for delivery foo...,What a delicious way to end a long weekend. I ...,lnI9toC-uAhBIbtHDMxvcA
4,Tikka is one of my favorite restaurants in the...,Tikka is a great option for delicious Indian f...,Veggie korma and paneer tikka are definitely t...,WWpOWqOgS9ClYvxYVzR0lg
