<a href='https://ai.meng.duke.edu'><img align="left" style="padding-top:10px;" src="https://storage.googleapis.com/aipi_datasets/Duke-AIPI-Logo.png"></a>

# Working with APIs: Yelp
This notebook demonstrates how to work with web APIs, using the Yelp Fusion API as the example.  The documentation for the Yelp Fusion API can be accessed [here](https://www.yelp.com/developers/documentation/v3/business_search).  To run this code you will need to register to get an API key for the Fusion API; either save it as `api_key` in a `config.py` file next to this notebook or type it in when prompted.

In [18]:
import requests
import json
import os

import pandas as pd
import numpy as np

### Get search results from API
Let's start by using the API to get Yelp search results for a given query.  For this example, we will try to get information from Yelp for Indian restaurants in Morrisville, NC, limiting the search to the first 10 results.

In [19]:
def fetchYelpData(term,location,limit):
    '''
    Gets data from Yelp for businesses that match a given search term and location

    The lookup is best-effort: if the request fails, the API answers with an
    error status, or the body is not valid JSON, the error is printed and an
    empty dataframe is returned instead of raising.

    Inputs:
        term(str): search term
        location(str): city
        limit(int): maximum number of businesses to return

    Returns:
        df(DataFrame): a dataframe containing information on businesses that match the search query,
            one row per unique business id (empty when nothing was found or the request failed)
    '''

    # Load API key (read this in from a config.py file or type it in)
    if os.path.exists('config.py'):
        import config
        key = config.api_key
    else:
        key = input('Please enter your API key:')

    # Specify headers, url and params
    headers = {'Authorization': f'Bearer {key}'}
    url = "https://api.yelp.com/v3/businesses/search"
    # Pass the raw strings: requests URL-encodes query parameters itself, so
    # replacing spaces with '+' beforehand would send a literal plus ('%2B').
    payload = {
        'term': term,
        'location': location,
        'limit': limit
    }

    businesses = []
    try:
        # A timeout keeps the call from hanging forever on a stalled connection
        response = requests.get(url, headers=headers, params=payload, timeout=10)
        # Surface HTTP errors (bad key, bad query, rate limit) as a clear message
        response.raise_for_status()
        businesses = response.json().get('businesses') or []
    except (requests.exceptions.RequestException, ValueError) as e:
        print('Error occurred')
        print(e)

    rows = [] # Hold data for each business
    seen_ids = set() # Hold ids of businesses already added (set gives O(1) lookups)
    for business in businesses:
        business_id = business.get('id')
        if business_id not in seen_ids:
            seen_ids.add(business_id)
            rows.append(business)

    return pd.DataFrame(rows)

In [20]:
def processYelpData(df):
    '''
    Cleans up the data extracted from Yelp

    The input dataframe is left untouched; all derived columns are written to
    a copy. Any expected column that is absent from the input (for example
    'price' when no business in the results has one) is filled with NaN
    rather than raising a KeyError.

    Inputs:
        df(DataFrame): dataframe of results from Yelp; 'categories' is read as a
            list of dicts with a 'title' key and 'location' as a dict with an
            'address1' key

    Returns:
        df_clean(DataFrame): processed dataframe containing cleaned results with the columns
            id, name, review_count, category, rating, address, display_phone, distance, price
    '''
    columns = ['id','name','review_count','category','rating','address','display_phone','distance','price']

    # An empty result set (e.g. after a failed search) has nothing to clean
    if df.empty:
        return pd.DataFrame(columns=columns)

    def first_category_title(categories):
        # Businesses may come back with no categories at all
        if isinstance(categories, (list, tuple)) and categories:
            return categories[0].get('title')
        return None

    def street_address(location):
        if isinstance(location, dict):
            return location.get('address1')
        return None

    # Work on a copy so the caller's dataframe is not modified
    df_clean = df.copy()

    # Clean up columns
    if 'categories' in df_clean.columns:
        df_clean['category'] = df_clean['categories'].apply(first_category_title)
    if 'location' in df_clean.columns:
        df_clean['address'] = df_clean['location'].apply(street_address)
    if 'distance' in df_clean.columns:
        # Rescale distance by 1609 (approximate meters-to-miles conversion,
        # assuming the API reports meters -- confirm against the Yelp docs)
        df_clean['distance'] = df_clean['distance']/1609

    # Filter to only needed columns; reindex tolerates columns missing from the input
    return df_clean.reindex(columns=columns)

In [21]:
# Search Yelp for Indian food around Morrisville, then tidy the raw results
term, location, limit = 'indian food', 'Morrisville NC', 10
yelpdata = fetchYelpData(term=term, location=location, limit=limit)
yelpdata = processYelpData(df=yelpdata)
yelpdata

Unnamed: 0,id,name,review_count,category,rating,address,display_phone,distance,price
0,q3AtTlcutvN32hRqicD0Wg,Naga's South Indian Cuisine,8,Indian,4.0,1000 Lower Shiloh Way,(919) 748-3152,1.986479,
1,yrfk9eKjtvlkzKyO1HPtCQ,Chutneys,41,Indian,4.0,4121 Davis Dr,(919) 377-1029,1.616669,
2,134gr4cxHe2gPZX1nBkoNw,Swagat Indian Cuisine,130,Indian,4.0,9549 Chapel Hill Rd,(919) 378-9090,2.724975,$$
3,m70fwMJ-mV3Tr6sjxitcwg,Adda,13,Indian,4.5,6105 Grace Park Dr,(919) 234-6281,1.575716,
4,xvY9cWDNb3JFk0CpfIiGTA,Tower Indian Restaurant,240,Indian,4.0,144 Morrisville Sq Way,(919) 465-2326,1.460005,$$
5,bZI5-PEVXiiQW1Og0uhdNA,Thali Indian Restaurant,94,Indian,4.0,5311 S Miami Blvd,(919) 908-0034,2.987895,$$
6,-fW-zdGWH4UbJsbs8LP7Hw,Mirchi’s Indian Kitchen & Bar,52,Indian,4.5,328 Sembler Ln,(919) 650-3007,4.268303,$$
7,FhtJhYO0mbaYe8UbakjtTw,Amber Flavors & Taste,23,Indian,4.5,3607 Davis Dr,(919) 377-2550,1.710733,$$
8,WTGRnqmGEiWsZ3HXckRejw,Kathmandu Kitchen,170,Indian,4.5,1275 NW Maynard Rd,(919) 535-3026,3.905887,$$
9,_s1qh7cL7EQv1lkCLinKmQ,Anjappar Authentic Indian Restaurant,116,Indian,4.0,101 Ledgestone Way,(919) 377-1473,3.470973,$$


### Get reviews from API
Another important dataset you can get from the API is the Yelp reviews.  Let's now try to get reviews for a given restaurant from our list above.

In [22]:
def fetchYelpReviews(yelp_id):
    '''
    Fetches reviews for a given business from Yelp API

    The lookup is best-effort: if the request fails, the API answers with an
    error status, the body is not valid JSON, or no reviews come back, the
    error (if any) is printed and an empty dataframe with the expected
    columns is returned instead of raising.

    Inputs:
        yelp_id(str): yelp id of business to fetch review for

    Returns:
        df(DataFrame): dataframe containing the ratings, review and timestamps
            in the columns 'rating', 'text' and 'date', where 'date' is the part
            of 'time_created' before the first space
    '''
    columns = ['rating','text','date']

    # Load API key (read this in from a config.py file or type it in)
    if os.path.exists('config.py'):
        import config
        key = config.api_key
    else:
        key = input('Please enter your API key:')

    # Specify headers and url
    headers = {'Authorization': f'Bearer {key}'}
    url = f"https://api.yelp.com/v3/businesses/{yelp_id}/reviews"

    # Bound before the try so a failed request cannot leave the name undefined
    reviews = []
    try:
        # A timeout keeps the call from hanging forever on a stalled connection
        response = requests.get(url, headers=headers, timeout=10)
        # Surface HTTP errors (bad key, unknown id, rate limit) as a clear message
        response.raise_for_status()
        reviews = response.json().get('reviews') or []
    except (requests.exceptions.RequestException, ValueError) as e:
        print('Error occurred')
        print(e)

    # Without rows there is no 'time_created' column to derive the date from
    if not reviews:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(reviews)
    # Keep only the part of the timestamp before the first space
    df['date'] = df['time_created'].apply(lambda x: x.split(' ')[0])

    return df.loc[:,columns]

In [23]:
# Fetch the reviews for Chutneys, using its id from the search results above
reviews_df = fetchYelpReviews('yrfk9eKjtvlkzKyO1HPtCQ')
reviews_df

Unnamed: 0,rating,text,date
0,5,This place is great and a must visit for all B...,2022-08-19
1,5,This is my 1st five rating for a good reason: ...,2022-07-16
2,3,Luke warm madras coffee; 20 min to get seated;...,2022-05-08
