# Get restaurant data from Yelp API

### Step 0. Load packages

In [14]:
from __future__ import print_function
import argparse
import json
import pprint
import requests
import sys
import urllib
import pandas as pd
import numpy as np
from ast import literal_eval

import matplotlib.pyplot as plt
%matplotlib inline

import warnings 
warnings.filterwarnings('default')

try:
    # For Python 3.0 and later
    from urllib.error import HTTPError
    from urllib.parse import quote
    from urllib.parse import urlencode
except ImportError:
    # Fall back to Python 2's urllib2 and urllib
    from urllib2 import HTTPError
    from urllib import quote
    from urllib import urlencode

### Step 1. Set up utility functions

In [3]:
def request(host, path, api_key, url_params=None):
    """Send an authenticated GET request to the API and return its JSON body.

    Args:
        host (str): The domain host of the API.
        path (str): The path of the API after the domain.
        api_key (str): Your API key, sent as a Bearer token.
        url_params (dict): An optional set of query parameters in the request.

    Returns:
        dict: The JSON response from the request.

    Raises:
        requests.exceptions.HTTPError: The API answered with a 4xx/5xx status.
    """
    url_params = url_params or {}
    # Percent-encode the path so that non-ASCII characters are URL-safe.
    url = '{0}{1}'.format(host, quote(path.encode('utf8')))
    headers = {
        'Authorization': 'Bearer %s' % api_key,
    }
    print(u'Querying {0} ...'.format(url))
    response = requests.request('GET', url, headers=headers, params=url_params)
    # Surface HTTP errors here instead of handing an error payload back to
    # callers that expect a regular result.
    response.raise_for_status()
    return response.json()


def search(api_key, term, location):
    """Query the Search API by a search term and location.

    Results are requested sorted by distance, with at most SEARCH_LIMIT
    businesses per call.

    Args:
        api_key (str): The API key forwarded to ``request``.
        term (str): The search term passed to the API.
        location (str): The search location passed to the API.

    Returns:
        dict: The JSON response from the request.
    """
    url_params = {
        # Pass the raw strings: requests URL-encodes query parameters itself,
        # so replacing spaces with '+' beforehand would send a literal '+'
        # (encoded as %2B) instead of a space.
        'term': term,
        'location': location,
        'sort_by': 'distance',
        'limit': SEARCH_LIMIT,
    }
    return request(API_HOST, SEARCH_PATH, api_key, url_params=url_params)

def get_reviews(business_id, api_key=None):
    """Query the reviews endpoint of the Business API for one business.

    Args:
        business_id (str): The ID of the business to query.
        api_key (str): Optional API key; when omitted, the module-level
            API_KEY is used.

    Returns:
        dict: The JSON response from the request.
    """
    if api_key is None:
        # Keep the original behaviour for callers that pass only the ID.
        api_key = API_KEY
    url = API_HOST + BUSINESS_PATH + business_id + '/reviews'
    headers = {'Authorization': f"Bearer {api_key}"}
    response = requests.get(url, headers=headers)
    return response.json()

### Step 2. Set up API parameters

In [4]:
# Set up the API key.
# SECURITY: do not paste the key into the notebook. A key that has been
# committed (even inside a comment) must be treated as leaked and revoked.
# Provide it through the YELP_API_KEY environment variable instead.
import os

API_KEY = os.environ.get('YELP_API_KEY')
if not API_KEY:
    warnings.warn('YELP_API_KEY is not set; Yelp API requests will not be authenticated.')

# Host and endpoint paths used to build the request URLs.
API_HOST = 'https://api.yelp.com'
SEARCH_PATH = '/v3/businesses/search'
BUSINESS_PATH = '/v3/businesses/'

# Default search term and the 'limit' value sent with every search request.
DEFAULT_TERM = 'restaurants'
SEARCH_LIMIT = 50

### Step 3. Get restaurant data by zip code

In [None]:
# Zip codes queried for the "gentrifying" group.
zip_gentri = """
    10454 10455 10459 10469 10474 11203 11205 11206 11211 11212
    11213 11215 11216 11217 11218 11220 11221 11222 11225 11226
    11232 11233 11237 11238 10002 10003 10007 10009 10013 10024
    10025 10026 10027 10029 10030 10031 10032 10033 10034 10035
    10037 10038 10039 10040 10115 10128 11101 11102 11103 11105
    11106 11370 11377
""".split()

# Zip codes queried for the "non-gentrifying" group.
zip_nongen = """
    10452 10453 10461 10465 10467 10468 10472 10473 11204 11208
    11214 11223 11224 11228 11235 11239
""".split()

# Zip codes queried for the "overlapped" group.
zip_overla = """
    10451 10456 10457 10458 10460 10462 10463 11207 11219 11236
""".split()

Gentrifying areas

In [None]:
# Query the Search API once per gentrifying zip code; each entry of
# restaurants_gentri holds the businesses returned for one zip code.
restaurants_gentri = []
for zip_code in zip_gentri:
    response = search(API_KEY, DEFAULT_TERM, zip_code)
    # A response without a 'businesses' key would yield None and break the
    # flattening step below, so fall back to an empty list.
    businesses = response.get('businesses') or []
    if not businesses:
        print(u'No businesses returned for zip code {0}'.format(zip_code))
    restaurants_gentri.append(businesses)

# Flatten the per-zip-code lists into a single list of business records.
list_restaurants_gentri = [item for sublist in restaurants_gentri for item in sublist]

Non-gentrifying areas

In [None]:
# Query the Search API once per non-gentrifying zip code; each entry of
# restaurants_nongen holds the businesses returned for one zip code.
restaurants_nongen = []
for zip_code in zip_nongen:
    response = search(API_KEY, DEFAULT_TERM, zip_code)
    # A response without a 'businesses' key would yield None and break the
    # flattening step below, so fall back to an empty list.
    businesses = response.get('businesses') or []
    if not businesses:
        print(u'No businesses returned for zip code {0}'.format(zip_code))
    restaurants_nongen.append(businesses)

# Flatten the per-zip-code lists into a single list of business records.
list_restaurants_nongen = [item for sublist in restaurants_nongen for item in sublist]

Overlapped areas

In [None]:
# Query the Search API once per overlapped zip code; each entry of
# restaurants_overla holds the businesses returned for one zip code.
restaurants_overla = []
for zip_code in zip_overla:
    response = search(API_KEY, DEFAULT_TERM, zip_code)
    # A response without a 'businesses' key would yield None and break the
    # flattening step below, so fall back to an empty list.
    businesses = response.get('businesses') or []
    if not businesses:
        print(u'No businesses returned for zip code {0}'.format(zip_code))
    restaurants_overla.append(businesses)

# Flatten the per-zip-code lists into a single list of business records.
list_restaurants_overla = [item for sublist in restaurants_overla for item in sublist]

Data for gentrifying, non-gentrifying and overlapped areas are saved as csv files under the data folder.

In [None]:
# Build one raw DataFrame per area group from the flattened lists of
# business records returned by the search calls.
df_gentri_raw = pd.DataFrame(list_restaurants_gentri)
df_nongen_raw = pd.DataFrame(list_restaurants_nongen)
df_overla_raw = pd.DataFrame(list_restaurants_overla)