# Yelp API

In [2]:
import json
from pprint import pprint

import psycopg2
import requests
import yaml

## Connect to PostgreSQL database

In [3]:
# connection settings for the PostgreSQL server
# NOTE(review): the credentials are hard-coded in the notebook; consider
# loading them from a config file or the environment instead
db_settings = {
    "database": "postgres",
    "user": "postgres",
    "password": "apassword",
    "host": "192.168.0.104",
    "port": "5432",
}

# open the connection and turn on autocommit, so no explicit commit() is
# needed after the statements executed below
conn = psycopg2.connect(**db_settings)
conn.autocommit = True

# cursor used to run statements on this connection
cur = conn.cursor()

# make sure the target table exists: `id` is the primary key and
# `business` is a jsonb column
create_table_sql = """CREATE TABLE IF NOT EXISTS yelp_business_search
               (id varchar PRIMARY KEY NOT NULL,
                business jsonb NOT NULL)"""
cur.execute(create_table_sql)

## Collect data from Yelp API

In [4]:
# read in the config file
# safe_load is used because yaml.load() without an explicit Loader is
# deprecated (and rejected by PyYAML >= 6) and can construct arbitrary
# Python objects; only plain mappings/lists/strings are read below
with open('/home/curtis/etc/yelp.yaml') as f:
    config = yaml.safe_load(f)

# get the API key: the 'key' field of the first entry under 'yelp.com'
api_key = config['yelp.com'][0]['key']

In [5]:
def query_api(limit=50, offset=0, timeout=30):
    """Submit one business-search request to the Yelp API.

    The search is fixed to location 'boston' and category 'coffee';
    `limit` and `offset` select which slice of the results is requested.
    The request is authorized with the module-level `api_key`.

    Parameters
    ----------
    limit : int
        Value sent as the `limit` query parameter.
    offset : int
        Value sent as the `offset` query parameter.
    timeout : float
        Seconds to wait for the server before `requests` gives up, so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    The decoded JSON body of the response. Bodies of non-OK responses are
    returned as well (after a warning is printed), so callers should check
    for the 'businesses' key before using the result.
    """

    # define the base URL for the request
    base_url = 'https://api.yelp.com/v3/businesses/search'

    # define the header for the request
    headers = {
        'Authorization': 'Bearer %s' % api_key,
    }

    # define the parameters for the request
    params = {
        'location': 'boston',
        'categories': 'coffee',
        'limit': limit,
        'offset': offset,
    }

    # submit the request to the API
    response = requests.get(base_url, headers=headers, params=params,
                            timeout=timeout)

    # check the status of the response; report failures instead of
    # silently ignoring them, but still hand the body back to the caller
    if not response.ok:
        print("warning: request at offset {} returned HTTP {}".format(
            offset, response.status_code))

    # decode and return the response data
    return response.json()

In [6]:
# request the results one page at a time: offsets 0, 50, ..., 950,
# keeping every raw response for the next step
page_size = 50
responses = []
for query_number, page_offset in enumerate(range(0, 1000, page_size)):
    page = query_api(limit=page_size, offset=page_offset)
    print("query #{} completed".format(query_number))
    responses.append(page)

query #0 completed
query #1 completed
query #2 completed
query #3 completed
query #4 completed
query #5 completed
query #6 completed
query #7 completed
query #8 completed
query #9 completed
query #10 completed
query #11 completed
query #12 completed
query #13 completed
query #14 completed
query #15 completed
query #16 completed
query #17 completed
query #18 completed
query #19 completed


In [7]:
# flatten the 'businesses' lists of the individual responses into one
# list; responses without a 'businesses' key are skipped
data = [
    business
    for page in responses
    if 'businesses' in page.keys()
    for business in page['businesses']
]

print(len(data))

1000


## Investigate structure of data returned from API

In [8]:
# inspect the response structure: show the keys of the first business record
data[0].keys()

dict_keys(['name', 'url', 'transactions', 'review_count', 'phone', 'categories', 'rating', 'display_phone', 'coordinates', 'price', 'id', 'image_url', 'distance', 'location', 'is_closed', 'alias'])

In [9]:
# what does the first business look like? pretty-print the whole record
pprint(data[0])

{'alias': 'modern-pastry-shop-boston',
 'categories': [{'alias': 'bakeries', 'title': 'Bakeries'},
                {'alias': 'coffee', 'title': 'Coffee & Tea'},
                {'alias': 'desserts', 'title': 'Desserts'}],
 'coordinates': {'latitude': 42.36324, 'longitude': -71.05474},
 'display_phone': '(617) 523-3783',
 'distance': 2185.181158712615,
 'id': '54ElwAyN-o8e4uvOkC85hw',
 'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/BBuJF89-g0zFa1HcCHmF0w/o.jpg',
 'is_closed': False,
 'location': {'address1': '257 Hanover St',
              'address2': '',
              'address3': '',
              'city': 'Boston',
              'country': 'US',
              'display_address': ['257 Hanover St', 'Boston, MA 02113'],
              'state': 'MA',
              'zip_code': '02113'},
 'name': 'Modern Pastry Shop',
 'phone': '+16175233783',
 'price': '$',
 'rating': 4.0,
 'review_count': 1596,
 'transactions': ['delivery'],
 'url': 'https://www.yelp.com/biz/modern-pastry-shop-boston

## Load raw source data into PostgreSQL database

In [10]:
# iterate over the response data and insert each business into PostgreSQL
#
# - the record is serialized with json.dumps for the jsonb column
# - ON CONFLICT (id) DO NOTHING (PostgreSQL 9.5+) skips ids that are
#   already stored, so duplicate results and notebook re-runs stay quiet
# - only database errors are caught, and they are reported rather than
#   swallowed; any other exception (e.g. a programming error) propagates
inserted = 0
failed = 0
for n, business in enumerate(data):

    try:

        # put data into the database
        cur.execute("""INSERT INTO yelp_business_search
                       (id, business)
                       VALUES (%s, %s)
                       ON CONFLICT (id) DO NOTHING""",
                    [business['id'], json.dumps(business)])

        # rowcount is 1 for a new row and 0 when the id already existed
        inserted += cur.rowcount

    except psycopg2.Error as err:

        # report the failure and carry on with the remaining records
        failed += 1
        print("insert failed for record #{} (id={}): {}".format(
            n, business['id'], err))

print("inserted {} of {} records ({} failed)".format(
    inserted, len(data), failed))