In [1]:
import json
import requests
import configparser
import os

import pandas as pd
import numpy as np
import datetime, time
from sklearn import preprocessing
import logging


In [2]:
# Parse the local credentials file. `read` returns the list of files it
# managed to load, which the notebook shows as this cell's output.
credentials_path = './credentials/dwh.cfg'
config = configparser.ConfigParser()
config.read(credentials_path)

['./credentials/dwh.cfg']

In [3]:
# Copy the Yelp credentials from the [YELP] section of the parsed config
# into the process environment, one variable per option of the same name.
for credential in ('YELP_CLIENT_ID', 'YELP_API_KEY'):
    os.environ[credential] = config.get('YELP', credential)

In [4]:
# Business-search endpoint and the bearer-token header sent with every request.
# The API key is taken from the YELP_API_KEY environment variable.
yelp_url = 'https://api.yelp.com/v3/businesses/search'
headers = dict(Authorization='Bearer ' + os.environ['YELP_API_KEY'])

In [5]:
 # Category label and the list of district names to crawl, in crawl order.
 # NOTE(review): `category` is not referenced by the visible download loop.
 category = 'Restaurants'
 districts = [
     'Midtown West',
     'Greenwich Village',
     'East Harlem',
     'Upper East Side',
     'Midtown East',
     'Gramercy',
     'Little Italy',
     'Chinatown',
     'SoHo',
     'Harlem',
     'Upper West Side',
     'Tribeca',
     'Garment District',
     'Stuyvesant Town',
     'Financial District',
     'Chelsea',
     'Morningside Heights',
     'Times Square',
     'Murray Hill',
     'East Village',
     'Lower East Side',
     'Hells Kitchen',
     'Central Park',
 ]

In [27]:
# Download Yelp business-search results for every district and store each
# result page as its own JSON file:
#
#   ./data/<run_time>/<District-Name>/restaurants-<District-Name>-<step>.json
#
# NOTE: the logging.info messages below are only emitted when the root logger
# is configured at INFO level (e.g. logging.basicConfig(level=logging.INFO)).
run_time = int(time.time())  # one timestamp per run keeps runs in separate folders

# Request settings shared by every call (loop-invariant, so defined once).
term = 'Restaurants'
search_limit = 20            # results requested per page
max_steps = 50               # upper bound on pages requested per district
request_timeout = 30         # seconds; without it a stalled connection hangs forever
categories = 'restaurants'   # the API expects comma-separated category aliases
sort_by = 'best_match'

response = None              # last HTTP response received (for the summary below)
payload = None               # last successfully parsed JSON body (may be an error body)
data = None                  # last result page that was written to disk

for district_name in districts:
    logging.info('-------------------\n Gathering data for {} \n ---------------------\n'.format(district_name))

    district_slug = district_name.replace(' ', '-')
    directory = './data/{}/{}/'.format(run_time, district_slug)
    # Pass the plain text: requests URL-encodes query values itself, so
    # pre-replacing spaces with '+' would send literal plus signs to the API.
    location = district_name + ', Manhattan, NY'

    for step in range(max_steps):
        offset = step * search_limit

        url_params = {
            'location': location,
            'term': term,
            'limit': search_limit,
            'offset': offset,
            'categories': categories,
            'sort_by': sort_by,
        }

        response = requests.get(yelp_url, headers=headers, params=url_params,
                                timeout=request_timeout)

        # Parse the body exactly once; a non-JSON body raises a ValueError subclass.
        try:
            payload = response.json()
        except ValueError:
            payload = None
            logging.warning('Stopping {} at offset {}: response is not valid JSON ({})'.format(
                district_name, offset, response))
            break

        # The API signals "no more pages" either with an error body that has no
        # 'businesses' key (result-window limit) or with an empty list.
        if not payload.get('businesses'):
            logging.warning('Stopping {} at offset {}: {}'.format(
                district_name, offset, payload.get('error', 'no more results')))
            break

        data = payload

        logging.info(('***** {} Restaurants #{} - #{} ....{}'.format(district_name,
                                                              offset+1, offset+search_limit,
                                                              response)))

        filename = 'restaurants-{}-{}.json'.format(district_slug, step)

        os.makedirs(directory, exist_ok=True)

        with open(os.path.join(directory, filename), 'w') as data_object:
            json.dump(data, data_object)


# Summary of the last request, useful when debugging interactively.
if response is not None:
    logging.info(response)
    logging.info(type(response.text))
    if payload is not None:
        logging.info(payload.keys())
    logging.info(response.text[:1000])



In [29]:
# Display the `data` dict left in the kernel by the download loop
# (presumably the last result page parsed there - verify after a fresh run).
data

{'businesses': [{'id': 'sfsjk1h50ooC2ioY7m48gw',
   'alias': 'pig-heaven-new-york-3',
   'name': 'Pig Heaven',
   'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/YzHJPPxM5m6qp_vwfrC6SA/o.jpg',
   'is_closed': False,
   'url': 'https://www.yelp.com/biz/pig-heaven-new-york-3?adjust_creative=57uLU1O-AjUbFSI7t6x7lw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=57uLU1O-AjUbFSI7t6x7lw',
   'review_count': 429,
   'categories': [{'alias': 'bbq', 'title': 'Barbeque'},
    {'alias': 'szechuan', 'title': 'Szechuan'},
    {'alias': 'dimsum', 'title': 'Dim Sum'}],
   'rating': 4.0,
   'coordinates': {'latitude': 40.77544, 'longitude': -73.95686},
   'transactions': ['delivery', 'pickup'],
   'price': '$$',
   'location': {'address1': '1420 3rd Ave',
    'address2': '',
    'address3': '',
    'city': 'New York',
    'zip_code': '10028',
    'country': 'US',
    'state': 'NY',
    'display_address': ['1420 3rd Ave', 'New York, NY 10028']},
   'phone': '+12127444333',
 