## Import packages and define functions

The coordinate-generation functions below come from Egemen's file 'generate coordinates'.

In [10]:
import csv
import os

import geopy.distance
import numpy as np
import pandas as pd

In [11]:
def gen_coordinates(topleft=(42.018708, -87.822461), botright=(41.644748, -87.524522), radius=200):
    '''
    Generates a grid of coordinates over a rectangle. Neighbouring grid points are
    two radii apart in both latitude and longitude, so they can serve as center
    points of circles of the given radius.

    Inputs:
        topleft: tuple (lat, long) for the northwest corner of the rectangle.
        botright: tuple (lat, long) for the southeast corner of the rectangle.
        radius: radius of each circle in meters; grid points are generated every
            2 * radius meters. Must be positive.

    Outputs:
        tuple (coords, filler_tl, filler_br) where
            coords: list of (lat, long) tuples, rounded to 6 decimals, partially
                covering the area of the rectangle.
            filler_tl: topleft moved one radius south and one radius east.
            filler_br: botright moved one radius north and one radius west.
        Passing filler_tl / filler_br back into this function yields a second grid
        offset by one radius, which covers the gaps between the first circles.

    Raises:
        ValueError: if radius is not positive.
    '''
    # A zero radius gives np.arange a zero step (which fails) and a negative radius
    # gives it a negative step (which silently produces an empty grid), so reject both.
    if radius <= 0:
        raise ValueError("radius must be a positive number of meters")

    #trying to find out what 0.000001 change in lat and lon corresponds to in meters

    #make an estimate by taking the average of meter change in the farthest 2 points of the rectangle

    compare_tl = (topleft[0] + 1/1000000, topleft[1])
    compare_br = (botright[0] + 1/1000000, botright[1])

    #geopy.distance.distance(c1, c2).m gives the distance between coordinates c1 and c2 in meters.

    lat_unit = (geopy.distance.distance(compare_tl, topleft).m +
                geopy.distance.distance(compare_br, botright).m) / 2

    compare_tl = (topleft[0], topleft[1] + 1/1000000)
    compare_br = (botright[0], botright[1] + 1/1000000)
    long_unit = (geopy.distance.distance(compare_tl, topleft).m +
                 geopy.distance.distance(compare_br, botright).m) / 2

    #v and h are how much we should change the lat and long values to move north/south and
    #east/west by radius meters.
    v = radius / lat_unit / 1000000
    h = radius / long_unit / 1000000

    #step by two radii; min/max make the ranges independent of the order of the two corners
    lats = np.arange(min(topleft[0], botright[0]), max(topleft[0], botright[0]), v * 2)
    longs = np.arange(min(topleft[1], botright[1]), max(topleft[1], botright[1]), h * 2)

    coords = [(round(lat, 6), round(long, 6)) for lat in lats for long in longs]

    #fillers will be used to generate lats and longs that cover the areas between the circles

    filler_tl = (round(topleft[0] - v, 6), round(topleft[1] + h, 6))
    filler_br = (round(botright[0] + v, 6), round(botright[1] - h, 6))

    return (coords, filler_tl, filler_br)

In [12]:
def gen_coords_helper(topleft=(42.018708, -87.822461), botright=(41.644748, -87.524522), radius=200):
    '''
    Builds the full list of search coordinates by running gen_coordinates twice:
    once on the given rectangle and once on the "filler" rectangle it returns,
    so that the second grid covers the areas between the first set of circles.

    Inputs:
        topleft: tuple that contains the coordinates for the northwest corner of the rectangle.
            Default value is the coordinates for the northwest corner of Chicago.
        botright: tuple that contains the coordinates for the southeast corner of the rectangle.
            Default value is the coordinates for the southeast corner of Chicago.
        radius: interval at which the coordinates will be generated (in meters).

    Outputs:
        list of tuples containing coordinates covering the area of the rectangle.
    '''
    base_grid, inner_tl, inner_br = gen_coordinates(topleft, botright, radius)

    # Second pass over the filler corners; only its coordinate list is needed.
    offset_grid = gen_coordinates(inner_tl, inner_br, radius)[0]

    base_grid.extend(offset_grid)
    return base_grid

## Use your own API key
The next cell creates the Yelp API client. Make sure the key passed to `YelpAPI()` is your own!

In [13]:
from yelpapi import YelpAPI

# The API key is read from the environment instead of being written into the notebook,
# so it is not leaked when the notebook is shared or committed.
# NOTE(review): the key that used to be hard-coded here should be treated as exposed and revoked.
yelp_api_key = os.environ.get("YELP_API_KEY")
if not yelp_api_key:
    raise RuntimeError(
        "Set the YELP_API_KEY environment variable to your own Yelp API key before running this cell."
    )

yelp_api = YelpAPI(yelp_api_key)

There are 8500 businesses listed as being in Chicago.

Complete list of parameters and output can be found here:
https://www.yelp.com/developers/documentation/v3/business_search

List and descriptions of output: https://www.yelp.com/developers/documentation/v3/business

## Now loop over many coordinates and search around each one


In [14]:
# Build the list of search coordinates: gen_coords_helper's default corners with a 200 m radius.
coordinates = gen_coords_helper(radius=200)

Check how many coordinates were generated:

In [24]:
# Number of coordinate tuples in the generated list.
len(coordinates)

12731

In [16]:
# Search parameters sent to Yelp for every coordinate.
SEARCH_RADIUS_M = 200   # meters; matches the radius used to generate the coordinates
SEARCH_LIMIT = 50       # number of businesses requested per search

count_no_return = 0
chi_businesses = []
# Businesses whose listed city is not Chicago; kept so they can be checked at a later point.
other_businesses = []
# ids already collected: the two coordinate grids overlap, so neighbouring searches can
# return the same business, and every duplicate would cost an extra detail query later.
seen_business_ids = set()

#change this to your coordinate range
coord_range = coordinates[770:790]

for lat, long in coord_range:
    response = yelp_api.search_query(latitude=lat, longitude=long,
                                     radius=SEARCH_RADIUS_M, limit=SEARCH_LIMIT)

    # An empty response, or one without a 'businesses' entry, counts as "no businesses".
    businesses = (response or {}).get('businesses') or []

    if not businesses:
        count_no_return += 1
        continue

    for business in businesses:
        if business['id'] in seen_business_ids:
            continue
        seen_business_ids.add(business['id'])

        #we could also use the zip codes here but that's much more difficult
        # location or city may be missing/None, so fall back to an empty string
        city = (business.get('location') or {}).get('city') or ''
        if city.lower() == 'chicago':
            chi_businesses.append(business)
        else:
            other_businesses.append(business)

    # A full page means there may be more businesses here than one search can return.
    if len(businesses) == SEARCH_LIMIT:
        print("warning: there are over", SEARCH_LIMIT, "businesses at location", lat, long,
              "consider breaking this down further")

print(count_no_return, "out of", len(coord_range), "searches returned no businesses")

14 out of 20 searches returned no businesses


You can look at the businesses returned here (checking the length may be a better way to 'look'):

In [17]:
#chi_businesses

Collect all the business queries from the search output above

In [18]:
# Fetch the full business record from Yelp for every Chicago business found by the search,
# looking each one up by its 'id'.
list_business_queries = [
    yelp_api.business_query(found['id'])
    for found in chi_businesses
]

Create a list of the relevant keys. Note: you may be able to simply use the following list instead (but I'm not positive):
['id', 'alias', 'name', 'image_url', 'is_claimed', 'is_closed', 'url', 'phone', 'display_phone', 'review_count', 'categories', 'rating', 'location', 'coordinates', 'photos', 'hours', 'transactions', 'price', 'messaging']

In [21]:
# Union of the keys across all business records, in first-seen order.
# dict.fromkeys drops repeats while keeping insertion order.
list_keys = list(dict.fromkeys(
    key
    for details in list_business_queries
    for key in details.keys()
))


In [22]:
#here you can check if it matches
#list_keys

## Output to CSV 

In [23]:
#change this to another name
output_file_n = "testerFile.csv"

try:
    # newline='' lets the csv module control line endings itself (otherwise extra blank
    # rows can appear on Windows); utf-8 keeps non-ASCII text in the records writable
    # regardless of the platform's default encoding.
    with open(output_file_n, 'w', newline='', encoding='utf-8') as outFile:
        # list_keys is the union of all record keys; records missing a key get an empty cell.
        writer = csv.DictWriter(outFile, fieldnames=list_keys)
        writer.writeheader()
        writer.writerows(list_business_queries)
except IOError as err:
    # Report what went wrong instead of only a generic message.
    print("I/O Error:", err)

### Yelp also provides another endpoint that makes it easier to match records from another database (e.g. the City's)!
https://www.yelp.com/developers/documentation/v3/business_match