https://developers.google.com/places/web-service/search

Find some service for restaurant categories. 

In [2]:
import json
import os
import time

import requests

In [13]:
def google_place_crawl(center, search, key, depth = 1, sleep_seconds=10):
    """Collect restaurant results from the Google Places Nearby Search service.

    The first request searches around `center` for `search`; up to `depth`
    follow-up requests then walk the result pages via `next_page_token`.

    Parameters
    ----------
    center : dict
        Must provide 'lat' and 'lng'; they are sent as the 'location' parameter.
    search : str
        Sent as the 'keyword' parameter.
    key : str
        API key, sent with every request.
    depth : int
        Maximum number of follow-up pages to fetch after the first page.
    sleep_seconds : int or float
        Pause before each follow-up request.

    Returns
    -------
    list or dict
        The concatenated 'results' lists of all fetched pages, or, when a
        request comes back with a non-OK HTTP status, a dict of the form
        { 'error': True, 'status_code': ..., 'text': ... }.
    """
    places_search_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
    # NOTE(review): the Places documentation appears to list `type` as the
    # current name of the `types` parameter; it is left unchanged here - confirm.
    payload = {
        'location' : "{0},{1}".format(center['lat'],center['lng']),
        #'radius' :50000,
        'types' : 'restaurant',
        'keyword' : search,
        'rankby' : 'prominence',
        'key' : key
        }
    follow_ups = max(depth, 0)
    data = []

    # Page 0 is the initial search; pages 1..follow_ups use the page token.
    for page in range(follow_ups + 1):
        response = requests.get(places_search_url, params=payload)
        if not response.ok:
            return { 'error' : True, 'status_code' : response.status_code, 'text' : response.text }

        raw = response.json()
        data += raw['results']
        pagetoken = raw.get('next_page_token',"")

        # Stop as soon as there is no further page. Requesting with an empty
        # token only costs a sleep plus an API call that cannot return results.
        if pagetoken == "" or page >= follow_ups:
            break

        # Presumably the token needs a moment before the service accepts it,
        # hence the pause before asking for the next page - confirm against docs.
        time.sleep(sleep_seconds)
        payload = { 'pagetoken' : pagetoken, 'key' : key }

    return data


In [20]:
def deduplicate(dedupe, data, search, location):
    """Merge crawl results into the running list of unique places.

    Two places are treated as the same when both 'name' and 'vicinity' match.
    Every place records, under 'search_ranks', each position at which it
    showed up: a list of { 'rank', 'location', 'keyword' } dicts, with rank
    being the 1-based position inside `data`.

    Parameters
    ----------
    dedupe : list of dict
        Places collected so far; extended in place.
    data : list of dict
        Newly crawled places, in result order.
    search : str
        Keyword that produced `data`; stored as 'keyword'.
    location : str
        Label of the searched location; stored as 'location'.

    Returns
    -------
    list of dict
        The same `dedupe` list object that was passed in.
    """
    for rank, item in enumerate(data, start=1):
        sighting = { 'rank' : rank, 'location' : location, 'keyword' : search }
        for ditem in dedupe:
            if ditem['name'] == item['name'] and ditem['vicinity'] == item['vicinity']:
                # Known place: record the additional sighting under the same
                # key that new entries use, creating the list if it is missing.
                ditem.setdefault('search_ranks', []).append(sighting)
                break  # match found, no need to look at the remaining entries
        else:
            # No existing entry matched: this is a new unique place.
            item['search_ranks'] = [sighting]
            dedupe.append(item)
    return dedupe

In [15]:
def read_locations(file):
    """Parse the JSON document stored at path `file` and return the result."""
    with open(file) as handle:
        locations = json.load(handle)
    return locations

In [5]:
class CrawlLog:
    """JSON-file-backed list of crawl records.

    Each record is a dict; `already_crawled` looks at its 'search' and
    'location' entries. The records live in `self.log` and are written to
    `self.logfile` only when `save()` is called.
    """

    def __init__(self, logfile):
        """Load the records from `logfile`, or start empty if it does not exist."""
        self.logfile = logfile
        try:
            with open(logfile) as f:
                self.log = json.load(f)
        except FileNotFoundError:
            self.log = []

    def already_crawled(self, search, location):
        """Return True if a record with this `search` and `location` exists.

        Records lacking either key are skipped instead of raising KeyError.
        """
        return any(
            'search' in row and 'location' in row
            and row['search'] == search and row['location'] == location
            for row in self.log
        )

    def append(self, item):
        """Add one record to the in-memory log (not persisted until `save`)."""
        self.log.append(item)

    def save(self):
        """Write all records to the file this log was created with."""
        # Must be self.logfile: a bare `logfile` is not defined in this scope.
        with open(self.logfile,'w') as f:
            json.dump(self.log, f)
        
        
# Quick check of CrawlLog: when 'goo.log' does not exist yet, the log starts
# out as an empty list and the file name is remembered on the instance.
l = CrawlLog('goo.log')
l.log, l.logfile

([], 'goo.log')

In [21]:
# Crawl every (location, cuisine) pair and collect the de-duplicated places.
# Full-size inputs, kept for reference:
#locations = read_locations('data/locations.json')
#cuisines = [ 'american', 'british', 'caribbean', 'chinese', 'french', 'greek', 'indian', 'italian', 'japanese', 'mediterranean', 'mexican', 'moroccan', 'spanish', 'thai', 'turkish', 'vietnamese']

locations = [ { 'lat' : 43.0481, 'lng' : -76.1474, 'place' : 'Syracuse, NY'} ]
cuisines = [ 'american', 'chinese', 'french','italian', 'japanese']
force_recrawl = False
# In-memory record of finished crawls: one dict per (search, location) pair.
crawl_log = []
# The API key is a credential: read it from the environment instead of
# storing it in the notebook. Raises KeyError if the variable is not set.
google_key = os.environ['GOOGLE_PLACES_API_KEY']

unique_places = []
for location in locations:
    print("Crawling Location", location)
    for search in cuisines:
        # A pair counts as crawled once a log row with the same search
        # keyword and place label exists.
        seen_before = any(
            row.get('search') == search and row.get('location') == location['place']
            for row in crawl_log
        )
        if seen_before and not force_recrawl:
            continue

        print("Searching for",search)
        places = google_place_crawl(location, search, google_key, depth=5)
        if isinstance(places, dict):
            # google_place_crawl returns an error dict instead of a list when
            # a request fails; report it and move on without logging the pair.
            print("Crawl failed", places.get('status_code'), places.get('text'))
            continue

        print("Places Crawled", len(places))
        unique_places = deduplicate(unique_places, places, search, location['place'])
        print("Unique Places After De-Duplication", len(unique_places))
        # 'search' has to be stored, otherwise the pair can never be recognised.
        crawl_log.append({ 'timestamp' : time.time(), 'search' : search, 'location' :location['place'] })

Crawling Location {'lat': 43.0481, 'lng': -76.1474, 'place': 'Syracuse, NY'}
Searching for american
Places Crawled 60
Places After De-Duplication 60
Searching for chinese
Places Crawled 60
Places After De-Duplication 116
Searching for french
Places Crawled 4
Places After De-Duplication 119
Searching for italian
Places Crawled 60
Places After De-Duplication 176
Searching for japanese
Places Crawled 32
Places After De-Duplication 200


In [22]:
# List every de-duplicated place with a 1-based running number.
# Result records carry both an 'id' and a 'place_id' key; 'place_id' is shown.
for i, d in enumerate(unique_places, start=1):
    print (i, d['name'], d['vicinity'],  d['place_id'])

1 The Mission Restaurant 304 E Onondaga St, Syracuse ChIJ2xD097vz2YkRP5n5o-hYFMo
2 317 @ MONTGOMERY STREET 317 Montgomery St, Syracuse ChIJ8edS5rvz2YkROKxZTP78zO8
3 Eden 118 E Genesee St, Syracuse ChIJ-7J9r7nz2YkRu3hwcKoYAYc
4 Redfield's Restaurant 701 E Genesee St Suite A, Syracuse ChIJ2SNg6aTz2YkRaY4MZOa-2YI
5 Eleven Waters 500 S Warren St, Syracuse ChIJo8R5WL7z2YkR7dg7pY6TGO0
6 Southern Tier Eat Smart New York 100 S Salina St, Syracuse ChIJDw3MV7jz2YkR7woH5_VgvB4
7 Modern Malt 325 S Clinton St, Syracuse ChIJS2dOz7jz2YkRN6bh8bgnvUo
8 Funk 'n Waffles 307-313 S Clinton St, Syracuse ChIJLX-Pxrjz2YkRLe-7F_VN3HE
9 The York 247 W Fayette St, Syracuse ChIJJdSLp7jz2YkRLDrtGu98O-M
10 Dinosaur Bar-B-Que 246 W Willow St, Syracuse ChIJkSmfAbjz2YkR5WIa4ilZjQU
11 Phoebe's Restaurant & Coffee Lounge 900 E Genesee St, Syracuse ChIJ4XPPn6bz2YkRKrZtWuXTaA0
12 1060 1060 E Genesee St, Syracuse ChIJbXjR46bz2YkRPHBXvYa28xg
13 Good To Go Cafe 725 Irving Ave, Syracuse ChIJ386V96Hz2YkRo5OLA-MJKjI
14 Rachel's

In [25]:
# Show a single raw record (index 42) to inspect the fields of a crawled place.
unique_places[42:43]

[{'geometry': {'location': {'lat': 43.048801, 'lng': -76.1401209},
   'viewport': {'northeast': {'lat': 43.05001882989273,
     'lng': -76.13878137010728},
    'southwest': {'lat': 43.04731917010728, 'lng': -76.14148102989272}}},
  'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/restaurant-71.png',
  'id': 'f67b43af8315527f8a7065c39d41248689bf50ae',
  'name': 'Szechuan Kitchen',
  'opening_hours': {'open_now': True},
  'photos': [{'height': 760,
    'html_attributions': ['<a href="https://maps.google.com/maps/contrib/100685374612890039880/photos">James Williams</a>'],
    'photo_reference': 'CmRaAAAAw-N6SU4863ug_FBhhBlG2cg-hj7B2DsuDLYERuCRX1J1Oga-9fmXB9HtZecPNLoiv2S3UMV-_94DtdyZK_sTdebzN2o-v1JoA9MxIAT_42Q8EXDu_VLzSuOY0cvdGAxBEhABlU4y8m8K5p1wDzSb3K_2GhSSOm8OpkonLIVgyoZeahez1UbBEg',
    'width': 1344}],
  'place_id': 'ChIJfYZ_rqXz2YkR3aHuDVffhrw',
  'plus_code': {'compound_code': '2VX5+GX Syracuse, New York',
   'global_code': '87M52VX5+GX'},
  'price_level': 1,
  'rating': 4,

In [107]:
## Getting the details

# Fetch the Place Details record for one crawled place.
# NOTE(review): `places` is whatever the most recent crawl call left behind;
# presumably any entry of `unique_places` would serve equally well - confirm.
place_details_url = 'https://maps.googleapis.com/maps/api/place/details/json'
payload = { 
    'placeid' : places[0]['place_id'],
    'key' : google_key
          }
response = requests.get(place_details_url, params = payload)
# Show what was requested without the 'key' parameter, so the API key does
# not end up in the saved notebook output.
print(place_details_url, { name : value for name, value in payload.items() if name != 'key' })
# Stop here on an HTTP error instead of trying to parse an error page.
response.raise_for_status()
rest = response.json()
rest

https://maps.googleapis.com/maps/api/place/details/json?placeid=ChIJIYw-Srnz2YkROvNa1VH6YV4&key=REDACTED


{'html_attributions': [],
 'result': {'address_components': [{'long_name': '349',
    'short_name': '349',
    'types': ['street_number']},
   {'long_name': 'South Warren Street',
    'short_name': 'S Warren St',
    'types': ['route']},
   {'long_name': 'Downtown',
    'short_name': 'Downtown',
    'types': ['neighborhood', 'political']},
   {'long_name': 'Syracuse',
    'short_name': 'Syracuse',
    'types': ['locality', 'political']},
   {'long_name': 'Onondaga County',
    'short_name': 'Onondaga County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'New York',
    'short_name': 'NY',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']},
   {'long_name': '13202', 'short_name': '13202', 'types': ['postal_code']},
   {'long_name': '2007',
    'short_name': '2007',
    'types': ['postal_code_suffix']}],
  'adr_address': '<span class="street-address">3

In [None]:
# List the field names available in the place-details result.
rest['result'].keys()

In [26]:
# Current time as a float Unix timestamp; the crawl loop stores this value
# under 'timestamp' in its log entries.
time.time()

1548877320.3428605

In [106]:
# Scratch experiment: page through Nearby Search results for one keyword,
# ranked by distance, fetching at most `max_pages` pages.
center = { 'lat' : 43.0481, 'lng' : -76.1474 } # syracuse
search = 'pizza'
# The API key is a credential: read it from the environment instead of
# storing it in the notebook. Raises KeyError if the variable is not set.
key = os.environ['GOOGLE_PLACES_API_KEY']

places_search_url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json'
payload = { 
    'location' : "{0},{1}".format(center['lat'],center['lng']),
    #'radius' :50000,
    'types' : 'restaurant',
    'keyword' : search,
    'rankby' : 'distance',
    'key' : key
    }

max_pages = 3
data = []        # concatenated 'results' of every page fetched
raw_pages = []   # full decoded response of every page, for inspection
pagetoken = ""
for page in range(1, max_pages + 1):
    response = requests.get(places_search_url, params=payload)
    # Print the request without the 'key' parameter so the API key does not
    # end up in the saved notebook output.
    print(page, places_search_url, { name : value for name, value in payload.items() if name != 'key' })
    if not response.ok:
        break

    raw = response.json()
    raw_pages.append(raw)
    data += raw['results']
    # The token is absent on the last page, so it must not be indexed directly.
    pagetoken = raw.get('next_page_token',"")
    print(page,pagetoken)
    if pagetoken == "" or page == max_pages:
        # A request with an empty page token is answered with INVALID_REQUEST.
        break

    # Pause before asking for the next page, then request it by token only.
    time.sleep(10)
    payload = { 'pagetoken' : pagetoken, 'key' : key }

1 https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=43.0481%2C-76.1474&types=restaurant&keyword=pizza&rankby=distance&key=REDACTED
1 CuQEVQIAAAhtMdzzXwKT5J66HkCpHfCOLvLfDkwLytVoM2vsN4OgfdzjQyGstnCAEdbbjAVc7M6frWiWiBv6wmkEMxcFlysREE_gAenxysBioILwL4pLj7ikyP09IHY-5pQlEVrJ6yxPdZlnuKQShhA9t-GqqARaYjPkbjqawBnQ_3SeZ9SrDo6QlK3f-obfDoplRRUcRNVT2rJi5mF8n7_gSadMz7qYQ3ZyhXPjRix1_x25CSEP4impYgWOoJRFVWlfMP9dWaB23FH3SWyHEqz9LNvJsq5LvwaGJflkk5ruhZ6MmIM40aIKI5V7lUQrqEPTs908wcxppcEJ39nqkgmXISE-Lz6uM5K-4-asX-5PfiwIm-GB5BB7J2Vi2_rLwygQgh_Ssunwu5jFLZKO3Yn-C9P-9TxRVDyFvi2CKNLTYHlhr7bmQa4un-HWP1mV6wnFPjNH1padcT73prXHrsbDX93AWGHHkMBkabBwEB1sMSNb6lFaLKqR8DhYAMPP5PE7A07rcQJD5xnx-7c3ib7APOVOee0Ftswrurrf77SUGoQbFJ83l6qYMseevi2i_Ht4pCpA_EC7es_2cNMfd8eFVXipIc49c_OlwDXiQxPHuadIDnU0HpBjidkx1D__0gRFbcQVftXsW9fuHSJOG2xWSKt_IW6fgWpNah7F5xghVKL1zcrcNLosU4TzPc0zU6nhfbDBnucjdTzUO-Jj67g0TV8-FPwfY80fuHlLUOjw4ZmU6ztdT5q2Ij5VJpfbGaPbf8Y9XFxIYLMMclSvgfIaoFgmQ4pOQM2Q_juSst2HqZIR

In [97]:
# Raw body of the most recent response. The recorded output shows status
# INVALID_REQUEST with no results - presumably the request was sent with an
# empty or not-yet-valid page token; confirm.
response.text


'{\n   "html_attributions" : [],\n   "results" : [],\n   "status" : "INVALID_REQUEST"\n}\n'