In [2]:
# Import the modules
import requests
import json
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import requests 
from config import yelpkey

In [3]:
# Yelp Fusion request settings: the search endpoint plus the authorization
# header built from the key imported from the local config module.
API_KEY = yelpkey
ENDPOINT = 'https://api.yelp.com/v3/businesses/search'
HEADERS = {'Authorization': f'bearer {API_KEY}'}

In [4]:
# Load the NY zip code list from CSV into a DataFrame.
# Note: for testing, this uses an abbreviated version of the full zip code list.

zipcodes_data = "clean_operational_ny_zips_SHORT.csv"
# Zip codes are identifiers, not numbers: read them as strings so that any
# leading zeros (e.g. "00501") survive and still match the zip code strings
# returned by the Yelp API further down.
zipcodes_df = pd.read_csv(zipcodes_data, dtype={"Zip Code": str})
zipcodes_df

Unnamed: 0,Zip Code
0,10924
1,10918
2,10028
3,10022
4,10001


In [5]:
# Append a blank "# Restaurants" column; the lookup loop below fills it in
# with the per-zip-code count.
zipcodes_df = zipcodes_df.assign(**{"# Restaurants": ""})
zipcodes_df

Unnamed: 0,Zip Code,# Restaurants
0,10924,
1,10918,
2,10028,
3,10022,
4,10001,


In [7]:
# For every zip code in the frame, query the Yelp search endpoint and record how
# many of the returned businesses report that exact zip code as their location
# (the total reported by the search can be larger than the number of businesses
# actually located in the zip code, so each result is checked individually).
# NOTE(review): the category filter is sent unchanged from the original notebook;
# confirm that 'restaurant' and 'all' are valid Yelp category aliases and that
# this filter really restricts the search to Chinese restaurants.
PAGE_SIZE = 50       # largest page the search is asked for per request
MAX_RESULTS = 1000   # Yelp only allows paging through this many results


def _count_in_zip(businesses, zip_code):
    """Return how many entries in `businesses` have `zip_code` as their location zip code."""
    target = str(zip_code)
    return sum(1 for business in businesses
               if business['location']['zip_code'] == target)


for index, row in zipcodes_df.iterrows():
    zip_code = row['Zip Code']  # named zip_code so the built-in zip() is not shadowed
    PARAMETERS = {'categories': 'restaurant,chinese,all',
                  'location': zip_code,
                  'limit': PAGE_SIZE,
                  'offset': 0,
                  'sort_by': 'distance'}
    try:
        print(f'Finding "{PARAMETERS["categories"]}" info for zip code: {zip_code}')
        business_data = requests.get(url=ENDPOINT, params=PARAMETERS, headers=HEADERS).json()
        print(f'Total Results: {business_data["total"]}')
        total = int(business_data['total'])

        # Count the first page. The count is written once per page (not once per
        # business), so a zip code with no results is recorded as 0 rather than
        # being left blank.
        count_in_zip = _count_in_zip(business_data['businesses'], zip_code)
        zipcodes_df.loc[index, "# Restaurants"] = count_in_zip

        # If the search reports more results than fit on one page, fetch the
        # remaining pages with the 'offset' parameter. Offsets are zero-based:
        # the first page covered results 0..PAGE_SIZE-1, so the next page starts
        # at PAGE_SIZE (starting at PAGE_SIZE + 1 would skip one result). The
        # range stops at MAX_RESULTS so offset + limit never exceeds the cap.
        for offset in range(PAGE_SIZE, min(total, MAX_RESULTS), PAGE_SIZE):
            PARAMETERS['offset'] = offset
            business_data = requests.get(url=ENDPOINT, params=PARAMETERS, headers=HEADERS).json()
            count_in_zip += _count_in_zip(business_data['businesses'], zip_code)
            # Written after every page, so a failure on a later page leaves the
            # partial count collected so far in the frame.
            zipcodes_df.loc[index, "# Restaurants"] = count_in_zip

        # print how many were found directly in the zip code
        print(f'{count_in_zip} found directly in {PARAMETERS["location"]}')
    except (json.decoder.JSONDecodeError, KeyError):
        # JSONDecodeError: the response body was not JSON.
        # KeyError: the JSON lacked an expected field (for example an error
        # payload without 'total' or 'businesses'). Skip this zip code and
        # carry on with the rest instead of aborting the whole run.
        print("Missing field/result... skipping.")
    print("-------------------------------------------")

Finding "restaurant,chinese,all" info for zip code: 10924
Total Results: 159
2 found directly in 10924
-------------------------------------------
Finding "restaurant,chinese,all" info for zip code: 10918
Total Results: 44
2 found directly in 10918
-------------------------------------------
Finding "restaurant,chinese,all" info for zip code: 10028
Total Results: 29
8 found directly in 10028
-------------------------------------------
Finding "restaurant,chinese,all" info for zip code: 10022
Total Results: 54
19 found directly in 10022
-------------------------------------------
Finding "restaurant,chinese,all" info for zip code: 10001
Total Results: 65
15 found directly in 10001
-------------------------------------------


In [8]:
# Display the data frame, which should now hold the per-zip-code restaurant
# count in its "# Restaurants" column.
zipcodes_df


Unnamed: 0,Zip Code,# Restaurants
0,10924,2
1,10918,2
2,10028,8
3,10022,19
4,10001,15
