# **NYC Restaurant YELP API**

Joseph Lardie

December 2023

# **Imports**

In [1]:
# Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Additional Imports
import os, json, math, time
from yelpapi import YelpAPI
from tqdm.notebook import tqdm_notebook

# **Calling API**

In [2]:
# Load API Credentials
with open('/Users/davyd/.secret/yelp_api.json','r') as f:
    login = json.load(f)

In [3]:
# Login Key
login.keys()

dict_keys(['client-id', 'api-key'])

In [4]:
# Instantiate YelpAPI Variable
yelp = YelpAPI(login['api-key'],timeout_s=5.0)

# **Defining Search Parameters**

In [5]:
# set our API call parameters and filename before the first call
location = 'New York, NY 10009'
term = 'Pizza'

In [6]:
location.split(',')[0]

'New York'

In [7]:
## Specify fodler for saving data


FOLDER ='Data/'

os.makedirs(FOLDER,exist_ok=True)

# Specifying JSON_FILE filename (can include a folder)
JSON_FILE = FOLDER+f"{location.split(',')[0]}-{term}.json"

In [8]:
## Check if JSON_FILE exists
file_exists = os.path.isfile(JSON_FILE)

## If it does not exist: 
if file_exists ==False:
    ## CREATE ANY NEEDED FOLDERS
    # Get the Folder Name only
    folder = os.path.dirname(JSON_FILE)
    
    ## If JSON_FILE included a folder:
    if len(folder)>0:
        # create the folder
        os.makedirs(folder,exist_ok=True)
        
    ## INFORM USER AND SAVE EMPTY LIST
    print(f"[i] {JSON_FILE} not found. Saving empty list to file.")
    
    
    ## save the first page of results
    with open(JSON_FILE,'w')as f:
        json.dump([],f)
        
## If it exists, inform user
else:
       print(f"[i] {JSON_FILE} already exists.")

[i] Data/New York-Pizza.json not found. Saving empty list to file.


In [9]:
results = yelp.search_query(term=term,location = location)

In [10]:
type(results)

dict

In [11]:
len(results)

3

In [12]:
results.keys()

dict_keys(['businesses', 'total', 'region'])

In [13]:
results['total']

1400

In [14]:
results['region']

{'center': {'longitude': -73.97995948791504, 'latitude': 40.72677021626302}}

In [15]:
results['businesses']

[{'id': 'oOr6Ta5gZyDl6PyZ1cq7pg',
  'alias': 'east-village-pizza-new-york',
  'name': 'East Village Pizza',
  'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/KOrE1ez2CtKZ44Un7EFaTQ/o.jpg',
  'is_closed': False,
  'url': 'https://www.yelp.com/biz/east-village-pizza-new-york?adjust_creative=TqzhtA42spPSKOmzDuw9JA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=TqzhtA42spPSKOmzDuw9JA',
  'review_count': 531,
  'categories': [{'alias': 'pizza', 'title': 'Pizza'}],
  'rating': 4.0,
  'coordinates': {'latitude': 40.728212, 'longitude': -73.985182},
  'transactions': ['pickup', 'delivery'],
  'price': '$',
  'location': {'address1': '145 1st Ave',
   'address2': '',
   'address3': '',
   'city': 'New York',
   'zip_code': '10003',
   'country': 'US',
   'state': 'NY',
   'display_address': ['145 1st Ave', 'New York, NY 10003']},
  'phone': '+12125294545',
  'display_phone': '(212) 529-4545',
  'distance': 469.92936068672833},
 {'id': '-CCfu19Uuy_kD2t4ZwsQfA',
  'al

In [16]:
pd.DataFrame(results['businesses'])

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,oOr6Ta5gZyDl6PyZ1cq7pg,east-village-pizza-new-york,East Village Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/KOrE1e...,False,https://www.yelp.com/biz/east-village-pizza-ne...,531,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.728212, 'longitude': -73.985182}","[pickup, delivery]",$,"{'address1': '145 1st Ave', 'address2': '', 'a...",12125294545.0,(212) 529-4545,469.929361
1,-CCfu19Uuy_kD2t4ZwsQfA,village-square-pizza-new-york-3,Village Square Pizza,https://s3-media1.fl.yelpcdn.com/bphoto/3gFPiI...,False,https://www.yelp.com/biz/village-square-pizza-...,299,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.5,"{'latitude': 40.72748, 'longitude': -73.98268}","[pickup, delivery]",$,"{'address1': '147 Ave A', 'address2': None, 'a...",19176757709.0,(917) 675-7709,249.475729
2,zj8Lq1T8KIC5zwFief15jg,prince-street-pizza-new-york-2,Prince Street Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/bckPg7...,False,https://www.yelp.com/biz/prince-street-pizza-n...,5156,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,"{'latitude': 40.72308755605564, 'longitude': -...","[pickup, delivery]",$,"{'address1': '27 Prince St', 'address2': None,...",12129664100.0,(212) 966-4100,1294.328146
3,WG639VkTjmK5dzydd1BBJA,rubirosa-new-york-2,Rubirosa,https://s3-media3.fl.yelpcdn.com/bphoto/l0Phrn...,False,https://www.yelp.com/biz/rubirosa-new-york-2?a...,3242,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.5,"{'latitude': 40.722766, 'longitude': -73.996233}",[pickup],$$,"{'address1': '235 Mulberry St', 'address2': ''...",12129650500.0,(212) 965-0500,1441.82686
4,0UdC0VYX6aAvLiOcpR4MVQ,joes-pizza-new-york-7,Joe's Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/QXu6RG...,False,https://www.yelp.com/biz/joes-pizza-new-york-7...,1083,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.73331, 'longitude': -73.98763}","[pickup, delivery]",$,"{'address1': '150 E 14th St', 'address2': '', ...",12123889474.0,(212) 388-9474,968.586097
5,vfYhEpp0x-DrNjC6GSJjPQ,artichoke-basilles-pizza-new-york-2,Artichoke Basille's Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/4krjwP...,False,https://www.yelp.com/biz/artichoke-basilles-pi...,4217,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.73216, 'longitude': -73.983877}","[pickup, delivery]",$,"{'address1': '321 E 14th St', 'address2': '', ...",12122282004.0,(212) 228-2004,684.947759
6,GyN4Kl0vFwziRN-h-Ow7ig,saint-pizza-new-york,Saint Pizza,https://s3-media1.fl.yelpcdn.com/bphoto/6gTOBJ...,False,https://www.yelp.com/biz/saint-pizza-new-york?...,16,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.72905715251202, 'longitude': -...","[pickup, delivery]",$,"{'address1': '223 Avenue B', 'address2': '', '...",16463988250.0,(646) 398-8250,297.880462
7,TWH4MjLtN1fKlF-7n6YXHg,lil-frankies-new-york,Lil Frankie's,https://s3-media3.fl.yelpcdn.com/bphoto/t0OsEV...,False,https://www.yelp.com/biz/lil-frankies-new-york...,1515,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,"{'latitude': 40.723625, 'longitude': -73.988555}","[pickup, delivery]",$$,"{'address1': '19 1st Ave', 'address2': '', 'ad...",12124204900.0,(212) 420-4900,795.591829
8,ow-zOxbslAoVw_uZ5UYoOA,chrissy-s-pizza-new-york,Chrissy’s Pizza,https://s3-media2.fl.yelpcdn.com/bphoto/CPG7Wc...,False,https://www.yelp.com/biz/chrissy-s-pizza-new-y...,1,"[{'alias': 'pizza', 'title': 'Pizza'}]",5.0,"{'latitude': 40.72742, 'longitude': -73.983531}",[],,"{'address1': '430 E 9th St', 'address2': '', '...",,,317.800247
9,uc5qQMzs96rzjK27epDCug,joes-pizza-new-york-4,Joe's Pizza,https://s3-media3.fl.yelpcdn.com/bphoto/FKjd9R...,False,https://www.yelp.com/biz/joes-pizza-new-york-4...,3096,"[{'alias': 'pizza', 'title': 'Pizza'}]",4.0,"{'latitude': 40.73060076, 'longitude': -74.002...",[delivery],$,"{'address1': '7 Carmine St', 'address2': '', '...",12123661182.0,(212) 366-1182,1918.585269


In [17]:
# How many did we get the details for?
results_per_page = len(results['businesses'])
results_per_page

20

In [18]:
# Use math.ceil to round up for the total number of pages of results.
n_pages = math.ceil((results['total'])/ results_per_page)
n_pages

70

In [19]:
# Converting the JSON file to a dataframe
nydf = pd.read_json(JSON_FILE)

In [20]:
nydf.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 0 entries
Empty DataFrame


In [21]:
# Dropping irrelevant columns that do not affect predicting the coorelation or good inspection scores to good yelp ratings
nydf.drop(['display_phone', 'phone', 'distance', 'price', 'id', 'is_closed'], axis=1, inplace=True)

KeyError: "['display_phone', 'phone', 'distance', 'price', 'id', 'is_closed'] not found in axis"

In [None]:
print(df['review_count'].unique())


In [None]:
# Function to filter out restaurants with 20 or fewer reviews
def filter_restaurants(df):
    filtered_dataframe = df[df['review_count'] >= 20]
    return filtered_dataframe

# executing function to eliminate restaurants with less than 20 reviews
df = filter_restaurants(df)

In [None]:
# Saving dataset to upload into other notebooks
df.to_csv('nydf', index=False)