In [16]:
import os
import requests 
from dotenv import load_dotenv
from llama_index.core import Document, VectorStoreIndex

In [3]:
from serpapi import GoogleSearch

In [18]:
# load_dotenv() (python-dotenv) is called first so that variables defined in a
# local .env file are available through the process environment below.
load_dotenv()

# Each key is None when the corresponding variable is not set.
YELP_API_KEY = os.environ.get("YELP_API_KEY")
SERP_API_KEY = os.environ.get("SERP_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [None]:
# --- Yelp business-search configuration -------------------------------------
LOCATION = "New York City"
SEARCH_TERM = "Restaurants"
NUM_TO_FETCH = 1
SORT_BY = "best_match"
REQUEST_TIMEOUT_SECONDS = 10  # requests waits indefinitely when no timeout is given

HEADERS = {
    "Accept": "application/json",
    "Authorization": f"Bearer {YELP_API_KEY}"
}

url = "https://api.yelp.com/v3/businesses/search"

# The query values are handed to requests via `params=` so they are
# URL-encoded for us; interpolating them into the URL by hand breaks as soon as
# a value contains a character such as '&' or '#'.
SEARCH_PARAMS = {
    "location": LOCATION,
    "term": SEARCH_TERM,
    "limit": NUM_TO_FETCH,
    "sort_by": SORT_BY,
}

response = requests.get(
    url,
    headers=HEADERS,
    params=SEARCH_PARAMS,
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Stop here with the HTTP status on a 4xx/5xx answer instead of letting an
# error payload flow into the cells that parse `response`.
response.raise_for_status()

print(response.text)

[REDACTED: an API credential was printed in this cell output; clear the output and rotate the key before sharing the notebook]
{"businesses": [{"id": "klAhw3xLQi9GF1tf_HnS7w", "alias": "izakaya-mew-new-york-3", "name": "Izakaya MEW", "image_url": "https://s3-media4.fl.yelpcdn.com/bphoto/-56mpvzUi0nfYADw2uQXhg/o.jpg", "is_closed": false, "url": "https://www.yelp.com/biz/izakaya-mew-new-york-3?adjust_creative=eJlDLyKWtirAqCcbkO6d9Q&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=eJlDLyKWtirAqCcbkO6d9Q", "review_count": 3130, "categories": [{"alias": "tapas", "title": "Tapas Bars"}, {"alias": "sushi", "title": "Sushi Bars"}, {"alias": "izakaya", "title": "Izakaya"}], "rating": 4.3, "coordinates": {"latitude": 40.750148, "longitude": -73.9863203465939}, "transactions": ["delivery", "pickup"], "price": "$$", "location": {"address1": "53 W 35th St", "address2": "", "address3": "", "city": "New York", "zip_code": "10001", "country": "US", "state": "NY", "display_addres

In [11]:
def filter_yelp_results(data):
    """
    Reduce a Yelp business-search payload to the fields used downstream.

    Parameters
    ----------
    data : dict
        Parsed JSON response. Businesses are read from its 'businesses' key;
        a missing or null key is treated as an empty result.

    Returns
    -------
    dict
        A new dictionary ``{"businesses": [...]}`` where each entry holds:
          - id
          - name
          - image_url
          - business_url     (read from the nested 'attributes' object)
          - display_address  (first element of location.display_address,
                              or None when there are no address lines)
          - display_phone
          - rating
        Any field absent from the input is returned as None.
    """
    output = {"businesses": []}

    for b in data.get("businesses") or []:
        # `or {}` / `or []` also cover keys that are present but null, which a
        # plain .get(key, default) would hand back as None.
        location = b.get("location") or {}
        address_lines = location.get("display_address") or []
        attributes = b.get("attributes") or {}

        output["businesses"].append({
            "id": b.get("id"),
            "name": b.get("name"),
            "image_url": b.get("image_url"),
            "business_url": attributes.get("business_url"),
            # Only the first address line is kept. The emptiness check keeps
            # one listing without an address from aborting the whole batch.
            "display_address": address_lines[0] if address_lines else None,
            "display_phone": b.get("display_phone"),
            "rating": b.get("rating"),
        })

    return output


# Parse the Yelp response body, trim it to the fields used later on, and show
# the trimmed structure.
yelp_payload = response.json()
filtered_data = filter_yelp_results(yelp_payload)
print(filtered_data)

{'businesses': [{'id': 'klAhw3xLQi9GF1tf_HnS7w', 'name': 'Izakaya MEW', 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/-56mpvzUi0nfYADw2uQXhg/o.jpg', 'business_url': 'http://mewnyc.com', 'display_address': '53 W 35th St', 'display_phone': '(646) 368-9384', 'rating': 4.3}]}


In [None]:
def extract_review_text(business_id, num, api_key):
    """
    Fetch review texts for a Yelp business through SerpApi's "yelp_reviews" engine.

    Parameters
    ----------
    business_id : str
        Yelp business id, sent as the ``place_id`` request parameter.
    num : int
        Sent as the ``num`` request parameter.
    api_key : str
        SerpApi key, sent as the ``api_key`` request parameter.

    Returns
    -------
    list of str
        The non-empty ``comment.text`` value of every entry under the
        response's ``reviews`` key, in response order. The list is empty when
        the response has no (or a null) ``reviews`` key.
    """
    params = {
        "api_key": api_key,
        "engine": "yelp_reviews",
        "place_id": business_id,
        "num": num,
    }
    results = GoogleSearch(params).get_dict()

    reviews_data = []
    for review in results.get("reviews") or []:
        comment = review.get("comment")
        # A review whose "comment" is missing, null, or not an object has no
        # text to extract; skip it instead of failing on None.get(...).
        if not isinstance(comment, dict):
            continue
        comment_text = comment.get("text")
        if comment_text:
            reviews_data.append(comment_text)

    return reviews_data

# Enrich every business record in place with its review texts
# (3 reviews are requested per business).
for business_entry in filtered_data["businesses"]:
    business_id = business_entry["id"]
    reviews = extract_review_text(business_id, 3, SERP_API_KEY)
    business_entry["reviews"] = reviews


In [13]:
# Show the business records again: after the review-fetching loop each entry
# also carries a 'reviews' list next to the Yelp fields.
print(filtered_data)

{'businesses': [{'id': 'klAhw3xLQi9GF1tf_HnS7w', 'name': 'Izakaya MEW', 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/-56mpvzUi0nfYADw2uQXhg/o.jpg', 'business_url': 'http://mewnyc.com', 'display_address': '53 W 35th St', 'display_phone': '(646) 368-9384', 'rating': 4.3, 'reviews': ["One of the best meals I've ever had. We randomly found this place and must I say WOW! We ordered quite a few tapas and shared. We all raved over the croquettes and the kimchi udon. I will dream about that udon!!! \n\nAll our food came out fast, staff was super friendly, and we were seated quickly. We were there at lunch time and had no wait, but I imagine this place could fill up quickly. I HIGHLY recommend this place if you find yourself in Koreatown!", "The atmosphere is casual here, and the service is pretty attentive for the most part. I don't have complaints for either, and this place is definitely one of the more affordable restaurants in K-town. There's a lot of variety on the menu, which mak

In [26]:
# Step 1: Process JSON into Documents with Metadata
documents = []

for business in filtered_data["businesses"]:
    # Metadata to be attached to each review, so a retrieved review can be
    # traced back to the business it describes
    metadata = {
        "business_id": business["id"],
        "name": business["name"],
        "image_url": business["image_url"],
        "business_url": business["business_url"],
        "display_address": business["display_address"],
        "display_phone": business["display_phone"],
        "rating": business["rating"]
    }

    # Create a Document for each review
    for review in business["reviews"]:
        documents.append(Document(text=review, metadata=metadata))

# Step 2: Build the Vector Store Index
vector_index = VectorStoreIndex.from_documents(documents)

# Step 3: Query the Index
query_engine = vector_index.as_query_engine()

# Step 4: Configure the Retriever
# The vector retriever's result-count keyword is `similarity_top_k`.
# NOTE(review): the previous `top_k=4` is not a documented retriever argument,
# so the intended limit of 4 was presumably not applied; confirm against the
# installed llama-index version.
retriever = vector_index.as_retriever(
    similarity_top_k=4,  # Number of top results to retrieve
)

# Example Retrieval
query = "What restaurants should I try that have great Japanese food?"
retrieved_docs = retriever.retrieve(query)

print("-" * 80)

# Access the metadata of the retrieved documents
for doc in retrieved_docs:
    print("Review Text:", doc.text)  # The review content
    print("Metadata:", doc.metadata)  # The attached metadata
    print("-" * 80)

--------------------------------------------------------------------------------
Review Text: The atmosphere is casual here, and the service is pretty attentive for the most part. I don't have complaints for either, and this place is definitely one of the more affordable restaurants in K-town. There's a lot of variety on the menu, which makes it great for taking anyone whose unsure of what they really want.

Age Tofu (6/10): Not the worst agedashi tofu I've had, but definitely one of the cheaper ones I've tasted
Kara Age (6.5/10): This is some pretty standard kara age fried chicken. Similar to the age tofu above
Kani Cream Croquettes (7/10): Pretty good croquettes that are fried well
Takoyaki Kara Age (7/10): I like this dish a lot, but could be better
Salmon Salad (8/10): The sashimi here is surprisingly solid. The salmon is a standout of this dish, and I would definitely recommend any raw salmon dish you order here.
Okonomiyaki (6.5/10): This is similar to the chicken kara age, but i