In [1]:
import pandas as pd

In [2]:
# Load the cleaned restaurant data, lowercase the header names, and discard
# rows that have no primary type.
df = (
    pd.read_csv('../data/clean_data.csv')
    .rename(columns=str.lower)
    .dropna(subset=['primarytype'])
)

In [3]:
# One dict per dataframe row, keyed by column name.
documents = df.to_dict(orient='records')

In [4]:
from openai import OpenAI

# No credentials are passed explicitly; presumably the SDK picks up the API
# key from the environment (e.g. OPENAI_API_KEY) — confirm before running.
client = OpenAI()

In [5]:
# Prompt rendered once per record with str.format. The {primarytype},
# {editorialsummary} and {reviews} placeholders are filled from the record;
# the doubled braces around the JSON example are escapes that render as
# literal "{" and "}".
prompt_template = """
You emulate a user of our Restaurant Suggestion Toronto assistant application.
An application where a user writes a query about what he wants to eat in terms of dishes, atmosphere, type of food.
Formulate 5 queries this user might ask based on a provided review.
Make the queries specific to the given reviews.
The record is the answer to the queries, and the queries should
be complete and not too short. Avoid the use of questions. Use as few words as possible from the record. 

The record:

primarytype: {primarytype}
editorialsummary: {editorialsummary}
reviews: {reviews}

Provide the output in parsable JSON without using code blocks:

{{"queries": ["query1", "query2", "query3", "query4", "query5"]}}
""".strip()

In [6]:
# Sanity check: render the template for the first record and inspect the text.
# print() is used so the newlines in the prompt are shown as line breaks.
prompt = prompt_template.format(**documents[0])
print(prompt)

You emulate a user of our Restaurant Suggestion Toronto assistant application.
An application where a user writes a query about what he wants to eat in terms of dishes, atmosphere, type of food.
Formulate 5 queries this user might ask based on a provided review.
Make the queries specific to the given reviews.
The record is the answer to the queries, and the queries should
be complete and not too short. Avoid the use of questions. Use as few words as possible from the record. 

The record:

primarytype: american_restaurant
editorialsummary: Straightforward eatery with an old-school vibe for diner-style fare from eggs to burgers to pie.
reviews: This restaurant is a beloved family spot that offers a delightful retro experience reminiscent of 60s-style American diners. Known for its fantastic value, the meals are generously portioned and freshly prepared in an open kitchen, making it a cozy choice for breakfast and brunch. Guests rave about the perfect over-easy eggs, delectable home frie

In [7]:
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the one user message of the request.
        model: Name of the chat model to call.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [8]:
# Try the sample prompt once; the value shown is the raw reply string.
queries = llm(prompt)
queries

'{"queries": ["looking for a cozy breakfast spot with retro diner vibes and generous portions", "seeking delicious over-easy eggs and fluffy pancakes in a nostalgic setting", "wanting a family-friendly restaurant with great brunch options and welcoming ambiance", "interested in a place serving high-quality eggs benedict with fantastic service", "in search of a diner-style eatery with fresh fruit toppings on waffles and classic music"]}'

In [9]:
import json

In [10]:
# Check that the raw reply parses as JSON.
json.loads(queries)

{'queries': ['looking for a cozy breakfast spot with retro diner vibes and generous portions',
  'seeking delicious over-easy eggs and fluffy pancakes in a nostalgic setting',
  'wanting a family-friendly restaurant with great brunch options and welcoming ambiance',
  'interested in a place serving high-quality eggs benedict with fantastic service',
  'in search of a diner-style eatery with fresh fruit toppings on waffles and classic music']}

In [11]:
def generate_queries(doc, model='gpt-4o-mini'):
    """Ask the model for the queries that ``doc`` should be the answer to.

    Args:
        doc: Mapping that supplies the fields referenced by
            ``prompt_template``; keys the template does not use are ignored
            by ``str.format``.
        model: Name of the chat model to call. Defaults to the model that
            was previously hard-coded here.

    Returns:
        The raw reply string from the first choice. It is not parsed here;
        the caller is expected to run ``json.loads`` on it.
    """
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        # JSON mode: the caller feeds the reply straight into json.loads, so
        # ask the API for a syntactically valid JSON object instead of
        # relying on the prompt wording alone. The API requires the word
        # "JSON" to appear in the messages, which the template provides.
        response_format={"type": "json_object"},
    )

    return response.choices[0].message.content

In [12]:
from tqdm.auto import tqdm

In [13]:
# Document id -> list of generated queries. Re-running this cell empties the
# dict, so everything generated so far would have to be requested again.
results = {}

In [14]:
# Generate queries for every record. Ids already present in `results` are
# skipped, so the cell can be re-run after an interruption without repeating
# requests that have already succeeded.
for doc in tqdm(documents):
    doc_id = doc['id']
    if doc_id not in results:
        queries_raw = generate_queries(doc)
        queries = json.loads(queries_raw)
        results[doc_id] = queries['queries']

  0%|          | 0/2025 [00:00<?, ?it/s]

In [15]:
# Flatten {id: [query, ...]} into one (id, query) tuple per generated query.
final_results = [
    (doc_id, q)
    for doc_id, queries in results.items()
    for q in queries
]

In [16]:
# Peek at the first (id, query) pair.
final_results[0]

('ChIJ2e-AEyMzK4gRC3Ta0xpNT2s',
 'looking for a retro diner experience with breakfast options')

In [17]:
# One row per (id, query) pair; the 'queries' column holds a single query per row.
df_results = pd.DataFrame(final_results, columns=['id', 'queries'])

In [18]:
# Persist the ground-truth pairs; index=False keeps the row index out of the file.
df_results.to_csv('../data/ground-truth-retrieval.csv', index=False)

In [19]:
# Preview the first lines of the written file via the shell `head` command.
!head ../data/ground-truth-retrieval.csv

id,queries
ChIJ2e-AEyMzK4gRC3Ta0xpNT2s,looking for a retro diner experience with breakfast options
ChIJ2e-AEyMzK4gRC3Ta0xpNT2s,want a cozy place for brunch with generous portions
ChIJ2e-AEyMzK4gRC3Ta0xpNT2s,craving light and fluffy pancakes in a nostalgic setting
ChIJ2e-AEyMzK4gRC3Ta0xpNT2s,interested in excellent service and welcoming ambiance for morning meals
ChIJ2e-AEyMzK4gRC3Ta0xpNT2s,seeking savory dishes like eggs benedict and sausage
ChIJv6F6Usw0K4gRQt20A8k3Jws,Looking for a barbecue restaurant with Southern-inspired dishes and a casual atmosphere.
ChIJv6F6Usw0K4gRQt20A8k3Jws,Desiring a place with comfort food like smoked beef brisket and Korean Fried Cauliflower.
ChIJv6F6Usw0K4gRQt20A8k3Jws,Searching for a restaurant suitable for both group dinners and romantic evenings.
ChIJv6F6Usw0K4gRQt20A8k3Jws,Interested in trying unique cocktails like the Electric Island and Black Mamba Margarita.
