In [81]:
import json
import numpy as np


def getRequirements(requirements):
    day = requirements['day']
    price = requirements['price']
    attraction = requirements['attraction']
    cuisine = requirements['cuisine']
    restaurant = requirements['restaurant']
    hotel = requirements['hotel']
    return day, price, attraction, cuisine, restaurant,hotel

def mapBudget(price):
    if price[0] == 'expensive budget':
        return ['$$','$$$','$$$$']
    if price[0] == 'moderate budget':
        return ['$','$$','$$$']
    if price[0] == 'cheap budget':
        return ['$','$$']

def mapRes(restaurant):
    #['good freshness', 'good environment']
    limits = []
    for lim in restaurant:
        limits.append(lim.split(' ')[1])
    return limits

def getResList(price, cuisine, restaurant, restaurants):
    res_cands = []
    price_limit = mapBudget(price)
    res_limit = mapRes(restaurant)

    if cuisine == ['US']:
        cuisine = ['American','American (New)','American (Traditional)']
    for res in restaurants:
        if res['price'] != None and res['price'] in price_limit:
            if res['cuisine_1'] in cuisine or res['cuisine_2'] in cuisine:
                for lim in res_limit:
                    if (res[lim] == 'good '+ lim) or (res[lim] == 'excellent ' + lim):
                        res_cands.append(res)
    return res_cands

def getAttList(price, attraction, attractions):
    att_cands = []
    price_limit = mapBudget(price)
    for att in attractions:
        if att['price'] in price_limit:
            if att[attraction[0]] == 'medium ' + attraction[0] or att[attraction[0]] == 'high ' + attraction[0]:
                att_cands.append(att)
            #if att[attraction]
    return att_cands

def mapHot(hotel):
    limits = []
    for lim in hotel:
        limits.append(lim.split(' ')[1])
    return limits

def getHotelList(price, hotel, hotels):
    hotel_cands = []
    price_limit = mapBudget(price)
    hotel_limit = mapHot(hotel)
    for hot in hotels:
        if hot['price'] in price_limit:
            for lim in hotel_limit:
                if (hot[lim] == 'good '+ lim) or (hot[lim] == 'excellent ' + lim):
                    hotel_cands.append(hot)
    return hotel_cands

def euclidean_distance(coord1, coord2):
    return ((coord1[0] - coord2[0]) ** 2 + (coord1[1] - coord2[1]) ** 2) ** 0.5

def arrangeRoute(day, att_finals, hotel_finals):
    att_cords = []
    hotel_cords = []

    for i in att_finals:
        att_cords.append([i['latitude'],i['longitude']])
    for i in hotel_finals:
        hotel_cords.append([i['latitude'],i['longitude']])

    orders = naive_route(day, att_cords, hotel_cords)
    orders = np.array(orders).flatten()
    att_order = []
    for o in orders:
        att_order.append(att_finals[o])
    return att_order

def naive_route(days, att_coords, hotel_coords):
    if len(att_coords) < days * 4:
        raise ValueError("Not enough attractions to cover all days")
    
    hotel = tuple(hotel_coords[0])
    remaining_indices = set(range(len(att_coords)))
    full_plan = []
    
    for _ in range(days):
        if len(remaining_indices) < 4:
            break
        
        # Start from the closest attraction to the hotel
        first_index = min(remaining_indices, key=lambda i: euclidean_distance(hotel, att_coords[i]))
        day_route = [first_index]
        remaining_indices.remove(first_index)
        
        # Greedily choose the next closest attractions
        current_index = first_index
        for _ in range(3):  # Visit 4 attractions in total
            next_index = min(remaining_indices, key=lambda i: euclidean_distance(att_coords[current_index], att_coords[i]))
            day_route.append(next_index)
            remaining_indices.remove(next_index)
            current_index = next_index
        
        full_plan.append(day_route)
    
    return full_plan

def parseEval(res_info, att_info, hotel_info):
    itinerary = []
    
    # Ensure there's enough data for at least one day
    days = min(len(res_info) // 3, len(att_info) // 4)
    
    for i in range(days):
        day_plan = {
            "days": str(i + 1),
            "breakfast": {"name": res_info[i * 3][0], "address": res_info[i * 3][1]},
            "morning_attractions": [{"name": att_info[i * 4][0], "address": att_info[i * 4][1]}],
            "lunch": {"name": res_info[i * 3 + 1][0], "address": res_info[i * 3 + 1][1]},
            "afternoon_attractions": [
                {"name": att_info[i * 4 + 1][0], "address": att_info[i * 4 + 1][1]},
                {"name": att_info[i * 4 + 2][0], "address": att_info[i * 4 + 2][1]}
            ],
            "dinner": {"name": res_info[i * 3 + 2][0], "address": res_info[i * 3 + 2][1]},
            "night_attractions": [{"name": att_info[i * 4 + 3][0], "address": att_info[i * 4 + 3][1]}],
            "accommodation": {"name": hotel_info[0][0], "address": hotel_info[0][1]}
        }
        itinerary.append(day_plan)
    
    return {"index": 1, "plan": {"itinerary": itinerary}}


with open('Prompts/evals.jsonl', 'r') as f:
    evals = [json.loads(line.strip()) for line in f]

with open ('Dataset/gpt4o/attractions.jsonl', 'r') as f:
    attractions = [json.loads(line.strip()) for line in f]

with open('Dataset/gpt4o/hotels.jsonl', 'r') as f:
    hotels = [json.loads(line.strip()) for line in f]

with open('Dataset/gpt4o/restaurants.jsonl', 'r') as f:
    restaurants = [json.loads(line.strip()) for line in f]

numPlan = 100

for i in range(numPlan):
    requirements = evals[i]['eval_info']
    day, price, attraction, cuisine, restaurant,hotel = getRequirements(requirements)

    res_cands = getResList(price, cuisine, restaurant,restaurants)
    att_cands = getAttList(price, attraction,attractions)
    hotel_cands = getHotelList(price, hotel,hotels)

    resNumber = int(day[0][0]) * 3
    attNumber = int(day[0][0]) * 4
    hotelNumber = 1
    #random select from candidates
    if(resNumber > len(res_cands)):
        allow_replace = True
    else:
        allow_replace = False
    res_finals = np.random.choice(res_cands, size=resNumber, replace=allow_replace)
    att_finals = np.random.choice(att_cands, size=attNumber, replace=False)
    hotel_finals = np.random.choice(hotel_cands, size=hotelNumber, replace=False)
    
    #get the route calculated naively
    attOrder = arrangeRoute(int(day[0][0]),att_finals, hotel_finals)

    res_info = [[i['name'],i['address']] for i in res_finals]
    att_info = [[i['name'],i['address']] for i in attOrder]
    hotel_info = [[i['name'],i['address']] for i in hotel_finals]
    
    parsed_eval = parseEval(res_info, att_info, hotel_info)

    with open('Output/greedy/evals/filteredDataRouteOP.jsonl', 'a') as f:
        f.write(json.dumps(parsed_eval) + '\n')

ValueError: 'a' cannot be empty unless no samples are taken

In [None]:
# cuisine like Irish cannot be satisfied.
# there's something wrong with the plan generator
#there is something wrong with the actual evaluation