In [24]:
import json
from fuzzywuzzy import fuzz
import numpy as np
import os

Load a single plan / evaluation-requirement pair

In [5]:
# Read one generated plan (JSON) into `plan` for ad-hoc inspection.
with open ('Outputs/Task1_json/Task1_json_1.json') as f:
    plan = json.load(f)

In [6]:
# Read one evaluation-requirement file (JSON) into `eval`.
# NOTE: binding the name `eval` at module level shadows Python's built-in
# eval() for every cell executed after this one.
with open ('Prompts/Task1_eval/Task1_eval_1.json') as f:
    eval = json.load(f)

Load the restaurant, hotel and attraction datasets used to evaluate all pairs

In [25]:
def _read_jsonl(path):
    """Parse a JSON Lines file into a list with one decoded object per line.

    Blank lines (for example a trailing empty line at the end of the file) are
    skipped instead of being handed to ``json.loads``, which would raise on an
    empty string. The file is read as UTF-8, the encoding JSON text is
    expected to use, rather than the platform's default encoding.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return [json.loads(record) for record in map(str.strip, file) if record]


# Candidate pools that the evaluation functions search by `business_id`.
restaurants = _read_jsonl('../Datasets/Restaurants_task1.jsonl')
hotels = _read_jsonl('../Datasets/Hotels_task1.jsonl')
attractions = _read_jsonl('../Datasets/Attractions_task1.jsonl')

In [196]:
def _match_business_id(name, address, pool, exact_name_wins=False, min_combined_score=120):
    """Resolve a (name, street address) pair to a ``business_id`` inside ``pool``.

    Matching proceeds in up to three steps:

    1. Case-insensitive exact match: if exactly one id matches on both name
       and address, that id is returned.
    2. Only when ``exact_name_wins`` is true: if some entry's name has a fuzzy
       similarity of 100, the first such entry is returned regardless of its
       address.
    3. Otherwise the entry with the highest (name score + address score) is
       returned, provided that sum reaches ``min_combined_score``.

    Parameters:
        name: business name to look for.
        address: street part of the address to look for.
        pool: list of dicts exposing 'name', 'address' and 'business_id'.
        exact_name_wins: enable step 2.
        min_combined_score: minimum summed ``fuzz.ratio`` score (each ratio is
            on a 0-100 scale) required to accept a fuzzy match.

    Returns:
        The matching ``business_id``, or -1 when nothing in the pool is close
        enough (including when the pool is empty).
    """
    # An empty pool cannot contain the business; without this guard the
    # max/argmax calls below would raise on an empty sequence.
    if not pool:
        return -1

    # Lower-case the query once instead of once per pool entry.
    name_key = name.lower()
    address_key = address.lower()

    ids_by_name = set()
    ids_by_address = set()
    for business in pool:
        if business['name'].lower() == name_key:
            ids_by_name.add(business['business_id'])
        if business['address'].lower() == address_key:
            ids_by_address.add(business['business_id'])

    # Name and address agree on exactly one business: unambiguous hit.
    agreed = ids_by_name & ids_by_address
    if len(agreed) == 1:
        return next(iter(agreed))

    # Otherwise fall back to fuzzy similarity over the whole pool.
    name_scores = np.array([fuzz.ratio(name_key, business['name'].lower()) for business in pool])
    address_scores = np.array([fuzz.ratio(address_key, business['address'].lower()) for business in pool])

    if exact_name_wins and name_scores.max() == 100:
        return pool[int(np.argmax(name_scores))]['business_id']

    combined_scores = name_scores + address_scores
    best = int(np.argmax(combined_scores))
    if combined_scores[best] >= min_combined_score:
        return pool[best]['business_id']
    # No candidate is similar enough: treat the business as outside the pool.
    return -1


def getID(name,address,category):
    """Map a recommended business to its ``business_id`` in the matching dataset.

    Parameters:
        name: business name taken from the plan.
        address: business address taken from the plan; only the text before
            the first comma is used for matching.
        category: 'restaurants', 'attractions' or 'hotels'; selects which
            module-level pool is searched.

    Returns:
        -2 when both ``name`` and ``address`` are the placeholder "-";
        -1 when no business in the pool matches well enough;
        otherwise the matched ``business_id``.
        ``None`` is returned for any other ``category`` value.
    """
    # NOTE(review): the original comment said a single "-" is enough to return
    # -2, but the condition has always required BOTH fields to be "-". The
    # behaviour is kept as implemented; confirm which one is intended.
    if name == "-" and address == "-":
        return -2

    # Keep only the street part of the address (text before the first comma).
    street = address.split(",")[0]

    if category == 'restaurants':
        return _match_business_id(name, street, restaurants)
    if category == 'attractions':
        # For attractions a perfect name match is accepted even when the
        # address does not match.
        return _match_business_id(name, street, attractions, exact_name_wins=True)
    if category == 'hotels':
        return _match_business_id(name, street, hotels)
    return None

def prepareEval(plan):
    """Turn a plan's itinerary into per-day records of resolved business ids.

    Each entry of ``plan['itinerary']`` yields a dict that keeps its 'days'
    value and replaces every meal, attraction and accommodation entry with
    the result of ``getID`` (attraction slots become lists of results).
    """
    def resolve(entry, category):
        # Look up a single {'name', 'address'} entry in the given category.
        return getID(entry['name'], entry['address'], category)

    def resolve_attractions(entries):
        return [resolve(entry, 'attractions') for entry in entries]

    return [
        {
            'days': itinerary_day['days'],
            'breakfast': resolve(itinerary_day['breakfast'], 'restaurants'),
            'morning_attractions': resolve_attractions(itinerary_day['morning_attractions']),
            'lunch': resolve(itinerary_day['lunch'], 'restaurants'),
            'afternoon_attractions': resolve_attractions(itinerary_day['afternoon_attractions']),
            'dinner': resolve(itinerary_day['dinner'], 'restaurants'),
            'night_attractions': resolve_attractions(itinerary_day['night_attractions']),
            'accommodation': resolve(itinerary_day['accommodation'], 'hotels'),
        }
        for itinerary_day in plan['itinerary']
    ]

def evaluate_outSidePool(plan_eval):
    """Return 1 if any value in any day record is -1 (or is a list containing -1), else 0."""
    def _is_outside(entry):
        # List-valued slots are flagged when any of their items equals -1.
        if isinstance(entry, list):
            return any(item == -1 for item in entry)
        return entry == -1

    flagged = any(_is_outside(entry) for day in plan_eval for entry in day.values())
    return 1 if flagged else 0

def evaluate_missingInfo(plan_eval):
    """Return 1 if any day record has a missing entry, else 0.

    A slot counts as missing when its value is -2, or when it is a list that
    is empty or contains -2. The 'night_attractions' slot is never checked.
    """
    def _is_missing(slot, entry):
        # Night attractions are optional, so they never count as missing.
        if slot == 'night_attractions':
            return False
        if isinstance(entry, list):
            return len(entry) == 0 or any(item == -2 for item in entry)
        return entry == -2

    incomplete = any(
        _is_missing(slot, entry)
        for day in plan_eval
        for slot, entry in day.items()
    )
    return 1 if incomplete else 0

def evaluate_day(plan_eval,eval):
    """Score whether the plan covers the requested number of days.

    Parameters:
        plan_eval: list of per-day records (its length is the planned number
            of days).
        eval: requirement dict; ``eval['day'][0]`` holds the requested
            duration. When it is a string, its leading run of digits is the
            day count (e.g. a value starting with "10" means ten days).

    Returns:
        ``(numerator, denominator)`` where the numerator is 1 when the plan
        length equals the requested day count and 0 otherwise; the
        denominator is always 1.
    """
    requested = eval['day'][0]

    if isinstance(requested, str):
        # Read the whole leading integer, not just the first character, so
        # that multi-digit durations such as "10 ..." are not truncated to 1.
        leading_digits = ''
        for char in requested:
            if not char.isdecimal():
                break
            leading_digits += char
        # With no leading digits, fall back to the first character so that a
        # malformed value fails in int() exactly as it did before.
        expected_days = int(leading_digits or requested[0])
    else:
        # Non-string containers: keep using their first element.
        expected_days = int(requested[0])

    day_numerator = 1 if len(plan_eval) == expected_days else 0
    day_denominator = 1
    return day_numerator, day_denominator

def evaluate_price(plan_eval,eval):
    """Score whether every resolved business in the plan fits the requested budget.

    ``eval['price'][0]`` selects the budget ('cheap budget', 'moderate budget'
    or 'expensive budget'), which maps to a list of accepted price tiers.
    Meals, accommodations and attractions whose id is -1 or -2 are ignored.
    Hotels whose price is 'Unknown price' are ignored as well.

    Returns:
        ``(numerator, denominator)``: the numerator is 0 as soon as one
        looked-up business has a price outside the accepted tiers, else 1;
        the denominator is always 1.
    """
    tiers_by_budget = {
        'cheap budget': ['$', '$$'],
        'moderate budget': ['$', '$$', '$$$'],
        'expensive budget': ['$', '$$', '$$$', '$$$$'],
    }
    allowed_tiers = tiers_by_budget[eval['price'][0]]
    unresolved = (-1, -2)
    within_budget = 1

    # Meals: breakfast, lunch and dinner of every day.
    meal_ids = [day[slot] for day in plan_eval for slot in ('breakfast', 'lunch', 'dinner')]
    for meal_id in meal_ids:
        if meal_id in unresolved:
            continue
        for candidate in restaurants:
            if candidate['business_id'] == meal_id and candidate['price'] not in allowed_tiers:
                within_budget = 0

    # Accommodation of every day.
    for day in plan_eval:
        stay_id = day['accommodation']
        if stay_id in unresolved:
            continue
        for candidate in hotels:
            if candidate['business_id'] != stay_id:
                continue
            tier = candidate['price']
            # Hotels without a known price cannot violate the budget.
            if tier != 'Unknown price' and tier not in allowed_tiers:
                within_budget = 0

    # Attractions from all three time slots of every day.
    attraction_ids = [
        attraction_id
        for day in plan_eval
        for slot in ('morning_attractions', 'afternoon_attractions', 'night_attractions')
        for attraction_id in day[slot]
    ]
    for attraction_id in attraction_ids:
        if attraction_id in unresolved:
            continue
        for candidate in attractions:
            if candidate['business_id'] == attraction_id and candidate['price'] not in allowed_tiers:
                within_budget = 0

    return within_budget, 1



def evaluate_attraction_orientation(plan_eval,eval):
    """Score whether at least one planned attraction fits the requested orientation.

    ``eval['attraction'][0]`` names the requested orientation. The attraction
    field that is inspected is that name with spaces replaced by underscores,
    and a value is acceptable when it equals 'medium <orientation>' or
    'high <orientation>'. Attraction ids of -1 or -2 are ignored.

    Returns:
        ``(1, 1)`` as soon as one looked-up attraction is acceptable, and
        ``(0, 1)`` when none is (previously the function fell through and
        returned ``None`` in that case, which broke tuple unpacking in the
        caller).
    """
    orientation_limit = eval['attraction'][0]
    orientation_field = orientation_limit.replace(' ', '_')
    acceptable_levels = ('medium ' + orientation_limit, 'high ' + orientation_limit)

    # Gather the ids from all three time slots before doing any lookup.
    attraction_ids = [
        attraction_id
        for day in plan_eval
        for slot in ('morning_attractions', 'afternoon_attractions', 'night_attractions')
        for attraction_id in day[slot]
    ]

    for attraction_id in attraction_ids:
        if attraction_id == -1 or attraction_id == -2:
            continue
        for attraction in attractions:
            if (attraction['business_id'] == attraction_id
                    and attraction[orientation_field] in acceptable_levels):
                return 1, 1

    # No attraction satisfied the requested orientation.
    return 0, 1



def evaluate_cuisine(plan_eval,eval):
    """Score whether at least one planned meal offers the requested cuisine.

    ``eval['cuisine'][0]`` is the requested cuisine; the value 'US' is
    expanded to the three American cuisine labels. A restaurant matches when
    its 'cuisine_1' or 'cuisine_2' is one of the accepted labels. Meal ids of
    -1 or -2 are ignored.

    Returns:
        ``(numerator, denominator)``: numerator 1 if any looked-up restaurant
        matches, else 0; the denominator is always 1.
    """
    requested = eval['cuisine'][0]
    if requested == 'US':
        accepted = ['American', 'American (New)', 'American (Traditional)']
    else:
        accepted = [requested]

    meal_ids = [day[slot] for day in plan_eval for slot in ('breakfast', 'lunch', 'dinner')]

    matched = 0
    for meal_id in meal_ids:
        if meal_id == -1 or meal_id == -2:
            continue
        for candidate in restaurants:
            if candidate['business_id'] != meal_id:
                continue
            offered = {candidate['cuisine_1'], candidate['cuisine_2']}
            # One overlapping label is enough to satisfy the preference.
            if offered.intersection(accepted):
                matched = 1

    return matched, 1


def evaluate_restaurants(plan_eval,eval):
    """Count how many restaurant requirements every planned meal satisfies.

    Each entry of ``eval['restaurants']`` has its first five characters
    stripped to obtain the restaurant field to inspect. A requirement is met
    when every distinct, looked-up meal restaurant has 'good <field>' or
    'excellent <field>' in that field. Meal ids of -1 or -2 are ignored.

    Returns:
        ``(numerator, denominator)``: the denominator is the number of
        requirements and the numerator is the number of them that are met.
    """
    requirements = eval['restaurants']
    fields = [requirement[5:] for requirement in requirements]
    meal_ids = [day[slot] for day in plan_eval for slot in ('breakfast', 'lunch', 'dinner')]

    def _is_violated(field):
        # True as soon as one distinct meal restaurant falls short on `field`.
        accepted = ('good ' + field, 'excellent ' + field)
        for meal_id in set(meal_ids):
            if meal_id == -1 or meal_id == -2:
                continue
            for candidate in restaurants:
                if candidate['business_id'] == meal_id and candidate[field] not in accepted:
                    return True
        return False

    unmet = sum(1 for field in fields if _is_violated(field))
    return len(requirements) - unmet, len(requirements)



def evaluate_hotels(plan_eval,eval):
    """Count how many hotel requirements every planned accommodation satisfies.

    Each entry of ``eval['hotel']`` has its first five characters stripped to
    obtain the hotel field to inspect. A requirement is met when every
    distinct, looked-up accommodation has 'good <field>' or
    'excellent <field>' in that field. Accommodation ids of -1 or -2 are
    ignored.

    Returns:
        ``(numerator, denominator)``: normally the denominator is the number
        of requirements and the numerator the number of them that are met.
        When every accommodation id equals -2 (no hotel recommended at all,
        which is also the case for an empty plan), ``(0, 0)`` is returned so
        the plan is left out of the hotel score.
    """
    requirements = eval['hotel']
    fields = [requirement[5:] for requirement in requirements]
    stay_ids = [day['accommodation'] for day in plan_eval]

    def _is_violated(field):
        # True as soon as one distinct accommodation falls short on `field`.
        accepted = ('good ' + field, 'excellent ' + field)
        for stay_id in set(stay_ids):
            if stay_id == -1 or stay_id == -2:
                continue
            for candidate in hotels:
                if candidate['business_id'] == stay_id and candidate[field] not in accepted:
                    return True
        return False

    unmet = sum(1 for field in fields if _is_violated(field))

    # Only hotels get this special case: a plan without any hotel
    # recommendation contributes nothing to either side of the ratio.
    if all(stay_id == -2 for stay_id in stay_ids):
        return 0, 0

    return len(requirements) - unmet, len(requirements)


In [197]:
# Evaluate every generated plan against the requirements of its prompt.
counter = 0            # number of plans evaluated (denominator of the failure rates)
failure_list = []      # one [outside_pool_flag, missing_info_flag] pair per plan
preference_list = []   # one list of [numerator, denominator] pairs per plan


def _plan_file_order(filename):
    """Sort key that orders Plan_Eval_<index>.json files by numeric index."""
    index = filename[len('Plan_Eval_'):-len('.json')]
    # Numeric indices first, in numeric order; anything else afterwards by name.
    return (0, int(index), index) if index.isdecimal() else (1, 0, index)


# os.listdir returns entries in arbitrary order and may include unrelated
# entries. Keep only the plan files and visit them in a stable order so that
# `counter` counts plans only and the rows of the result lists are reproducible.
plan_filenames = sorted(
    (entry for entry in os.listdir('Outputs/evals')
     if entry.startswith('Plan_Eval_') and entry.endswith('.json')),
    key=_plan_file_order,
)

for filename in plan_filenames:
    counter += 1

    # Load the plan and its corresponding requirement file; `with` makes sure
    # both file handles are closed again.
    index = filename[len('Plan_Eval_'):-len('.json')]
    with open(f'Outputs/evals/Plan_Eval_{index}.json') as plan_file:
        plan = json.load(plan_file)
    # NOTE: `eval` shadows the built-in eval(); the name is kept because it is
    # the module-level name this cell has always assigned.
    with open(f'Prompts/evals/Prompt_Eval_{index}.json') as prompt_file:
        eval = json.load(prompt_file)

    # Resolve every recommended business to its business id.
    plan_eval = prepareEval(plan)

    # Failure flags: [outside pool, missing info].
    results = [evaluate_outSidePool(plan_eval), evaluate_missingInfo(plan_eval)]
    failure_list.append(results)

    # Preference recall as [numerator, denominator] pairs, in the order:
    # day, price, attraction orientation, cuisine, restaurants, hotels.
    results = [
        list(evaluator(plan_eval, eval))
        for evaluator in (
            evaluate_day,
            evaluate_price,
            evaluate_attraction_orientation,
            evaluate_cuisine,
            evaluate_restaurants,
            evaluate_hotels,
        )
    ]
    preference_list.append(results)

In [198]:
# Collapse the per-plan failure flags into one failure rate per check:
# sum each flag column across plans, then divide by the number of plans.
# NOTE: this overwrites failure_list, so the cell can only be run once after
# the evaluation loop has filled it.
failure_list = [sum(flags) / counter for flags in zip(*failure_list)]
failure_list

[0.2, 0.2]

In [199]:
# One row per evaluated plan; each row holds [numerator, denominator] pairs in
# the order: day, price, attraction orientation, cuisine, restaurants, hotels.
preference_list

[[[1, 1], [1, 1], [1, 1], [1, 1], [0, 1], [0, 1]],
 [[1, 1], [0, 1], [1, 1], [1, 1], [0, 1], [1, 1]],
 [[1, 1], [0, 1], [1, 1], [1, 1], [0, 2], [1, 1]],
 [[1, 1], [0, 1], [1, 1], [1, 1], [0, 2], [1, 2]],
 [[1, 1], [0, 1], [1, 1], [1, 1], [0, 1], [0, 1]],
 [[1, 1], [0, 1], [1, 1], [1, 1], [0, 1], [0, 1]],
 [[1, 1], [1, 1], [1, 1], [1, 1], [0, 0], [1, 1]],
 [[1, 1], [1, 1], [1, 1], [1, 1], [0, 1], [1, 2]],
 [[1, 1], [1, 1], [1, 1], [1, 1], [0, 0], [1, 1]],
 [[1, 1], [1, 1], [1, 1], [1, 1], [0, 1], [1, 2]]]