In [5]:
generated_model = 'gpt4omini'

In [6]:
import json
from fuzzywuzzy import fuzz
import numpy as np
import os

Load all pairs

In [7]:
with open ('../Datasets/Restaurants_task1.jsonl', 'r') as file:
    restaurants = [json.loads(line.strip()) for line in file]

with open ('../Datasets/Hotels_task1.jsonl', 'r') as file:
    hotels = [json.loads(line.strip()) for line in file]

with open ('../Datasets/Attractions_task1.jsonl', 'r') as file:
    attractions = [json.loads(line.strip()) for line in file]

In [8]:
def getID(name,address,category):
    #as long as there is a '-', then return -2
    #if there is an empty list, then return []
    #if the information doesn't match, return -1

    if name == "-" and address == "-":
        return -2

    #normal case
    idFromName = []
    idFromAddress = []

    address = address.split(",")[0]
    
    #restaurants
    if category == 'restaurants':
        for restaurant in restaurants:
            if restaurant['name'].lower() == name.lower():
                idFromName.append(restaurant['business_id'])
            if restaurant['address'].lower() == address.lower():
                idFromAddress.append(restaurant['business_id'])
        set1 = set(idFromName)
        set2 = set(idFromAddress)
        #if the extracted id from name and address make an agreement
        if(len(set1 & set2) == 1):
            return list(set1 & set2)[0]
        # we have to use similarity score to determine the id
        else:
            name_sim_score = []
            address_sim_score = []

            for restaurant in restaurants:
                name_sim_score.append(fuzz.ratio(name.lower(), restaurant['name'].lower()))
                address_sim_score.append(fuzz.ratio(address.lower(), restaurant['address'].lower()))

            scores = np.array(name_sim_score) + np.array(address_sim_score)
            #if the score is high enough, then we claim the id
            if max(scores) >= 120:
                return restaurants[np.argmax(scores)]['business_id']
            #if the score is less than 60 for each, then we indicate that the business is out of the pool
            else:
                return -1
    #attractions
    if category == 'attractions':
        for attraction in attractions:
            if attraction['name'].lower() == name.lower():
                idFromName.append(attraction['business_id'])
            if attraction['address'].lower() == address.lower():
                idFromAddress.append(attraction['business_id'])
        
        set1 = set(idFromName)
        set2 = set(idFromAddress)

        if(len(set1 & set2) == 1):
            return list(set1 & set2)[0]
        else:
            name_sim_score = []
            address_sim_score = []

            for attraction in attractions:
                name_sim_score.append(fuzz.ratio(name.lower(), attraction['name'].lower()))
                address_sim_score.append(fuzz.ratio(address.lower(), attraction['address'].lower()))

            if max(name_sim_score) == 100:
                return attractions[np.argmax(name_sim_score)]['business_id']

            scores = np.array(name_sim_score) + np.array(address_sim_score)
            if max(scores) >= 120:
                return attractions[np.argmax(scores)]['business_id']
            else:
                return -1
    #hotels
    if category == 'hotels':
        for hotel in hotels:
            if hotel['name'].lower() == name.lower():
                idFromName.append(hotel['business_id'])
            if hotel['address'].lower() == address.lower():
                idFromAddress.append(hotel['business_id'])
        set1 = set(idFromName)
        set2 = set(idFromAddress)
        if(len(set1 & set2) == 1):
            return list(set1 & set2)[0]
        else:
            name_sim_score = []
            address_sim_score = []

            for hotel in hotels:
                name_sim_score.append(fuzz.ratio(name.lower(), hotel['name'].lower()))
                address_sim_score.append(fuzz.ratio(address.lower(), hotel['address'].lower()))

            scores = np.array(name_sim_score) + np.array(address_sim_score)
            if max(scores) >= 120:
                return hotels[np.argmax(scores)]['business_id']
            else:
                return -1

def prepareEval(plan):
    plan_eval = []
    for days in plan['itinerary']:
        day = {}
        day['days'] = days['days']
        #print(days['breakfast']['name'])
        day['breakfast'] = getID(days['breakfast']['name'],days['breakfast']['address'],'restaurants')
        day['morning_attractions'] = [getID(attraction['name'],attraction['address'],'attractions') for attraction in days['morning_attractions']]
        day['lunch'] = getID(days['lunch']['name'],days['lunch']['address'],'restaurants')
        day['afternoon_attractions'] = [getID(attraction['name'],attraction['address'],'attractions') for attraction in days['afternoon_attractions']]
        day['dinner'] = getID(days['dinner']['name'],days['dinner']['address'],'restaurants')
        day['night_attractions'] = [getID(attraction['name'],attraction['address'],'attractions') for attraction in days['night_attractions']]
        day['accommodation'] = getID(days['accommodation']['name'],days['accommodation']['address'],'hotels')
        plan_eval.append(day)
    return plan_eval

def evaluate_outSidePool(plan_eval):
    for day in plan_eval:
        for key,value in day.items():
            if isinstance(value, list):
                for id in value:
                    if id == -1:
                        return 1
            else:
                if value == -1:
                    return 1
    return 0

def evaluate_missingInfo(plan_eval):
    for day in plan_eval:
        for key,value in day.items():
            #night attraction can be skipped
            if key == 'night_attractions':
                continue

            if isinstance(value, list):

                if(len(value) == 0):
                        return 1
                else:
                    for val in value:
                        if val == -2:
                            return 1
            else:
                if value == -2:
                    return 1

    return 0

def evaluate_day(plan_eval,eval):
    day_numerator = 0
    day_denominator = 1

    if(len(plan_eval) == int(eval['day'][0][0])):
        day_numerator = 1
    return day_numerator, day_denominator

def evaluate_price(plan_eval,eval):
    price_numerator = 0
    price_denominator = 0

    price_map = {'cheap budget':['$','$$'],'moderate budget':['$','$$','$$$'],'expensive budget':['$$','$$$','$$$$']}
    price_limit = price_map[eval['price'][0]]

    #price - meals
    all_meals = []
    for day in plan_eval:
        all_meals.append(day['breakfast'])
        all_meals.append(day['lunch'])
        all_meals.append(day['dinner'])
    price_denominator += len(all_meals)

    for restaurant_id in all_meals:
        if restaurant_id != -1 and restaurant_id != -2:
            for restaurant in restaurants:
                if(restaurant['business_id'] == restaurant_id):
                    if(restaurant['price'] in price_limit):
                        price_numerator += 1


    #price hotel
    for day in plan_eval:
        hotel_id = day['accommodation']
        if hotel_id != -1 and hotel_id != -2:
            price_denominator += 1
            for hotel in hotels:
                if(hotel['business_id'] == hotel_id):
                    if(hotel['price'] in price_limit):
                        price_numerator += 1
    


    #price - attractions
    all_attractions = []
    for day in plan_eval:
        for id in day['morning_attractions']:
            all_attractions.append(id)
        for id in day['afternoon_attractions']:
            all_attractions.append(id)
        for id in day['night_attractions']:
            all_attractions.append(id)
    price_denominator += len(all_attractions)

    for attraction_id in all_attractions:
        if attraction_id != -1 and attraction_id != -2:
            for attraction in attractions:
                if(attraction['business_id'] == attraction_id):
                    if(attraction['price'] in price_limit):
                        price_numerator += 1


    return price_numerator, price_denominator



def evaluate_attraction_orientation(plan_eval,eval):
    #attraction orientation
    orientation_numerator = 0
    orientation_denominator = 0

    oritentation_limit = eval['attraction'][0]
    oritentation_category = oritentation_limit.replace(' ','_')
    oritentation_acceptable_list = ['medium ' + oritentation_limit, 'high ' + oritentation_limit]
    #print(oritentation_acceptable_list)
    all_attractions = []
    for day in plan_eval:
        for id in day['morning_attractions']:
            all_attractions.append(id)
        for id in day['afternoon_attractions']:
            all_attractions.append(id)
        for id in day['night_attractions']:
            all_attractions.append(id)
    orientation_denominator += len(all_attractions)
    for attraction_id in all_attractions:
        if attraction_id != -1 and attraction_id != -2:
            for attraction in attractions:
                if(attraction['business_id'] == attraction_id):
                    if attraction[oritentation_category] in oritentation_acceptable_list:
                        #print("attraction orientation is acceptable which is: ", attraction[oritentation_category])
                        orientation_numerator += 1
                        
    return orientation_numerator,orientation_denominator
                    #else:
                        #print("attraction orientation is not acceptable which is: ", attraction[oritentation_category])



def evaluate_cuisine(plan_eval,eval):
    #cuisine
    cuisine_numerator = 0
    cuisine_denominator = 0
    cuisine_satisfied = False

    cuisine_limit = [eval['cuisine'][0]]
    #print(cuisine_limit)
    if cuisine_limit == ['US']:
        cuisine_limit = ['American','American (New)','American (Traditional)']

    #at least we have one restaurant that match the cuisin, we will turn it to true
    all_meals = []
    for day in plan_eval:
        all_meals.append(day['breakfast'])
        all_meals.append(day['lunch'])
        all_meals.append(day['dinner'])
    cuisine_denominator += len(all_meals)
    #print(cuisine_denominator)

    for restaurant_id in all_meals:
        if restaurant_id != -1 and restaurant_id != -2:
            for restaurant in restaurants:
                if(restaurant['business_id'] == restaurant_id):
                    cuisine_provided = []
                    cuisine_provided.append(restaurant['cuisine_1'])
                    cuisine_provided.append(restaurant['cuisine_2'])
                    #print(restaurant_id)
                    #print((set(cuisine_provided))) 
                    if(len(list(set(cuisine_limit) & set(cuisine_provided))) > 0):
                        cuisine_numerator += 1
    #print(list(set(cuisine_limit)))
    
    return cuisine_numerator, cuisine_denominator


def evaluate_restaurants(plan_eval,eval):
    #restaurant 
    restaurants_numerator = 0
    restaurants_denominator = 0

    restaurants_limits = eval['restaurants']
    restaurants_category = [cat[5:] for cat in restaurants_limits]



    all_meals = []
    for day in plan_eval:
        all_meals.append(day['breakfast'])
        all_meals.append(day['lunch'])
        all_meals.append(day['dinner'])
    restaurants_denominator += len(all_meals)
    restaurants_denominator = restaurants_denominator * len(restaurants_category)

    for cat in restaurants_category:
        find_not_satisfication = False
        restaurants_acceptable_list = []
        restaurants_acceptable_list.append('good ' + cat)
        restaurants_acceptable_list.append('excellent ' + cat)

        for restaurant_id in set(all_meals): 
            if restaurant_id != -1 and restaurant_id != -2:
                for restaurant in restaurants:
                    if(restaurant['business_id'] == restaurant_id):
                        if(restaurant[cat] in restaurants_acceptable_list):
                            restaurants_numerator += 1
    return restaurants_numerator, restaurants_denominator



def evaluate_hotels(plan_eval,eval):
    #Hotel

    hotel_numerator = 0
    hotel_denominator = 0
    

    hotel_limit = eval['hotel']
    hotel_cat = [cat[5:] for cat in hotel_limit]

    all_hotels = []
    for day in plan_eval:
        hotel_id = day['accommodation']
        all_hotels.append(hotel_id)
    hotel_denominator += len(all_hotels)
    hotel_denominator *= len(hotel_cat)

    for cat in hotel_cat:
        find_not_satisfied = False
        hotel_acceptable_list = []
        hotel_acceptable_list.append('good ' + cat)
        hotel_acceptable_list.append('excellent ' + cat)

        for hotel_id in set(all_hotels):
            if hotel_id != -1 and hotel_id != -2:
                for hotel in hotels:
                    if(hotel['business_id'] == hotel_id):    
                        if(hotel[cat] in hotel_acceptable_list):
                            hotel_numerator += 1
    


    #note: only for hotel, we need to consider what if no recommendation, which means
    # all -2, we don't consider this in other categories since there is low chance that
    #llm didn't provide any reccommendation for food or attractions. 
    if(all(x == -2 for x in all_hotels)):
        hotel_numerator= 0 
        hotel_denominator = 0

    return hotel_numerator, hotel_denominator

def getFailure(failure_list):
    failure = [sum(x) for x in zip(*failure_list)]
    failure = [(x/len(failure_list)) for x in failure]
    return failure


def getMicro(preference_list):    
    micro = np.array([0,0])
    for record in preference_list:
        for cat in record:
            micro += np.array(cat)
    print(micro)
    return micro[0]/micro[1]

def getMacro(preference_list):    
    numerator = []
    denomminator = len(preference_list)
    for record in preference_list:
        each =  [sum(x) for x in zip(*record)]
        numerator.append(1 if each[0] == each[1] else 0)
    print(numerator, denomminator)
    return sum(numerator)/denomminator

def getNewMacro(preference_list, passrate):
    numerator = 0
    denominator = len(preference_list)

    for plan in preference_list:
        passedPlan = True
        for day in plan:
            if day[0] == day[1] == 0:
                continue
            if (day[0] / day[1]) < passrate:
                passedPlan = False
                break
        if passedPlan:
            numerator += 1

    return numerator/denominator




In [9]:
def getFailureAndPreferenceList(generated_model):
    failure_list = []
    preference_list = []
    for filename in os.listdir(f'Outputs/evals/{generated_model}'):
        #just a test
        #if(filename != 'Plan_Eval_1.json'):
        #    continue

    

        #load the plan in json format, and it's corresponding requirement eval json file.
        index = filename[:-5][10:]
        plan = json.load(open(f'Outputs/evals/{generated_model}/Plan_Eval_{index}.json'))
        eval = json.load(open(f'Prompts/evals/Prompt_Eval_{index}.json'))
        
        # Failure rate related
        # prepare a result list to return
        # outofpool, missinginfo,
        results = []
        # prepare the evaluation for each plan, search the business id
        plan_eval = prepareEval(plan)

        outsidepool = evaluate_outSidePool(plan_eval)
        results.append(outsidepool)


        missingInfo = evaluate_missingInfo(plan_eval)
        results.append(missingInfo)

        failure_list.append(results)

        # preference recall related
        
        results = []

        #day
        day_numerator, day_denominator = evaluate_day(plan_eval,eval)
        results.append([day_numerator, day_denominator])

        #price
        price_numerator, price_denominator = evaluate_price(plan_eval,eval)
        results.append([price_numerator, price_denominator])

        #attraction orientation
        attraction_numerator, attraction_denominator = evaluate_attraction_orientation(plan_eval,eval)
        results.append([attraction_numerator, attraction_denominator])
        
        #cuisine
        cuisine_numerator, cuisine_denominator = evaluate_cuisine(plan_eval,eval)
        results.append([cuisine_numerator, cuisine_denominator])

        #restaurants
        restaurants_numerator, restaurants_denominator = evaluate_restaurants(plan_eval,eval)
        results.append([restaurants_numerator, restaurants_denominator])

        #hotels
        hotels_numerator, hotels_denominator = evaluate_hotels(plan_eval,eval)
        results.append([hotels_numerator, hotels_denominator])

        preference_list.append(results)

    return failure_list, preference_list

In [10]:
failure_list, preference_list = getFailureAndPreferenceList(generated_model)

In [11]:
#out side the pool, missing info
failure_list

[[0, 0],
 [1, 0],
 [0, 0],
 [0, 0],
 [0, 0],
 [0, 0],
 [0, 0],
 [0, 0],
 [0, 0],
 [0, 0],
 [0, 0]]

In [12]:
#day (100%), price (half), attraction orientation (100%), 
# cuisine (100%), restaurants (0), hotels (half)
preference_list

[[[1, 1], [20, 24], [7, 12], [6, 9], [12, 18], [1, 3]],
 [[1, 1], [26, 28], [4, 12], [0, 12], [0, 0], [1, 8]],
 [[1, 1], [32, 32], [16, 16], [3, 12], [9, 12], [1, 8]],
 [[1, 1], [10, 16], [8, 8], [0, 6], [0, 0], [1, 2]],
 [[1, 1], [19, 24], [7, 12], [4, 9], [0, 0], [1, 9]],
 [[1, 1], [31, 32], [15, 16], [3, 12], [0, 0], [0, 4]],
 [[1, 1], [24, 32], [15, 16], [5, 12], [0, 0], [0, 4]],
 [[1, 1], [15, 16], [5, 8], [1, 6], [0, 0], [0, 2]],
 [[1, 1], [20, 24], [7, 12], [4, 9], [0, 0], [0, 3]],
 [[1, 1], [22, 32], [16, 16], [7, 12], [0, 0], [0, 4]],
 [[1, 1], [14, 16], [8, 8], [1, 6], [0, 0], [2, 6]]]

In [13]:
failure = getFailure(failure_list)
print(failure)

[0.09090909090909091, 0.0]


In [14]:
micro = getMicro(preference_list)
print(micro) # will normalized this after make all denomiator 0, means at least one requirement for each category.

[414 611]
0.6775777414075287


In [15]:
macro = getMacro(preference_list)
print(macro) # macro is getting useless here. 

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 11
0.0


In [16]:
newMacro = getNewMacro(preference_list, 0.5)
newMacro

0.0