In [24]:
import json
from fuzzywuzzy import fuzz
import numpy as np
import os

Load single pair

In [5]:
with open ('Outputs/Task1_json/Task1_json_1.json') as f:
    plan = json.load(f)

In [6]:
with open ('Prompts/Task1_eval/Task1_eval_1.json') as f:
    eval = json.load(f)

Load all pairs

In [25]:
def _load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped instead of being handed to ``json.loads``, which would raise.
    """
    with open(path, 'r') as file:
        return [json.loads(line) for line in file if line.strip()]


# Candidate pools used by getID and by the preference checks further down.
restaurants = _load_jsonl('../Datasets/Restaurants_task1.jsonl')
hotels = _load_jsonl('../Datasets/Hotels_task1.jsonl')
attractions = _load_jsonl('../Datasets/Attractions_task1.jsonl')

In [61]:
def _match_business(name, address, pool, min_combined_score, accept_exact_name):
    """Resolve a (name, address) pair to a ``business_id`` inside one pool.

    Args:
        name: business name as written in the plan.
        address: street part of the address as written in the plan.
        pool: list of business records with 'name', 'address' and
            'business_id' keys.
        min_combined_score: minimum value of name score + address score
            (each a ``fuzz.ratio`` result) for a fuzzy match to be accepted.
        accept_exact_name: when True, a name score of 100 is accepted on its
            own, regardless of the address.

    Returns:
        The matching ``business_id``, or -1 when nothing in the pool matches.
    """
    wanted_name = name.lower()
    wanted_address = address.lower()

    # Exact (case-insensitive) lookup: accept only when name and address
    # agree on exactly one business.
    ids_from_name = {b['business_id'] for b in pool if b['name'].lower() == wanted_name}
    ids_from_address = {b['business_id'] for b in pool if b['address'].lower() == wanted_address}
    agreed = ids_from_name & ids_from_address
    if len(agreed) == 1:
        return next(iter(agreed))

    # An empty pool cannot contain the business; without this guard the
    # max()/argmax calls below would raise on empty input.
    if not pool:
        return -1

    # Otherwise fall back to similarity scores.
    name_scores = [fuzz.ratio(wanted_name, b['name'].lower()) for b in pool]
    address_scores = [fuzz.ratio(wanted_address, b['address'].lower()) for b in pool]

    if accept_exact_name and max(name_scores) == 100:
        return pool[int(np.argmax(name_scores))]['business_id']

    combined = np.array(name_scores) + np.array(address_scores)
    best = int(np.argmax(combined))
    if combined[best] >= min_combined_score:
        return pool[best]['business_id']
    # Combined similarity too low: the business is treated as outside the pool.
    return -1


def getID(name, address, category, min_combined_score=120):
    """Map a recommended business to its ``business_id`` in the candidate pool.

    Args:
        name: business name from the plan ("-" marks a missing value).
        address: business address from the plan; only the text before the
            first comma is compared.
        category: 'restaurants', 'attractions' or 'hotels'; selects the pool.
        min_combined_score: threshold on name score + address score for the
            fuzzy fallback (defaults to 120).

    Returns:
        -2 when both name and address are "-" (missing information),
        -1 when the business cannot be matched inside the pool,
        otherwise the matched ``business_id``.

    Raises:
        ValueError: if ``category`` is not one of the three known pools.
    """
    # Missing information: both fields carry the "-" placeholder.
    if name == "-" and address == "-":
        return -2

    street = address.split(",")[0]

    if category == 'restaurants':
        return _match_business(name, street, restaurants, min_combined_score, False)
    if category == 'attractions':
        # Attractions additionally accept an exact name match on its own.
        return _match_business(name, street, attractions, min_combined_score, True)
    if category == 'hotels':
        return _match_business(name, street, hotels, min_combined_score, False)
    raise ValueError(f"unknown category: {category!r}")

def prepareEval(plan):
    """Translate every recommendation of a plan into pool business ids.

    For each entry of ``plan['itinerary']`` the meals, attraction lists and
    accommodation are passed through ``getID``; the result is one dict per
    day with the same slot names as the plan.
    """
    def single(entry, category):
        return getID(entry['name'], entry['address'], category)

    def several(entries):
        return [single(entry, 'attractions') for entry in entries]

    return [
        {
            'days': itinerary_day['days'],
            'breakfast': single(itinerary_day['breakfast'], 'restaurants'),
            'morning_attractions': several(itinerary_day['morning_attractions']),
            'lunch': single(itinerary_day['lunch'], 'restaurants'),
            'afternoon_attractions': several(itinerary_day['afternoon_attractions']),
            'dinner': single(itinerary_day['dinner'], 'restaurants'),
            'night_attractions': several(itinerary_day['night_attractions']),
            'accommodation': single(itinerary_day['accommodation'], 'hotels'),
        }
        for itinerary_day in plan['itinerary']
    ]

def evaluate_outSidePool(plan_eval):
    """Return 1 if any slot of any day holds the out-of-pool marker -1, else 0.

    List-valued slots are inspected element by element; scalar slots are
    compared directly.
    """
    for day in plan_eval:
        for entry in day.values():
            resolved_ids = entry if isinstance(entry, list) else [entry]
            if any(resolved == -1 for resolved in resolved_ids):
                return 1
    return 0

def evaluate_missingInfo(plan_eval):
    """Return 1 if any required slot is missing, else 0.

    A slot counts as missing when it is an empty list, a list containing the
    marker -2, or the scalar -2. The 'night_attractions' slot is optional and
    is never inspected.
    """
    for day in plan_eval:
        for slot, entry in day.items():
            # night attraction can be skipped
            if slot == 'night_attractions':
                continue

            if not isinstance(entry, list):
                if entry == -2:
                    return 1
                continue

            if len(entry) == 0 or any(item == -2 for item in entry):
                return 1

    return 0

In [62]:
for filename in os.listdir('Outputs/evals'):
    # just a test: only the plan with index 6 is evaluated for now
    if filename != 'Plan_Eval_6.json':
        continue

    # prepare a result list to return
    # order: [outofpool, missinginfo]
    results = []

    # load the plan in json format and its corresponding requirement eval json file;
    # the index is what remains after dropping the 10-character prefix
    # ("Plan_Eval_") and the 5-character ".json" suffix.
    index = filename[:-5][10:]
    # `with` closes both files instead of leaving the handles to the garbage collector.
    with open(f'Outputs/evals/Plan_Eval_{index}.json') as plan_file:
        plan = json.load(plan_file)
    # NOTE: the name `eval` shadows the builtin, but later cells read this variable,
    # so it is kept.
    with open(f'Prompts/evals/Prompt_Eval_{index}.json') as eval_file:
        eval = json.load(eval_file)

    # prepare the evaluation for each plan, search the business id
    plan_eval = prepareEval(plan)
    print(plan_eval)

    outsidepool = evaluate_outSidePool(plan_eval)
    results.append(outsidepool)

    missingInfo = evaluate_missingInfo(plan_eval)
    results.append(missingInfo)

    print(results)

[{'days': 'Day 1', 'breakfast': -2, 'morning_attractions': [], 'lunch': 'LQZdcwfqneUIrZTy1VvWfg', 'afternoon_attractions': [-1], 'dinner': 'G2oWC7vWPTiTZt--0bUMzA', 'night_attractions': ['LklDTDwgJLbUjd67ZKsvCg'], 'accommodation': '55gCXlWDDCdttR3yRss1xw'}, {'days': 'Day 2', 'breakfast': 'K7KHmHzxNwzqiijSJeKe_A', 'morning_attractions': [], 'lunch': 'ytynqOUb3hjKeJfRj5Tshw', 'afternoon_attractions': ['4mWzXhD8vo0bABVCGAhlqA'], 'dinner': 'IcLm9Z1zphLnzUvrFK6aiA', 'night_attractions': [-1], 'accommodation': '55gCXlWDDCdttR3yRss1xw'}, {'days': 'Day 3', 'breakfast': 'rYqmaOIULRouz_1db07OdQ', 'morning_attractions': [], 'lunch': 'NJe5fkia_63rdKXRdJlmkA', 'afternoon_attractions': ['dBRWKIS7h-qZCi8EjUJ2HA'], 'dinner': '9mA9qYqiv4c0T9ASid3PIQ', 'night_attractions': [], 'accommodation': '55gCXlWDDCdttR3yRss1xw'}]
[1, 1]


In [19]:
getID('Amada','217 Chestnut St','restaurants')

'S8ZFYEgMejpChID8tzKo9A'

In [8]:
# Resolve every recommendation of the plan to a business id; this reuses
# prepareEval rather than repeating its body inline.
plan_eval = prepareEval(plan)
plan_eval

[{'days': '1',
  'breakfast': 'TwnzM8mJn_nT2PJf1x-9kQ',
  'morning_attractions': ['Ib9HV7ekw459jM1Ksdiyiw'],
  'lunch': 'wHkYLlZyPXllrQRlvidUlg',
  'afternoon_attractions': ['kbEVlzQLcYS3JSQPG9QMOQ',
   '8_O6LXLyMgpq1g9CIwcW4w'],
  'dinner': 'S8ZFYEgMejpChID8tzKo9A',
  'night_attractions': ['pxZAz8pv18wK_t-m8WpN0g'],
  'accommodation': '3QPAh9VvYNTnqAFgBeBcng'},
 {'days': '2',
  'breakfast': 'rYqmaOIULRouz_1db07OdQ',
  'morning_attractions': ['Qw7tz-UkPrpXaVidWuab4Q'],
  'lunch': '6ajnOk0GcY9xbb5Ocaw8Gw',
  'afternoon_attractions': ['4mWzXhD8vo0bABVCGAhlqA',
   'ytynqOUb3hjKeJfRj5Tshw'],
  'dinner': '05ev984NYfimRN0UiFrxaA',
  'night_attractions': ['dBRWKIS7h-qZCi8EjUJ2HA'],
  'accommodation': '3QPAh9VvYNTnqAFgBeBcng'}]

3. Micro (does each recommendation meet the preference?)

### Failure Rate

1. Outside the pool

In [9]:
with open ('Outputs/Task1_json/Task1_json_1_outOfPool.json') as f:
    plan_outOfPool = json.load(f)

In [10]:
# Resolve the out-of-pool sample plan to business ids; this reuses
# prepareEval rather than repeating its body inline.
plan_eval_outOfPool = prepareEval(plan_outOfPool)
plan_eval_outOfPool

[{'days': '1',
  'breakfast': -2,
  'morning_attractions': ['Ib9HV7ekw459jM1Ksdiyiw'],
  'lunch': -1,
  'afternoon_attractions': [],
  'dinner': 'S8ZFYEgMejpChID8tzKo9A',
  'night_attractions': ['pxZAz8pv18wK_t-m8WpN0g'],
  'accommodation': -2},
 {'days': '2',
  'breakfast': 'rYqmaOIULRouz_1db07OdQ',
  'morning_attractions': ['Qw7tz-UkPrpXaVidWuab4Q'],
  'lunch': '6ajnOk0GcY9xbb5Ocaw8Gw',
  'afternoon_attractions': [-1, 'ytynqOUb3hjKeJfRj5Tshw'],
  'dinner': '05ev984NYfimRN0UiFrxaA',
  'night_attractions': [],
  'accommodation': -2}]

In [11]:
# Print one line per recommendation that could not be matched inside the pool (-1).
for day in plan_eval_outOfPool:
    for slot_value in day.values():
        resolved_ids = slot_value if isinstance(slot_value, list) else [slot_value]
        for resolved in resolved_ids:
            if resolved == -1:
                print('outside the pool')

outside the pool
outside the pool


2. Missing info

In [12]:
with open ('Outputs/Task1_json/Task1_json_1_missingInfo.json') as f:
    plan_missingInfo = json.load(f)

In [13]:
# Resolve the missing-info sample plan to business ids; this reuses
# prepareEval rather than repeating its body inline.
plan_eval_missingInfo = prepareEval(plan_missingInfo)
plan_eval_missingInfo

[{'days': '1',
  'breakfast': -2,
  'morning_attractions': ['Ib9HV7ekw459jM1Ksdiyiw'],
  'lunch': 'wHkYLlZyPXllrQRlvidUlg',
  'afternoon_attractions': [],
  'dinner': 'S8ZFYEgMejpChID8tzKo9A',
  'night_attractions': ['pxZAz8pv18wK_t-m8WpN0g'],
  'accommodation': -2},
 {'days': '2',
  'breakfast': 'rYqmaOIULRouz_1db07OdQ',
  'morning_attractions': ['Qw7tz-UkPrpXaVidWuab4Q'],
  'lunch': '6ajnOk0GcY9xbb5Ocaw8Gw',
  'afternoon_attractions': ['4mWzXhD8vo0bABVCGAhlqA',
   'ytynqOUb3hjKeJfRj5Tshw'],
  'dinner': '05ev984NYfimRN0UiFrxaA',
  'night_attractions': [],
  'accommodation': -2}]

In [14]:
# Report every required slot that is empty (lists) or marked -2 (scalars).
miss_info = False

for day in plan_eval_missingInfo:
    for slot, entry in day.items():
        # night attraction can be skipped
        if slot == 'night_attractions':
            continue

        if isinstance(entry, list):
            finding = 'miss list info' if len(entry) == 0 else None
        else:
            finding = 'miss other info' if entry == -2 else None

        if finding is not None:
            print(finding)
            miss_info = True

if miss_info:
    print("miss info")

miss other info
miss list info
miss other info
miss other info
miss info


(?/X) satisfied - micro, (?/1) passes - macro

In [16]:
eval

{'day': ['2 day'],
 'price': ['moderate budget'],
 'attraction': ['activity oriented'],
 'cuisine': ['Italian'],
 'restaurants': [],
 'hotel': ['good quality', 'good location'],
 'preference_count': [6]}

In [19]:
preference_count = eval['preference_count'][0]
preference_count

6

In [234]:
plan_test = [{
    'days': '1',
    'breakfast': -2,
    'morning_attractions': ['Ib9HV7ekw459jM1Ksdiyiw'],
    'lunch': -1,
    'afternoon_attractions': [],
    'dinner': 'qCpdeQYPf9i1EZhTnQgjtg',
    'night_attractions': ['pxZAz8pv18wK_t-m8WpN0g'],
    'accommodation': 'QMHd6Z2djm_SBLzGhLvFqw'
},
{
    'days': '2',
    'breakfast': 'rYqmaOIULRouz_1db07OdQ',
    'morning_attractions': ['Qw7tz-UkPrpXaVidWuab4Q'],
    'lunch': '6ajnOk0GcY9xbb5Ocaw8Gw',
    'afternoon_attractions': [-1, 'ytynqOUb3hjKeJfRj5Tshw'],
    'dinner': '05ev984NYfimRN0UiFrxaA',
    'night_attractions': [],
    'accommodation': 'QMHd6Z2djm_SBLzGhLvFqw'
}]

In [209]:
eval_test = {
    "day": [
        "2 day"
    ],
    "price": [
        "moderate budget"
    ],
    "attraction": [
        "activity oriented"
    ],
    "cuisine": [
        "Italian"
    ],
    "restaurants": [
        "good flavor",
        "good freshness"
    ],
    "hotel": [
        "good quality",
        "good location"
    ],
    "preference_count": [
        8
    ]
}

In [144]:
preference_satisfied = 0

In [173]:
with open ('Datasets/Restaurants_task1.jsonl', 'r') as file:
    restaurants = [json.loads(line.strip()) for line in file]

with open ('Datasets/Hotels_task1.jsonl', 'r') as file:
    hotels = [json.loads(line.strip()) for line in file]

with open ('Datasets/Attractions_task1.jsonl', 'r') as file:
    attractions = [json.loads(line.strip()) for line in file]

In [196]:
#day
# day: the plan must contain exactly as many days as requested
day_numerator = 0
day_denominator = 1
day_satisfied = False

# The requirement looks like "2 day". Read the whole leading run of digits,
# not just the first character, so that e.g. "10 day" is parsed as 10.
day_text = eval['day'][0]
leading_digits = ''
for character in day_text:
    if not character.isdigit():
        break
    leading_digits += character
requested_days = int(leading_digits)

if len(plan_test) == requested_days:
    day_satisfied = True
    day_numerator = 1
if day_satisfied:
    # NOTE: re-running this cell increments the counter again.
    preference_satisfied += 1
print(day_numerator, "/", day_denominator, day_satisfied)

1 / 1 True


In [222]:
#price
# price
# All-or-nothing preference: a single business priced outside the allowed
# tiers turns the score into 0/1.
price_numerator = 1
price_denominator = 1
price_satisfied = True

price_map = {
    'cheap budget': ['$', '$$'],
    'moderate budget': ['$', '$$', '$$$'],
    'expensive': ['$', '$$', '$$$', '$$$$'],
}
price_limit = price_map[eval['price'][0]]
print('our price limit is: ', price_limit)
print("============================")


def _report_prices(business_ids, pool, within_message, unresolved_message):
    """Print a verdict per id and return True when no matched business is out of range.

    Ids equal to -1 or -2 only produce ``unresolved_message``; every pool
    record whose 'business_id' equals the id is checked against ``price_limit``.
    """
    everything_in_range = True
    for business_id in business_ids:
        if business_id == -1 or business_id == -2:
            print(unresolved_message)
            continue
        for business in pool:
            if business['business_id'] != business_id:
                continue
            if business['price'] in price_limit:
                print(within_message)
            else:
                print("the price is out side the range")
                everything_in_range = False
    return everything_in_range


# price - meals
print("=== Meals ======")
all_meals = [day[meal] for day in plan_test for meal in ('breakfast', 'lunch', 'dinner')]
meals_in_range = _report_prices(
    all_meals, restaurants,
    "the price is within the range", "this meal is out of pool or missing")

# price hotel
print("=== Hotel ======")
hotels_in_range = _report_prices(
    [day['accommodation'] for day in plan_test], hotels,
    "the price is within the range", "the hotel is out of pool or missing")

# price - attractions
print("=== Attractions ======")
all_attractions = [
    attraction_id
    for day in plan_test
    for slot in ('morning_attractions', 'afternoon_attractions', 'night_attractions')
    for attraction_id in day[slot]
]
attractions_in_range = _report_prices(
    all_attractions, attractions,
    "the price is withint the range", "the attraction is out of pool or missing")

if not (meals_in_range and hotels_in_range and attractions_in_range):
    price_satisfied = False
    price_numerator = 0

print(price_numerator, "/", price_denominator, price_satisfied)

our price limit is:  ['$', '$$', '$$$']
this meal is out of pool or missing
this meal is out of pool or missing
the price is within the range
the price is within the range
the price is within the range
the price is within the range
the price is within the range
the price is within the range
the price is withint the range
the price is withint the range
the price is withint the range
the attraction is out of pool or missing
the price is withint the range
1 / 1 True


In [224]:
#attraction orientation
orientation_numerator = 1
orientation_denominator = 1
orientation_satisfied = True

oritentation_limit = eval['attraction'][0]
oritentation_category = oritentation_limit.replace(' ','_')
oritentation_acceptable_list = ['medium ' + oritentation_limit, 'high ' + oritentation_limit]
print(oritentation_acceptable_list)
all_attractions = []
for day in plan_test:
    for id in day['morning_attractions']:
        all_attractions.append(id)
    for id in day['afternoon_attractions']:
        all_attractions.append(id)
    for id in day['night_attractions']:
        all_attractions.append(id)
for attraction_id in all_attractions:
    if attraction_id != -1 and attraction_id != -2:
        for attraction in attractions:
            if(attraction['business_id'] == attraction_id):
                if attraction[oritentation_category] in oritentation_acceptable_list:
                    print("attraction orientation is acceptablem which is: ", attraction[oritentation_category])
                else:
                    print("attraction orientation is not acceptable which is: ", attraction[oritentation_category])
                    orientation_satisfied = False
                    orientation_numerator = 0
    
print(orientation_numerator, "/", orientation_denominator, orientation_satisfied)

['medium activity oriented', 'high activity oriented']
attraction orientation is not acceptable which is:  low activity oriented
attraction orientation is acceptablem which is:  medium activity oriented
attraction orientation is not acceptable which is:  low activity oriented
attraction orientation is acceptablem which is:  medium activity oriented
0 / 1 False


In [235]:
#cuisine
# cuisine
# Satisfied as soon as at least one matched restaurant offers the requested cuisine.
cuisine_numerator = 0
cuisine_denominator = 1
cuisine_satisfied = False

cuisine_limit = eval['cuisine'][0]
print("we want: ", cuisine_limit)

all_meals = [day[meal] for day in plan_test for meal in ('breakfast', 'lunch', 'dinner')]

for restaurant_id in all_meals:
    if restaurant_id == -1 or restaurant_id == -2:
        print("this meal is out of pool or missing")
        continue
    for restaurant in restaurants:
        if restaurant['business_id'] != restaurant_id:
            continue
        cuisine_provided = [restaurant['cuisine_1'], restaurant['cuisine_2']]
        if cuisine_limit in cuisine_provided:
            print("the cuisine match with", cuisine_provided)
            cuisine_satisfied = True
            cuisine_numerator = 1
        else:
            print("the cuisine doesn't match with", cuisine_provided)

print(cuisine_numerator, "/", cuisine_denominator, cuisine_satisfied)

we want:  Italian
this meal is out of pool or missing
this meal is out of pool or missing
the cuisine doesn't match with ['Not Applicable', 'Not Applicable']
the cuisine doesn't match with ['American (New)', 'Not Applicable']
the cuisine match with ['Italian', 'Mediterranean']
the cuisine match with ['Italian', 'Not Applicable']
1 / 1 True


In [236]:
#restaurant 
# restaurant
# One point per requested quality (e.g. "good flavor"): the point is earned
# only if every matched restaurant is rated 'good' or 'excellent' on it.
restaurants_limits = eval_test['restaurants']
# Drop the 5-character "good " prefix to get the record key, e.g. "flavor".
restaurants_category = [cat[5:] for cat in restaurants_limits]

restaurants_numerator = len(restaurants_limits)
restaurants_denominator = len(restaurants_limits)
restaurants_satisfied = True

all_meals = []
for day in plan_test:
    all_meals.append(day['breakfast'])
    all_meals.append(day['lunch'])
    all_meals.append(day['dinner'])

for cat in restaurants_category:
    restaurants_acceptable_list = ['good ' + cat, 'excellent ' + cat]
    print(restaurants_acceptable_list)

    category_satisfied = True
    for restaurant_id in set(all_meals):
        # out-of-pool (-1) and missing (-2) meals are ignored here
        if restaurant_id == -1 or restaurant_id == -2:
            continue
        for restaurant in restaurants:
            if restaurant['business_id'] != restaurant_id:
                continue
            if restaurant[cat] in restaurants_acceptable_list:
                print(f"the {cat} is acceptable which is: ", restaurant[cat])
            else:
                print(f"the {cat} is not acceptable which is: ", restaurant[cat])
                category_satisfied = False

    # Deduct at most one point per category, however many restaurants fail it;
    # otherwise the numerator could drop below zero.
    if not category_satisfied:
        restaurants_satisfied = False
        restaurants_numerator -= 1

print(restaurants_numerator, "/", restaurants_denominator, restaurants_satisfied)

['good flavor', 'excellent flavor']
the flavor is acceptable which is:  good flavor
the flavor is acceptable which is:  good flavor
the flavor is not acceptable which is:  average flavor
the flavor is acceptable which is:  good flavor
['good freshness', 'excellent freshness']
the freshness is acceptable which is:  good freshness
the freshness is acceptable which is:  good freshness
the freshness is acceptable which is:  good freshness
the freshness is acceptable which is:  excellent freshness
1 / 2 False


In [231]:
#Hotel

hotel_numerator = len(eval_test['hotel'])
hotel_denominator = len(eval_test['hotel'])
hotel_satisfied = True

hotel_limit = eval['hotel']
hotel_cat = [cat[5:] for cat in hotel_limit]

all_hotels = []
for day in plan_test:
    hotel_id = day['accommodation']
    all_hotels.append(hotel_id)

for cat in hotel_cat:
    hotel_acceptable_list = []
    hotel_acceptable_list.append('good ' + cat)
    hotel_acceptable_list.append('excellent ' + cat)

    for hotel_id in set(all_hotels):
        print(hotel_id)
        if hotel_id != -1 and hotel_id != -2:
            for hotel in hotels:
                if(hotel['business_id'] == hotel_id):    
                    if(hotel[cat] in hotel_acceptable_list):
                        print("the hotel is acceptable which is: ", hotel[cat])
                    else:
                        print("the hotel is not acceptable which is: ", hotel[cat])
                        hotel_satisfied = False
                        hotel_numerator -= 1


#note: only for hotel, we need to consider what if no recommendation, which means
# all -2, we don't consider this in other categories since there is low chance that
#llm didn't provide any reccommendation for food or attractions. 
if(all(x == -2 for x in all_hotels)):
    hotel_numerator= 0 
    hotel_denominator = 0

print(hotel_numerator, "/", hotel_denominator, hotel_satisfied)

QMHd6Z2djm_SBLzGhLvFqw
the hotel is acceptable which is:  good quality
QMHd6Z2djm_SBLzGhLvFqw
the hotel is not acceptable which is:  average location
1 / 2 False


In [239]:
# Micro score: (preferences satisfied, preferences requested) summed over all checks.
_micro_numerators = (
    day_numerator, price_numerator, orientation_numerator,
    cuisine_numerator, restaurants_numerator, hotel_numerator,
)
_micro_denominators = (
    day_denominator, price_denominator, orientation_denominator,
    cuisine_denominator, restaurants_denominator, hotel_denominator,
)
micro = (sum(_micro_numerators), sum(_micro_denominators))
micro

(5, 8)

In [240]:
# Macro score: the plan passes only if every individual check passed.
macro = all((
    day_satisfied,
    price_satisfied,
    orientation_satisfied,
    cuisine_satisfied,
    restaurants_satisfied,
    hotel_satisfied,
))
macro

False

Optimized

TSP

In [1]:
import json
import numpy as np
import sys

In [2]:
plan = [{'days': '1',
  'breakfast': 'TwnzM8mJn_nT2PJf1x-9kQ',
  'morning_attractions': ['Ib9HV7ekw459jM1Ksdiyiw'],
  'lunch': 'wHkYLlZyPXllrQRlvidUlg',
  'afternoon_attractions': ['kbEVlzQLcYS3JSQPG9QMOQ',
   '8_O6LXLyMgpq1g9CIwcW4w'],
  'dinner': 'S8ZFYEgMejpChID8tzKo9A',
  'night_attractions': ['pxZAz8pv18wK_t-m8WpN0g'],
  'accommodation': '3QPAh9VvYNTnqAFgBeBcng'},
 {'days': '2',
  'breakfast': 'rYqmaOIULRouz_1db07OdQ',
  'morning_attractions': ['Qw7tz-UkPrpXaVidWuab4Q'],
  'lunch': '6ajnOk0GcY9xbb5Ocaw8Gw',
  'afternoon_attractions': ['4mWzXhD8vo0bABVCGAhlqA',
   'ytynqOUb3hjKeJfRj5Tshw'],
  'dinner': '05ev984NYfimRN0UiFrxaA',
  'night_attractions': ['dBRWKIS7h-qZCi8EjUJ2HA'],
  'accommodation': '3QPAh9VvYNTnqAFgBeBcng'}] 

with open('Datasets/Attractions_task1.jsonl', 'r') as f:
    data = [json.loads(line) for line in f]

with open('Datasets/Hotels_task1.jsonl', 'r') as f:
    data_hotel = [json.loads(line) for line in f]

def getCordinate(id):
    """Return (latitude, longitude) of the first attraction in ``data`` with this id.

    Returns None when no attraction carries the given ``business_id``.
    """
    matches = (
        (record['latitude'], record['longitude'])
        for record in data
        if record['business_id'] == id
    )
    return next(matches, None)
def getCordinate_Hotel(id):
    """Return (latitude, longitude) of the first hotel in ``data_hotel`` with this id.

    Returns None when no hotel carries the given ``business_id``.
    """
    matches = (
        (record['latitude'], record['longitude'])
        for record in data_hotel
        if record['business_id'] == id
    )
    return next(matches, None)

In [3]:
#get the cordinates
cordinates = []
for day in plan:
    cordinate_one_day = []
    
    cordinate_one_day.append(getCordinate_Hotel(day['accommodation']))
    
    for attraction in day['morning_attractions']:
        cordinate_one_day.append(getCordinate(attraction))
    for attraction in day['afternoon_attractions']:
        cordinate_one_day.append(getCordinate(attraction))
    for attraction in day['night_attractions']:
        cordinate_one_day.append(getCordinate(attraction))
            
    cordinates.append(cordinate_one_day)
cordinates

[[(39.946396818, -75.1609669488),
  (39.9488980012, -75.1500296367),
  (39.9495774962, -75.1503095688),
  (39.953893, -75.149068),
  (39.9445424967, -75.1414754636)],
 [(39.946396818, -75.1609669488),
  (39.965573, -75.180969),
  (39.942712, -75.159313),
  (39.9533414645, -75.1588545174),
  (39.9582109, -75.1731373822)]]

In [4]:
def getDistanceMatrix(cordinates):
    """Build the symmetric matrix of pairwise distances between points.

    Each point is indexed as (first, second) component; both components are
    multiplied by 1000 before a plain Euclidean distance is taken. The
    diagonal stays zero.
    """
    size = len(cordinates)
    matrix = np.zeros((size, size))
    for row in range(size):
        here = cordinates[row]
        for col in range(row + 1, size):
            there = cordinates[col]
            first_gap = here[0] * 1000 - there[0] * 1000
            second_gap = here[1] * 1000 - there[1] * 1000
            separation = (first_gap ** 2 + second_gap ** 2) ** 0.5
            # fill both halves of the symmetric matrix
            matrix[col][row] = separation
            matrix[row][col] = matrix[col][row]
    return matrix

In [5]:
cordinates_one_day = cordinates[1]

distance_matrix = getDistanceMatrix(cordinates_one_day)
distance_matrix

array([[ 0.        , 27.70934875,  4.03898878,  7.25882094, 16.96148527],
       [27.70934875,  0.        , 31.48980243, 25.27173918, 10.7487094 ],
       [ 4.03898878, 31.48980243,  0.        , 10.63934781, 20.76847237],
       [ 7.25882094, 25.27173918, 10.63934781,  0.        , 15.09011693],
       [16.96148527, 10.7487094 , 20.76847237, 15.09011693,  0.        ]])

In [6]:
def totalCost(mask, pos, n, cost, info_lists):
    """Return the cheapest cost of visiting all remaining cities and returning to city 0.

    Args:
        mask: bitmask of already visited cities (bit i set = city i visited).
        pos: index of the current city.
        n: total number of cities.
        cost: matrix indexed as cost[a][b].
        info_lists: list extended in place; every call that still has
            unvisited cities appends ``[unvisited_cities, tour_costs]`` after
            all of its recursive calls have finished.
    """
    # Base case: if all cities are visited, return the
    # cost to return to the starting city (0)
    if mask == (1 << n) - 1:
        return cost[pos][0]

    # Cities whose bit is not yet set in the mask
    unvisited = [city for city in range(n) if not mask & (1 << city)]

    # Cost of moving to each unvisited city plus the best completion from there
    tour_costs = [
        cost[pos][city] + totalCost(mask | (1 << city), city, n, cost, info_lists)
        for city in unvisited
    ]

    info_lists.append([unvisited, tour_costs])
    return min(tour_costs)

In [7]:
n = len(distance_matrix)
info_list = []
optimized_distance = totalCost(1, 0, n, distance_matrix,info_list)
print("the short distance is: ", optimized_distance)

the short distance is:  67.66027044284176


In [8]:
#get a list of 1 to n
candidates = list(range(n-1))
#add 1 to the values
candidates = [x+1 for x in candidates]

moves = []

while len(candidates) > 0:
    #find the last one in the lnfo_list
    for i in range(len(info_list)):
        if info_list[i][0] == candidates:
            next_move = info_list[i][0][np.argmin(info_list[i][1])]
            moves.append(next_move)
            #take next move out of candidates
            candidates.remove(next_move)

moves_reversed = moves[::-1]
optimized_route = [[0] + moves, [0] + moves_reversed]
print(" the shortest path is: ", optimized_route)
            

 the shortest path is:  [[0, 2, 3, 1, 4], [0, 4, 1, 3, 2]]


In [None]:
#Position Deviation
output_route = list(range(n))
gap_1 = sum([1 if x != y else 0 for x,y in zip(output_route,optimized_route[0])])/n
gap_2 = sum([1 if x != y else 0 for x,y in zip(output_route,optimized_route[1])])/n
print(min(gap_1,gap_2)) 

0.6
