In [None]:
import json
import pickle
import random

import numpy as np
import pandas as pd

# NOTE(review): `widgets` (ipywidgets) and `tqdm` are used in later cells but are
# not imported in any visible cell — confirm they are brought into scope
# (e.g. `import ipywidgets as widgets`, `from tqdm.auto import tqdm`).

In [None]:
from rag import gpt, build_prompt
from retrieval import elastic_search, elastic__vector_search, fais__vector_search

### User input

In [None]:
def on_value_change(change):
    """Echo the newly selected value whenever an observed widget changes.

    `change` is the notification passed by the widget's `observe` hook; only
    its 'new' entry is read. The value is printed, not stored anywhere.
    """
    chosen = change['new']
    print(f"Selected option: {chosen}")


# Month-of-travel picker: one entry per calendar month, defaulting to January.
month_dropdown = widgets.Dropdown(
    description='Month:',
    value='January',
    options=[
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    ],
)

# Render the widget in the cell output.
display(month_dropdown)

# Invoke the handler every time the dropdown's `value` trait changes.
month_dropdown.observe(on_value_change, names='value')


In [None]:
# Snapshot the month currently selected in the dropdown. The widget is read once
# when this cell runs, so re-run the cell after changing the selection.
month = month_dropdown.value
month

In [None]:
# Trip-length picker: the duration is chosen as a word ("One" ... "Seven").
days_dropdown = widgets.Dropdown(
    options=["One", "Two", "Three", "Four", "Five", "Six", "Seven"],
    value='One',  # Default value
    description='Duration:',
)

# Display the dropdown widget
display(days_dropdown)

# Reuse the `on_value_change` handler defined with the month dropdown rather
# than redefining an identical function here (a duplicate `def` silently
# shadows the earlier one). The handler prints each newly selected value.
days_dropdown.observe(on_value_change, names='value')


In [None]:
# Snapshot the selected duration as a lowercase word (e.g. "One" -> "one").
# Despite its name, `query` holds the trip length: it is interpolated into the
# search query and passed as the "days" parameter further down.
query = days_dropdown.value.lower()
query

In [None]:
# Activity preferences: a multi-select list, so several entries can be chosen.
dropdown = widgets.SelectMultiple(
    # Spelled 'dining' (not 'dinning') so the option matches the spelling used
    # for the same category in the evaluation cell's `acts` list.
    options=['sightseeing', 'dining', 'adventure', 'relaxation', 'culture'],
    value=['sightseeing'],  # Default value
    description='Activities:',
)

# Display the selection widget
display(dropdown)

# Reuse the `on_value_change` handler defined with the month dropdown rather
# than redefining an identical function here. The handler prints the new
# selection each time it changes.
dropdown.observe(on_value_change, names='value')


In [None]:
# To directly access the selected value, you can also just do:
activities = dropdown.value
activities

In [None]:
# Keep only the rows whose 'activity' value is one of the selected activities.
# NOTE(review): `activities_df` is not defined in any visible cell — confirm
# where it is loaded.
activity_list = activities_df.loc[activities_df["activity"].isin(activities)]

### Retrieval evaluation 

In [None]:
# Load the itinerary documents; each record is later looked up by its "id" and
# read for its "itinerary" field.
# The encoding is pinned to UTF-8 so that non-ASCII text (e.g. accented place
# names) decodes the same way on every platform instead of relying on the
# locale's default encoding.
with open("../data/travel_itirenary_with_ids.json", "r", encoding="utf-8") as f_in:
    docs_fin = json.load(f_in)

In [None]:
# Load the pickled `searches` mapping (keys are matched against document ids
# and values are decoded with json.loads in later cells).
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open("../data/searches.bin", "rb") as searches_file:
    searches = pickle.load(searches_file)

In [None]:
# Lookup table from document id to the full document record.
doc_index = dict((doc["id"], doc) for doc in docs_fin)

In [None]:
# Build (question, itinerary, document id) triples, one per generated question.
# `searches` maps each document id to a JSON-encoded list of questions. It is
# decoded right here because `parsed_searches` is only created in a later cell,
# so relying on it raises NameError on a fresh Restart & Run All.
ground_truth_list = []
for doc_id, raw_questions in searches.items():
    itinerary = doc_index[doc_id]['itinerary']
    for question in json.loads(raw_questions):
        ground_truth_list.append((question, itinerary, doc_id))

In [None]:
# One row per generated question, paired with the itinerary it belongs to and
# that itinerary's document id.
df = pd.DataFrame(ground_truth_list, columns=["question", "itinerary", "id"])

# Persist the ground truth before the helper column below is added, so the CSV
# contains only question / itinerary / id.
df.to_csv("../data/ground_truth.csv", index=False)

# Ordinal of each row's source document (0 for the first id encountered, 1 for
# the next, ...). Deriving it from the ids gives the same labels as the former
# hard-coded "7 documents x 10 questions" layout, but keeps working when the
# number of documents or questions per document changes.
# NOTE(review): assumed to line up with the positions returned by the FAISS
# search — confirm against how that index was built.
# Read this column as df["index"]; `df.index` is the DataFrame's row index.
df["index"] = pd.factorize(df["id"])[0]

In [None]:
# Decode the JSON-encoded value stored under each key of `searches`, keeping
# the same keys. Built in one expression so no throwaway names are left behind.
parsed_searches = {doc_id: json.loads(raw) for doc_id, raw in searches.items()}


In [None]:
# Ordinal of each row's source document (0 for the first id encountered, 1 for
# the next, ...), derived from the ids instead of assuming exactly 7 documents
# with 10 questions each (which raises ValueError for any other dataset size).
# Read this column as df["index"]; `df.index` is the DataFrame's row index.
df["index"] = pd.factorize(df["id"])[0]

In [None]:
# Run every ground-truth question through the three retrievers, keeping
# element [1] of each result.
# NOTE(review): element [1] is presumably the top hit in the form the hit-rate
# cells compare against — confirm in retrieval.py.
retrieved = [
    (
        elastic_search(question)[1],
        elastic__vector_search(question)[1],
        fais__vector_search(question)[1],
    )
    for question in df["question"]
]

# Split the per-question triples back into one list per retriever.
stic_search = [hits[0] for hits in retrieved]
stic_vector = [hits[1] for hits in retrieved]
faiss_vector = [hits[2] for hits in retrieved]

# Store each retriever's results next to the ground truth.
df["stic_search"] = stic_search
df["stic_vector"] = stic_vector
df["faiss_vector"] = faiss_vector

#### Hit rate for Keyword Search with Elastic

In [None]:
# Fraction of questions whose keyword-search result equals the ground-truth
# itinerary, rounded to two decimals.
round(sum(df["itinerary"].eq(df["stic_search"])) / len(df), 2)

#### Hit rate for Semantic (Vector) Search with Elastic

In [None]:
# Fraction of questions whose Elastic vector-search result equals the
# ground-truth itinerary, rounded to two decimals.
round(sum(df["itinerary"].eq(df["stic_vector"])) / len(df), 2)

#### Hit rate for Semantic Search with FAISS (L2 Distance)

In [None]:
# Fraction of questions for which the FAISS result equals the expected document
# ordinal, rounded to two decimals.
# The column has to be read as df["index"]: attribute access `df.index` returns
# the DataFrame's row index (0 .. len(df)-1), not the "index" column, which
# made the metric compare against row numbers.
round(sum(df["index"] == df.faiss_vector)/len(df), 2)

## RAG Evaluation

In [None]:
# Build the retrieval query from the selected duration and keep element [0] of
# the keyword-search result as the base plan for the prompt.
# NOTE(review): element [0] is presumably the retrieved itinerary text — confirm
# in retrieval.py.
full_query = f"Travel itinerary for {query} days"
plan = elastic_search(full_query)[0]

In [None]:
# Everything handed to the prompt builder: the user's selections, the retrieved
# plan, and the activity rows filtered to the selected activities.
params = dict(
    days=query,
    activities=activities,
    month=month,
    plan=plan,
    activity_list=activity_list,
)

In [None]:
# Turn the trip parameters into the generation prompt via the project's
# `build_prompt` helper.
prompt = build_prompt(params)

In [None]:
# Send the prompt through the project's `gpt` helper and keep its reply.
# NOTE(review): the variable name is misspelled ("responce"); left unchanged in
# case other cells reference it.
gpt_responce = gpt(prompt)

### LLM as a Judge

In [None]:
def build_evaluation_prompt(itinerary, params):
    """Build the LLM-as-a-judge prompt for one generated itinerary.

    Args:
        itinerary: The generated itinerary to be scored; interpolated verbatim.
        params: Mapping that must contain "days", "activities" and "month";
            each value is interpolated with its default string formatting.

    Returns:
        The prompt text with leading and trailing whitespace stripped. It asks
        for a single average score between 1 and 10.

    Raises:
        KeyError: If `params` lacks "days", "activities" or "month".
    """
    trip_days = params["days"]
    preferred_activities = params["activities"]
    travel_month = params["month"]

    judge_prompt = f"""
    You are tasked with evaluating a travel itinerary for a short trip to Barcelona. The itinerary was generated based on the following criteria:

    1. The trip duration is {trip_days} days.
    2. The traveler prefers the following types of activities: {preferred_activities}.
    3. The trip will take place in the month of {travel_month}, so the itinerary should account for the typical weather and seasonal activities in that month.
    4. The activities should be spread across the {trip_days} days to provide a balanced and enjoyable experience.

    Here is the generated itinerary for evaluation:

    {itinerary}

    Please evaluate the itinerary based on the following aspects:
    - Relevance of activities to the traveler’s preferences
    - Appropriateness of activities for the weather/season
    - Distribution and balance of activities across the trip duration
    - Diversity of activities (if applicable)
    - Completeness of the itinerary for the given days

    Provide a single numerical score (between 1 and 10) that reflects the average score across these five aspects. Only return the average score.
    """

    # Only the outer whitespace is removed; the inner lines keep their indent.
    return judge_prompt.strip()

In [None]:
# Evaluation grid: every trip length x four months x 1..len(acts) randomly
# chosen activity types.
acts = ['sightseeing', 'dining', 'adventure', 'relaxation', 'culture']
days = ["one", "two", "three", "four", "five", "six", "seven"]
months = ["February", "May", "August", "November"]
eval_score = []

# Dedicated, seeded generator so the sampled activity subsets are the same on
# every run and the global `random` state is left untouched.
rng = random.Random(42)

for day in tqdm(days):
    # Retrieve the base plan BEFORE building the parameters. Previously the
    # parameters captured `plan` first, so each prompt used the plan from the
    # previous iteration (or from the interactive cells above). The query
    # depends only on the trip length, so one lookup per day is enough.
    day_plan = elastic_search(f"Travel itinerary for {day} days")[0]

    for trip_month in months:
        for n_acts in range(1, len(acts) + 1):
            chosen_acts = rng.sample(acts, n_acts)

            # Loop-local names are used on purpose: the interactive selections
            # (`month`, `query`, `activities`, `params`, `plan`, ...) are no
            # longer overwritten by the evaluation run.
            eval_params = {
                "days": day,
                "activities": chosen_acts,
                "month": trip_month,
                "plan": day_plan,
                "activity_list": activities_df[activities_df['activity'].isin(chosen_acts)],
            }

            # Generate an itinerary, then have the model grade it.
            generated_itinerary = gpt(build_prompt(eval_params))
            judge_prompt = build_evaluation_prompt(generated_itinerary, eval_params)

            # The judge's raw reply is stored; it is converted to a number
            # when the scores are aggregated.
            eval_score.append(gpt(judge_prompt))


In [None]:
# Convert every judge reply to float and average them; float() raises
# ValueError if a reply is not a bare number.
rag_score = np.average(list(map(float, eval_score)))
print(f"Using LLM as a judge, the performance of our RAG on {len(eval_score)} prompts is {round(rag_score, 2)} out of 10.")