In [73]:
import sys
import os

# Absolute path of the current working directory
# (os.path.abspath('') resolves the empty path against the cwd).
current_dir = os.path.abspath('')
# Project root path (the parent directory of `src`), taken to be two levels
# above the working directory.
# NOTE(review): this presumes the notebook is run from <root>/src/<subdir> — confirm.
project_root = os.path.dirname(os.path.dirname(current_dir))
# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)


from src.evaluation.evaluation import *

### Get random properties as test set

In [74]:
# Fetch 20 properties to act as the test set; the helper's result is unpacked
# into the property records and their ids.
# NOTE(review): the helper name suggests random sampling and no seed is passed
# here, so the selection presumably differs between runs — confirm.
random_properties, random_property_ids = get_random_properties(20)
# Preview the first sampled record alongside the full id list.
random_properties[0], random_property_ids


({'id': 26549061,
  'name': 'NEW LISTING! Beautiful home w/ lanai, central location & easy beach access!',
  'description': 'This property is a (Phone number hidden by Airbnb) square feet,  3 Bedroom, 2 bathroom accommodation located in Hanalei.  NEW LISTING! Beautiful home w/ lanai, central location & easy beach access! offers 3 Queen bed. It can host 6 people.  This accommodation features kitchen. Please see the description below or inquire for more details! Hale Kailea is a tastefully furnished modern home in historic Hanalei Bay! Its central location allows you to be in the center of it all, while still providing a private, comfortable sanctuary to recover from your time in the sun, sand, and sea.  Enjoy barbecuing on the shaded, wrap-around lanai and sharing a meal with your ohana (family and friends) around the custom dining table with seating for six. The open kitchen and cathedral ceiling in the living area allow you to entertain with ease. The bedrooms offer ample room for acc

### Generate queries for these properties

In [75]:
# Attach a search query to each sampled property; later cells read it from the
# 'generated_query' key of every returned record.
# NOTE(review): presumably the queries are produced by an LLM from the listing text — confirm in src.evaluation.
random_properties_and_queries = generate_queries(random_properties)

In [76]:
# Preview only the first few enriched records; displaying every record with its
# full listing description floods the notebook output.
random_properties_and_queries[:3]

[{'id': 26549061,
  'name': 'NEW LISTING! Beautiful home w/ lanai, central location & easy beach access!',
  'description': 'This property is a (Phone number hidden by Airbnb) square feet,  3 Bedroom, 2 bathroom accommodation located in Hanalei.  NEW LISTING! Beautiful home w/ lanai, central location & easy beach access! offers 3 Queen bed. It can host 6 people.  This accommodation features kitchen. Please see the description below or inquire for more details! Hale Kailea is a tastefully furnished modern home in historic Hanalei Bay! Its central location allows you to be in the center of it all, while still providing a private, comfortable sanctuary to recover from your time in the sun, sand, and sea.  Enjoy barbecuing on the shaded, wrap-around lanai and sharing a meal with your ohana (family and friends) around the custom dining table with seating for six. The open kitchen and cathedral ceiling in the living area allow you to entertain with ease. The bedrooms offer ample room for acc

In [77]:
# Pull the generated query text out of every enriched record, keeping list order.
sample_queries = [entry['generated_query'] for entry in random_properties_and_queries]
sample_queries

['Modern 3-bedroom home in Hanalei with 2 bathrooms, spacious kitchen, wrap-around lanai for barbecues, central location near beaches, comfortable queen beds for six, and a stylish, private sanctuary to relax after beach days.',
 'Cozy, sophisticated flat in Beşiktaş with three bedrooms, fully furnished kitchen, shared living room and bathroom, international TV channels, and peaceful atmosphere. Located just 2 minutes from the seaside, central market, cafes, restaurants, bars, and ferry ports in a lively yet quiet neighborhood.',
 "Modern renovated apartment in Mong Kok center near Lanham Shopping Arcade, Lady's Street, Sneakers Street, with access to quality restaurants, just 3 minutes to Mongkok MTR and 4 minutes to Yau Ma Tei MTR, perfect for exploring vibrant shopping and dining options anytime.",
 "Confortable loft d'une chambre avec balcon offrant une vue sur le Plateau, proche de Saint Laurent, Quartier des Spectacles et Mont Royal, idéal pour couple ou petits groupes, avec cuis

In [78]:
from typing import Optional
from pydantic import BaseModel, Field
from typing import List, Dict, Any
from src.data_models import Address, ImageDescrib
import pandas as pd 
from src.hybrid_search import *


# Handle to the collection the search runs against.
# NOTE(review): get_collection is not imported by name here (presumably it comes from a
# star import); which database/collection it opens is not visible in this notebook.
collection = get_collection()
# Searcher bound to that collection; the retrieval cell calls its do_search().
hybrid_searcher = HybridSearch(collection)


class SearchResultItem(BaseModel):
    """Validated view of one raw search hit.

    Instances are built from a result dict via ``SearchResultItem(**result)``.
    ``_id``, ``name``, ``address`` and ``images`` are required; every other
    field defaults to ``None`` when the raw record does not provide it.
    """

    # The raw record carries its key as `_id`; the model exposes it as `id`.
    id: int = Field(alias='_id')
    name: str
    accommodates: Optional[int] = Field(default=None)
    address: Address
    # Free-text fields of the listing.
    summary: Optional[str] = Field(default=None)
    description: Optional[str] = Field(default=None)
    neighborhood_overview: Optional[str] = Field(default=None)
    notes: Optional[str] = Field(default=None)
    images: ImageDescrib
    # Score of the hit, when the search result includes one.
    search_score: Optional[float] = Field(default=None)
    reviews: Optional[List[Dict[str, Any]]] = Field(default=None)

### Retrieve the top 10 most similar listings from the database for each property's generated query

In [79]:
# For every generated query: run the hybrid search, validate each raw hit
# against SearchResultItem, and keep the hits as one DataFrame per query.
retrieved_results = []
for query in sample_queries:
    validated_rows = [
        SearchResultItem(**hit).model_dump()
        for hit in hybrid_searcher.do_search(query)
    ]
    retrieved_results.append(pd.DataFrame(validated_rows))

# Descriptions of the hits returned for the first query.
retrieved_results[0]['description']

0    This property is a (Phone number hidden by Air...
1    Kick back on a sunny deck and see mountains in...
2    Completely updated inside, this elegant and sp...
3    This newly renovated contemporary modern home ...
4    Tasteful Hawaiian decor makes this home away f...
5    Newly remodeled! Gorgeous home with fully remo...
6    "Lani Beach House" Aloha - Please do not reser...
7    Located just steps away from breathtaking suns...
8    ★ Mar 2-17 and Apr 8-11 are discounted to $454...
9    Comfortable 3-bedroom air conditioned home on ...
Name: description, dtype: object

In [80]:
# One list of listing ids per query, in the order the search returned the hits.
retrieved_ids = [hits_df['id'].tolist() for hits_df in retrieved_results]

### Check if random property ids are in the top k retrieved ids

In [81]:
# For each query, check whether the id of the property it was generated from
# appears among that query's retrieved ids.
# NOTE(review): this pairs retrieved_ids[i] with random_property_ids[i], so it relies on
# generate_queries preserving the order and length of the sampled properties — confirm.
top_k_positions = [
    check_top_k_positions(ids_for_query, random_property_ids[idx])
    for idx, ids_for_query in enumerate(retrieved_ids)
]

top_k_positions

[array([1, 1, 1, 1]),
 array([1, 1, 1, 1]),
 array([0, 0, 0, 0]),
 array([0, 0, 0, 0]),
 array([1, 1, 1, 1]),
 array([0, 0, 0, 0]),
 array([0, 0, 0, 0]),
 array([1, 1, 1, 1]),
 array([1, 1, 1, 1]),
 array([0, 0, 0, 0]),
 array([0, 0, 0, 0]),
 array([1, 1, 1, 1]),
 array([0, 0, 0, 0]),
 array([1, 1, 1, 1]),
 array([1, 1, 1, 1]),
 array([0, 0, 1, 1]),
 array([0, 1, 1, 1]),
 array([1, 1, 1, 1]),
 array([1, 1, 1, 1]),
 array([0, 0, 0, 0])]

In [82]:
# Convert the per-query position arrays into a DataFrame, one column per cutoff.
top_k_df = pd.DataFrame(top_k_positions, columns=['top_1', 'top_3', 'top_5', 'top_10'])
# Each column holds one 0/1 flag per query, so the column mean is the share of
# queries that hit at that cutoff. A single column-wise mean replaces the four
# copy-pasted sum/len lines; float() keeps the results as plain Python floats.
top_1, top_3, top_5, top_10 = (float(rate) for rate in top_k_df.mean())

top_1, top_3, top_5, top_10

(0.5, 0.55, 0.6, 0.6)

### Using Ragas to evaluate the LLM's responses
- Context Recall 
- Faithfulness
- Factual correctness

Context Recall measures how many of the relevant documents (or pieces of information) were successfully retrieved. It focuses on not missing important results. Higher recall means fewer relevant documents were left out. In short, recall is about not missing anything important. Since it is about not missing anything, calculating context recall always requires a reference to compare against.

The Faithfulness metric measures how factually consistent a response is with the retrieved context. It ranges from 0 to 1, with higher scores indicating better consistency.

FactualCorrectness is a metric class that evaluates the factual correctness of responses generated by a language model. It uses claim decomposition and natural language inference (NLI) to verify the claims made in the responses against reference texts.

In [83]:
# Reference text for each query: the description of the sampled property at the
# same position.
expected_responses = [
    random_properties[idx]['description']
    for idx in range(len(sample_queries))
]


In [84]:
# Context for each query: the description of the first hit in its result frame.
# .iloc[0] picks the first row by position, so it does not depend on the frame
# carrying a 0 index label (the previous `[0]` was a label lookup); the redundant
# empty-list initialisation that was immediately overwritten is dropped.
top_1_retrieved_contexts = [hits_df['description'].iloc[0] for hits_df in retrieved_results]
top_1_retrieved_contexts


['This property is a (Phone number hidden by Airbnb) square feet,  3 Bedroom, 2 bathroom accommodation located in Hanalei.  NEW LISTING! Beautiful home w/ lanai, central location & easy beach access! offers 3 Queen bed. It can host 6 people.  This accommodation features kitchen. Please see the description below or inquire for more details! Hale Kailea is a tastefully furnished modern home in historic Hanalei Bay! Its central location allows you to be in the center of it all, while still providing a private, comfortable sanctuary to recover from your time in the sun, sand, and sea.  Enjoy barbecuing on the shaded, wrap-around lanai and sharing a meal with your ohana (family and friends) around the custom dining table with seating for six. The open kitchen and cathedral ceiling in the living area allow you to entertain with ease. The bedrooms offer ample room for accommodating six people comfortably and each is equipped with a queen bed and the perfect balance of sunlight, shade, and air

In [85]:
def generate_response(query, context):
    """Ask the RAG LLM to answer ``query`` using ``context`` as supporting text.

    Args:
        query: The user's search request; fills the ``{query}`` slot of the
            human message.
        context: Supporting text (in this notebook, a listing description);
            fills the ``{context}`` slot of the human message.

    Returns:
        Whatever ``rag_llm`` returns for the formatted messages; callers in
        this notebook read its ``.content`` attribute.
    """
    system_instructions = """You are an Airbnb listing recommendation system. Please:
                1. Respond in the same language as the user
                2. If the user is asking for property recommendations:
                   - Prioritize results with higher search scores
                   - Include the Airbnb listing URL and image URL
                   - Explain why you chose these properties
                   - Highlight features that match the user's criteria
                3. If the user has provided an image, consider visual similarity in your recommendations
                4. Be friendly and helpful in your responses
                5, answer the question in the same language as the query"""
    human_request = "Answer this user query: {query} with the following context:\n{context}"

    chat_prompt = ChatPromptTemplate.from_messages([
        ("system", system_instructions),
        ("human", human_request),
    ])

    # NOTE(review): ChatPromptTemplate and rag_llm are not imported by name in this
    # notebook (presumably they arrive through a star import) — confirm.
    return rag_llm(chat_prompt.format_messages(query=query, context=context))

In [86]:
# Build one evaluation sample per query: the query itself, the single retrieved
# context, the LLM's answer text and the reference description.
dataset = [
    {
        "user_input": query,
        "retrieved_contexts": [context],
        "response": generate_response(query, context).content,
        "reference": reference,
    }
    for query, reference, context in zip(sample_queries, expected_responses, top_1_retrieved_contexts)
]

In [87]:
# Preview only the first few samples; displaying every sample with its full
# context and response floods the notebook output.
dataset[:3]

[{'user_input': 'Modern 3-bedroom home in Hanalei with 2 bathrooms, spacious kitchen, wrap-around lanai for barbecues, central location near beaches, comfortable queen beds for six, and a stylish, private sanctuary to relax after beach days.',
  'retrieved_contexts': ['This property is a (Phone number hidden by Airbnb) square feet,  3 Bedroom, 2 bathroom accommodation located in Hanalei.  NEW LISTING! Beautiful home w/ lanai, central location & easy beach access! offers 3 Queen bed. It can host 6 people.  This accommodation features kitchen. Please see the description below or inquire for more details! Hale Kailea is a tastefully furnished modern home in historic Hanalei Bay! Its central location allows you to be in the center of it all, while still providing a private, comfortable sanctuary to recover from your time in the sun, sand, and sea.  Enjoy barbecuing on the shaded, wrap-around lanai and sharing a meal with your ohana (family and friends) around the custom dining table with s

In [88]:
from ragas import EvaluationDataset
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness


# Wrap the plain list of sample dicts in a Ragas EvaluationDataset.
evaluation_dataset = EvaluationDataset.from_list(dataset)
# Model handed to evaluate() as its `llm`, wrapped for Ragas via LangchainLLMWrapper.
# NOTE(review): ChatOpenAI is not imported in this cell — presumably it arrives via a star import; confirm.
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-nano"))

In [89]:
# Score the dataset on context recall, faithfulness and factual correctness,
# using evaluator_llm as the judge model.
result = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],
    llm=evaluator_llm,
)
result


Evaluating: 100%|██████████| 60/60 [00:40<00:00,  1.48it/s]


{'context_recall': 0.8309, 'faithfulness': 0.7652, 'factual_correctness(mode=f1)': 0.5640}

In [90]:
# Upload the evaluation results to the hosted Ragas dashboard (app.ragas.io); the call
# returns the dashboard URL.
# NOTE(review): this sends results to an external service, presumably including the
# queries, contexts and responses — confirm that is acceptable for this data.
result.upload() 

Evaluation results uploaded! View at https://app.ragas.io/dashboard/alignment/evaluation/19f63fdc-25cd-47c1-ba08-4f202fc14ff9


'https://app.ragas.io/dashboard/alignment/evaluation/19f63fdc-25cd-47c1-ba08-4f202fc14ff9'